代码已经发布到github 分支lite https://github.com/AIddlx/MNN/tree/lite/apps/Android/MnnLlmChat
端口默认8080,支持局域网内任何设备对话,仅支持文本
更多实现可参考 https://github.com/AIddlx/MNN (我实现的这个分叉),按官方原版方式自行编译
演示视频可在b站找到
【手机离线跑大模型,还能对外提供API-哔哩哔哩】
【手机运行大模型并提供API_支持图片-哔哩哔哩】
b站、小红书搜哆哆梨想
如有问题出现 关注+私信
以下内容是今天的主题
代码部分
MainActivity.java的onCreate的最后一行添加 OpenAIService.getInstance(this);
在 ChatActivity.java 中的以下位置(chatSession.load() 完成之后)添加 OpenAIService.setCurrentSession(chatSession);
chatExecutor.submit(() -> {
    Log.d(TAG, "chatSession loading");
    setIsLoading(true);
    chatSession.load();
    setIsLoading(false);
    Log.d(TAG, "chatSession loaded");
    // Added line: once the model has been loaded, hand the session to the
    // HTTP API service so incoming requests can use it.
    OpenAIService.setCurrentSession(chatSession);
});
在 apps\Android\MnnLlmChat\app\src\main\java\ 目录下新建文件夹 ddlx,再在其中新建文件夹 api,然后在 api 中创建文件 OpenAIService.java
package ddlx.api;
import android.app.Service;
import android.content.Context;
import android.content.Intent;
import android.os.Binder;
import android.os.IBinder;
import android.util.Log;
import com.alibaba.mls.api.ApplicationProvider;
import com.alibaba.mnnllm.android.ChatSession;
import fi.iki.elonen.NanoHTTPD;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.*;
import java.nio.charset.StandardCharsets;
public class OpenAIService extends Service {
// private static final String TAG = "OpenAIService";
private static final int PORT = 8080;
private static volatile OpenAIService instance;
private ApiServer server;
private static ChatSession currentSession;
public static OpenAIService getInstance(Context context ) {
if (instance == null) {
context.startService(new Intent(context, OpenAIService.class));
}
return instance;
}
public static void setCurrentSession(ChatSession session) {
currentSession = session;
}
public static ChatSession getCurrentSession() {
return currentSession;
}
@Override
public void onCreate() {
super.onCreate();
instance = this;
}
@Override
public int onStartCommand(Intent intent, int flags, int startId) {
try {
if (server != null) {
server.stop();
}
server = new ApiServer();
server.start();
Intent readyIntent = new Intent("com.alibaba.mnnllm.android.SERVICE_READY");
sendBroadcast(readyIntent);
} catch (IOException e) {
Intent failureIntent = new Intent("com.alibaba.mnnllm.android.SERVICE_FAILED");
failureIntent.putExtra("error", e.getMessage());
sendBroadcast(failureIntent);
}
return START_STICKY;
}
@Override
public void onDestroy() {
if (server != null) {
server.stop();
}
instance = null;
super.onDestroy();
}
@Override
public IBinder onBind(Intent intent) {
return new Binder();
}
private class ApiServer extends NanoHTTPD {
ApiServer() throws IOException {
super("0.0.0.0", PORT);
}
@Override
public Response serve(IHTTPSession session) {
if (Method.OPTIONS.equals(session.getMethod())) {
return addCorsHeaders(newFixedLengthResponse(""));
}
try {
String uri = session.getUri();
if ("/v1/chat/completions".equals(uri)) {
return addCorsHeaders(handleChatCompletions(session));
}
return addCorsHeaders(newFixedLengthResponse(Response.Status.NOT_FOUND,
"application/json", "{\"error\":\"Not found\"}"));
} catch (Exception e) {
return addCorsHeaders(newFixedLengthResponse(Response.Status.INTERNAL_ERROR,
"application/json", "{\"error\":\"Internal server error\"}"));
}
}
private Response handleChatCompletions(IHTTPSession session) throws Exception {
// String method = session.getMethod().toString();
if (!Method.POST.equals(session.getMethod())) {
return newFixedLengthResponse(Response.Status.METHOD_NOT_ALLOWED,
"application/json", "{\"error\":\"Method not allowed\"}");
}
ChatSession chatSession = getCurrentSession();
if (chatSession == null) {
return newFixedLengthResponse(Response.Status.SERVICE_UNAVAILABLE,
"application/json", "{\"error\":\"No active chat session\"}");
}
chatSession.reset(); // 确保清空之前的对话状态
byte[] buffer = new byte[session.getInputStream().available()];
session.getInputStream().read(buffer);
JSONObject request = new JSONObject(new String(buffer, StandardCharsets.UTF_8));
StringBuilder context = new StringBuilder();
JSONArray messages = request.getJSONArray("messages");
for (int i = 0; i < messages.length(); i++) {
JSONObject message = messages.getJSONObject(i);
context.append(message.getString("role"))
.append(": ")
.append(message.getString("content"))
.append("\n");
}
PipedInputStream in = new PipedInputStream();
PipedOutputStream out = new PipedOutputStream(in);
Response response = newChunkedResponse(Response.Status.OK, "text/event-stream", in);
response.addHeader("Content-Type", "text/event-stream");
response.addHeader("Cache-Control", "no-cache");
new Thread(() -> {
try {
String responseId = "catchall-" + System.currentTimeMillis();
chatSession.generate(context.toString(), progress -> {
try {
if (progress != null) {
JSONObject chunk = new JSONObject()
.put("id", responseId)
.put("object", "chat.completion.chunk")
.put("created", System.currentTimeMillis() / 1000)
.put("model", "mnn-local")
.put("choices", new JSONArray()
.put(new JSONObject()
.put("delta", new JSONObject()
.put("content", progress))
.put("index", 0)
.put("finish_reason", null)));
out.write(("data: " + chunk + "\n\n").getBytes(StandardCharsets.UTF_8));
out.flush();
}
} catch (Exception e) {
return true;
}
return false;
});
out.write("data: [DONE]\n\n".getBytes(StandardCharsets.UTF_8));
} catch (Exception e) {
// Ignore stream errors
} finally {
try {
out.close();
chatSession.reset(); // 请求处理完成后再次调用reset()确保状态清除
} catch (IOException e) {
// Ignore close errors
}
}
}).start();
return response;
}
private Response addCorsHeaders(Response response) {
response.addHeader("Access-Control-Allow-Origin", "*");
response.addHeader("Access-Control-Allow-Methods", "*");
response.addHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
response.addHeader("Cache-Control", "no-cache");
return response;
}
}
}
apps\Android\MnnLlmChat\app\src\main\AndroidManifest.xml 的 <application 中添加
<!-- Local HTTP API service implemented by ddlx.api.OpenAIService -->
<service
    android:name="ddlx.api.OpenAIService"
    android:enabled="true"
    android:foregroundServiceType="dataSync" />
依赖
apps\Android\MnnLlmChat\app\build.gradle的dependencies内填入
dependencies {
    // Embedded HTTP server (fi.iki.elonen.NanoHTTPD) used by OpenAIService
    implementation('org.nanohttpd:nanohttpd:2.3.1')
}
测试方式
1.点开某个模型开启对话之后,在同一个wifi的电脑上开启Windows PowerShell输入以下内容进行测试(先填入手机地址)
# POSTs a single-message chat request to /v1/chat/completions on port 8080.
# Replace the placeholder in the URL with the phone's Wi-Fi IP address first.
curl.exe -X POST `
http://此处填入手机WiFi地址:8080/v1/chat/completions `
-H "Content-Type: application/json" `
-d '{
"messages": [
{"role": "user", "content": "你好,你是谁?"}
]
}'
2.chatbox、Cherry Studio等客户端可以填入http://此处填入手机WiFi地址:8080/v1/chat/completions
模型填入 mnn-local,密钥随意填。
可能出现的问题
如果无法连接,可以尝试在 AndroidManifest 中加入 WiFi 相关权限,或者清理 mmap 缓存、彻底停止程序之后再重新进入程序。