```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sse_starlette.sse import EventSourceResponse
import requests
import json

# Sentence-ending punctuation used to cut the stream into complete sentences
# (handy when the chunks are fed to a TTS engine downstream).
punct_list = ["。", "!", "?"]

app = FastAPI()
OLLAMA_URL = "http://localhost:11434/api/chat"


class ChatRequestDirect(BaseModel):
    prompt: str
    max_tokens: int = 1024
    temperature: float = 0.7


class ChatRequestStream(BaseModel):
    prompt: str


@app.post("/v1/chat")
def chat_completion(request: ChatRequestDirect):
    """Non-streaming chat: forward the prompt to Ollama and return its message."""
    payload = {
        "model": "deepseek-r1:32b",
        "messages": [{"role": "user", "content": request.prompt}],
        "stream": False,
        "options": {
            "temperature": request.temperature,
            "num_predict": request.max_tokens,
        },
    }
    try:
        response = requests.post(OLLAMA_URL, json=payload)
        response.raise_for_status()
        return response.json()["message"]
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail=f"Ollama request failed: {exc}")


@app.post("/v1/stream")
async def chat_stream(request: ChatRequestStream):
    """Streaming chat: relay Ollama's token stream as SSE, one sentence per event."""

    def stream():
        payload = {
            "model": "deepseek-r1:32b",
            "messages": [{"role": "user", "content": request.prompt}],
            "stream": True,
        }
        try:
            response = requests.post(OLLAMA_URL, json=payload, stream=True)
            response.raise_for_status()
            text2tts = ""
            # Ollama streams newline-delimited JSON objects.
            for line in response.iter_lines():
                if not line:
                    continue
                data = json.loads(line.decode("utf-8"))
                text = data.get("message", {}).get("content", "")
                text2tts += text
                # Whenever a sentence-ending punctuation mark arrives,
                # emit everything accumulated up to (and including) it.
                for punct in punct_list:
                    if punct in text:
                        front, back = text2tts.replace("\n", "").rsplit(punct, 1)
                        yield front + punct
                        text2tts = back
                        break
            # Flush whatever is left once the stream ends.
            if text2tts:
                yield text2tts
        except requests.RequestException:
            return

    return EventSourceResponse(stream())


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
```
The above is a simple local call to the Ollama API. Ollama can pull models even over a mainland-China network connection.
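For reference, here is a minimal client sketch for the streaming endpoint above. It assumes the service is running locally on port 8001 (as in the `uvicorn.run` call) and the prompt is just an example; it reads the SSE stream line by line with `requests`:

```python
# Minimal SSE client sketch for the /v1/stream endpoint above.
# Assumes the FastAPI service is running on localhost:8001; the prompt is only an example.
import requests

resp = requests.post(
    "http://localhost:8001/v1/stream",
    json={"prompt": "用一句话介绍一下Ollama。"},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw:
        continue
    line = raw.decode("utf-8")
    # sse_starlette frames each chunk as an SSE "data: ..." line.
    if line.startswith("data:"):
        print(line[len("data:"):].strip())
```

Each printed chunk should be a complete sentence, since the server only yields on sentence-ending punctuation.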
As for deploying deepseek-r1 with Ollama, the main thing is choosing a model size that fits your machine: on CPU the 1.5b version is actually quite fast and perfectly usable. If you want to run it on a GPU you need to set up CUDA, and the CUDA version must match; see the CSDN post 《Ai学习之Ollama使用GPU运行模型的环境部署》 for the environment setup.
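If you are not sure which variants have already been pulled, Ollama's local API can list them; a quick sketch, assuming Ollama is running on its default port 11434:

```python
# List locally pulled models via Ollama's /api/tags endpoint
# (assumes Ollama is running on the default port 11434).
import requests

tags = requests.get("http://localhost:11434/api/tags").json()
for model in tags.get("models", []):
    print(model["name"])  # e.g. "deepseek-r1:1.5b" or "deepseek-r1:32b"
```

Then just swap the `model` field in the payloads above (for example to `deepseek-r1:1.5b`) to match whatever your hardware can handle.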