open webui源码分析3—一次对话

原创已于 2025-08-18 21:51:43 修改 · 480 阅读

9 ·

CC 4.0 BY-SA版权

文章标签：

#大模型 #open webui

于 2025-08-18 10:34:44 首次发布

对话过程是open webui的核心逻辑，本文针对最简单的一个对话中的一次交互进行分析，暂不涉及知识库、推理、function calling、过滤、搜索、多模态等功能。

一、更新对话数据

创建对话后，前端马上发起一次请求更新对话数据，对应入口为:

http://{ip:port}/api/v1/chats/{chat_id}。请求数据如下：

{

chat:{
"id": "4ce79017-9cc5-499d-9b1f-c9e37048e763",
"title": "新对话",
"models": [
"deepseek-r1:1.5b"
],
"params": {},
"history": {
"messages": {
"b4e671e4-1ce1-43cc-a041-9a8e3c3904c4": {#创建对话时请求中的消息ID
"id": "b4e671e4-1ce1-43cc-a041-9a8e3c3904c4",
"parentId": null,
"childrenIds": [
"f1107cfa-9f69-4a38-88b5-8e284f9fa5ed"
],
"role": "user",
"content": "五代十国第一猛将是谁？",
"timestamp": 1755305425,
"models": [
"deepseek-r1:1.5b"
]
},

#前端生成,作为助手应答占位。该ID作为后继补足请求的消息ID
"2268a3da-9a78-4bd9-8cae-d399bed04f2a": {
"parentId": "1b4e671e4-1ce1-43cc-a041-9a8e3c3904c4",
"id": "2268a3da-9a78-4bd9-8cae-d399bed04f2a",
"childrenIds": [],
"role": "assistant",
"content": "",
"model": "deepseek-r1:1.5b",
"modelName": "deepseek-r1:1.5b",
"modelIdx": 0,
"timestamp": 1755305425
}
},
"currentId": "2268a3da-9a78-4bd9-8cae-d399bed04f2a"
},
"messages": [
{
"id": "191b6fe3-5dbd-4910-84e6-d4fe971e502d",
"parentId": null,
"childrenIds": [
"f1107cfa-9f69-4a38-88b5-8e284f9fa5ed"
],
"role": "user",
"content": "五代十国第一猛将是谁？",
"timestamp": 1755305425,
"models": [
"deepseek-r1:1.5b"
]
},
{
"parentId": "191b6fe3-5dbd-4910-84e6-d4fe971e502d",
"id": "f1107cfa-9f69-4a38-88b5-8e284f9fa5ed",
"childrenIds": [],
"role": "assistant",
"content": "",
"model": "deepseek-r1:1.5b",
"modelName": "deepseek-r1:1.5b",
"modelIdx": 0,
"timestamp": 1755305425
}
],
"tags": [],
"timestamp": 1755305425309,
"files": []
}

}

对于入口处理函数为update_chat_by_id，处理逻辑比较简单，用本地请求数据更新数据库中chat表的chat字段，并把完整的聊天记录返回前端。

@router.post("/{id}", response_model=Optional[ChatResponse])
async def update_chat_by_id(
id: str, form_data: ChatForm, user=Depends(get_verified_user)
):

#根据chat_id从数据库查找对应对话的chat字段
chat = Chats.get_chat_by_id_and_user_id(id, user.id)
if chat:

#对数据中存储的对话数据和请求中的对话数据做并集，同名属性后者覆盖前者
updated_chat = {**chat.chat, **form_data.chat}
chat = Chats.update_chat_by_id(id, updated_chat)#更新数据库中数据
return ChatResponse(**chat.model_dump())
else:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
)

二、发起competion

发起会话对应入口为：http://{ip:port}/api/chat/completions，对应的请求数据如下：

{
"stream": true,
"model": "deepseek-r1:1.5b",
"messages": [ #用户提问
{
"role": "user",
"content": "五代十国第一猛将是谁？"
}
],
"params": {},
"tool_servers": [],
"features": {#高级特性，暂不关注
"image_generation": false,
"code_interpreter": false,
"web_search": false,
"memory": false
},
"variables": { #前端传入的登录相关信息
"{{USER_NAME}}": "acaluis",
"{{USER_LOCATION}}": "Unknown",
"{{CURRENT_DATETIME}}": "2025-08-13 21:26:15",
"{{CURRENT_DATE}}": "2025-08-13",
"{{CURRENT_TIME}}": "21:26:15",
"{{CURRENT_WEEKDAY}}": "Wednesday",
"{{CURRENT_TIMEZONE}}": "Etc/GMT-8",
"{{USER_LANGUAGE}}": "zh-CN"
},
"model_item": {#大模型信息，使用了deepseek蒸馏模型
"id": "deepseek-r1:1.5b",
"name": "deepseek-r1:1.5b",
"object": "model",
"created": 1755089334,
"owned_by": "ollama",
"ollama": {
"name": "deepseek-r1:1.5b",
"model": "deepseek-r1:1.5b",
"modified_at": "2025-08-13T12:47:37.058323295Z",
"size": 1117322768,
"digest": "e0*d7",
"details": {
"parent_model": "",
"format": "gguf",
"family": "qwen2",
"families": [
"qwen2"
],
"parameter_size": "1.8B",
"quantization_level": "Q4_K_M"
},
"connection_type": "local",
"urls": [
0
],
"expires_at": 1755089575
},
"connection_type": "local",
"tags": [],
"actions": [],
"filters": []
},
"session_id": "vc0sbdfwW0jkvteMAAAF", #websocket对应的session_id
"chat_id": "4ce79017-9cc5-499d-9b1f-c9e37048e763",#创建会话时生成的会话ID
"id": "2268a3da-9a78-4bd9-8cae-d399bed04f2a",#更新对话请求数据中助手消息ID
"background_tasks": {#后台任务启用设置
"title_generation": true, #启用自动生成标题任务
"tags_generation": true, #启用自动生成标签任务
"follow_up_generation": true #启用自动生成跟随问题任务
}
}

对应入口函数为chat_completion，具体分析如下：

流程如下：

1）判断是否有可用模型，如果没有则从本地数据库查询

2）获取用户使用的模型信息

3）元数据metadata设置

4）调用process_chat_payload对请求进行预处理

5）调用chat_completion_handler方法向ollama平台发起competion请求

6）调用process_chat_response方法以SSE方式完成应答

@app.post("/api/chat/completions")
async def chat_completion(
request: Request,
form_data: dict,
user=Depends(get_verified_user),
)：

#后台任务已经从ollama请求获取了可用模型列表，所以不会进入内部的逻辑

if not request.app.state.MODELS:
await get_all_models(request, user=user)

model_item = form_data.pop("model_item", {}) #从请求中获取模型信息
tasks = form_data.pop("background_tasks", None) #从请求中获取需启用后台任务列表

metadata = {}
try:

#进入如下分支
if not model_item.get("direct", False):

model_id = form_data.get("model", None)
if model_id not in request.app.state.MODELS: #检查是否在预先拉取的模型列表中
raise Exception("Model not found")

model = request.app.state.MODELS[model_id]
model_info = Models.get_model_by_id(model_id)#获取模型信息

# 检查用户权限
if not BYPASS_MODEL_ACCESS_CONTROL and user.role == "user":
try:
check_model_access(user, model)
except Exception as e:
raise e

else: #如果open webui直接连接公有大模型，则进入如下分支，在此不予分析
model = model_item
model_info = None

request.state.direct = True
request.state.model = model

'''

对表单一级数据进行瘦身，把chat_id,id,session_id,filter_ids,tool_servers删除并追加到metadata中。同时在metadata中追加ser_id,files,features,variables,model

和function_calling。再把metadata设置到全局request.state中和表单数据中。

'''

metadata = {
"user_id": user.id,
"chat_id": form_data.pop("chat_id", None),
"message_id": form_data.pop("id", None),
"session_id": form_data.pop("session_id", None),
"filter_ids": form_data.pop("filter_ids", []),
"tool_ids": form_data.get("tool_ids", None),
"tool_servers": form_data.pop("tool_servers", None),
"files": form_data.get("files", None),
"features": form_data.get("features", {}),
"variables": form_data.get("variables", {}),
"model": model,
"direct": model_item.get("direct", False),
**(
{"function_calling": "native"}
if form_data.get("params", {}).get("function_calling") == "native"
or (
model_info
and model_info.params.model_dump().get("function_calling")
== "native"
)
else {}
),
}

request.state.metadata = metadata
form_data["metadata"] = metadata

form_data, metadata, events = await process_chat_payload(
request, form_data, user, metadata, model
)

……#异常处理代码不予关注

try:
response = await chat_completion_handler(request, form_data, user)

return await process_chat_response(
request, response, form_data, user, metadata, model, events, tasks
)

……#异常处理代码不予关注

下面对process_chat_payload函数进行分析：

流程如下：

1）确定执行任务的大模型，也就是当前会话选择的大模型

2）处理知识库，生成知识库文件列表

3）流水线处理

4）过滤器处理

5）高级特性，包括代码生成，文生图，内存搜索和web搜索

6）function calling处理

7）模型知识处理，发送请求到前端等待处理

async def process_chat_payload(request, form_data, user, metadata, model):

#针对请求中的params做处理，因为params为空，所以没有任何处理
form_data = apply_params_to_form_data(form_data, model)
log.debug(f"form_data: {form_data}")

#与前端websocket通信方法设置

event_emitter = get_event_emitter(metadata)
event_call = get_event_call(metadata)

extra_params = {
"__event_emitter__": event_emitter,
"__event_call__": event_call,
"__user__": user.model_dump() if isinstance(user, UserModel) else {},
"__metadata__": metadata,
"__request__": request,
"__model__": model,
}

'''

确定执行任务的大模型

''''
if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
models = {
request.state.model["id"]: request.state.model,
}
else:
models = request.app.state.MODELS

task_model_id = get_task_model_id(
form_data["model"], //zbl:模型名，比如deepseek-chat
request.app.state.config.TASK_MODEL,
request.app.state.config.TASK_MODEL_EXTERNAL,
models, //zbl:{deepseek-chat: model_item}
)

events = [] #保存发送到客户端的事件
sources = [] #保存上下文和应用数据

#从请求中获取用户的问话

user_message = get_last_user_message(form_data["messages"])

#根据模型信息确定是否挂载了知识库，并执行对应的处理
model_knowledge = model.get("info", {}).get("meta", {}).get("knowledge", False)

if model_knowledge: #处理外挂知识库的情况
await event_emitter(#通过websocket发送知识库查询请求到前端
{
"type": "status",
"data": {
"action": "knowledge_search",
"query": user_message,
"done": False,
},
}
)

knowledge_files = []
for item in model_knowledge:#把向量库集合增加到 knowledge_files中
if item.get("collection_name"):
knowledge_files.append(
{
"id": item.get("collection_name"),
"name": item.get("name"),
"legacy": True,
}
)
elif item.get("collection_names"):
knowledge_files.append(
{
"name": item.get("name"),
"type": "collection",
"collection_names": item.get("collection_names"),
"legacy": True,
}
)
else:
knowledge_files.append(item)

files = form_data.get("files", []) #获取请求中的所有文件
files.extend(knowledge_files) #把knowledge_files追加到files列表中
form_data["files"] = files #更新表单中的files

variables = form_data.pop("variables", None) #垃圾代码，后面未用到

# 以下代码对表单数据进行流水线处理，后继再对process_pipeline_inlet_filter进行分析
try:
form_data = await process_pipeline_inlet_filter(
request, form_data, user, models
)
except Exception as e:
raise e

#以下代码进行过滤器处理，后继再专门分析

try:

filter_functions = [#获取所有的过滤函数
Functions.get_function_by_id(filter_id)
for filter_id in get_sorted_filter_ids(
request, model, metadata.get("filter_ids", [])
)
]

form_data, flags = await process_filter_functions(#进行过滤处理
request=request,
filter_functions=filter_functions,
filter_type="inlet",
form_data=form_data,
extra_params=extra_params,
)
except Exception as e:
raise Exception(f"Error: {e}")

#以下代码处理高级特性，包括内存、web搜索、生成图片和生成代码。后继再分析

features = form_data.pop("features", None)
if features:
if "memory" in features and features["memory"]:
form_data = await chat_memory_handler(#从向量库查询匹配文档
request, form_data, extra_params, user
)

if "web_search" in features and features["web_search"]:
form_data = await chat_web_search_handler(#推送数据到前端进行搜索
request, form_data, extra_params, user
)

if "image_generation" in features and features["image_generation"]:
form_data = await chat_image_generation_handler(#生成图片
request, form_data, extra_params, user
)

if "code_interpreter" in features and features["code_interpreter"]:

#在表单数据中的messages中，增加CODE_INTERPRETER_PROMPT_TEMPLATE
form_data["messages"] = add_or_update_user_message(
(
request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE
if request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE != ""
else DEFAULT_CODE_INTERPRETER_PROMPT
),
form_data["messages"],
)

#从表单弹出客户端工具标识列表tool_ids

tool_ids = form_data.pop("tool_ids", None)

files = form_data.pop("files", None)

# Remove files duplicates
if files: #对表单中的files列表去重
files = list({json.dumps(f, sort_keys=True): f for f in files}.values())

metadata = {#以下代码中tool_ids和files设置重复，主函数中已经完成了设置
**metadata,
"tool_ids": tool_ids,
"files": files,
}
form_data["metadata"] = metadata

'''

以下代码处理工具，本文不展开。

'''

# 服务侧工具列表
tool_ids = metadata.get("tool_ids", None)
# 客户端工具列表
tool_servers = metadata.get("tool_servers", None)

log.debug(f"{tool_ids=}")
log.debug(f"{tool_servers=}")

tools_dict = {}

if tool_ids:
tools_dict = get_tools(#根据tool_ids获取tool字典
request,
tool_ids,
user,
{
**extra_params,
"__model__": models[task_model_id],
"__messages__": form_data["messages"],
"__files__": metadata.get("files", []),
},
)

if tool_servers:#把客户侧工具追加到工具字典中
for tool_server in tool_servers:
tool_specs = tool_server.pop("specs", [])

for tool in tool_specs:
tools_dict[tool["name"]] = {
"spec": tool,
"direct": True,
"server": tool_server,
}

if tools_dict:#处理函数调用
if metadata.get("function_calling") == "native":
# 如果function_calling是本地调用，则获取本地函数句柄
metadata["tools"] = tools_dict
form_data["tools"] = [
{"type": "function", "function": tool.get("spec", {})}
for tool in tools_dict.values()
]
else:
# 如果function_calling非本地调用，则走function calling流程
try:
form_data, flags = await chat_completion_tools_handler(
request, form_data, extra_params, user, models, tools_dict
)#获取到函数调用结果，一般是向量库匹配的文档
sources.extend(flags.get("sources", []))

except Exception as e:
log.exception(e)

try:

#本地调用function calling流程
form_data, flags = await chat_completion_files_handler(request, form_data, user)
sources.extend(flags.get("sources", [])) #把 context文件保存到sources列表中
except Exception as e:
log.exception(e)

#把查询到的上下文追加到messages中
if len(sources) > 0:
context_string = ""
citation_idx = {}
for source in sources:
if "document" in source:
for doc_context, doc_meta in zip(
source["document"], source["metadata"]
):
source_name = source.get("source", {}).get("name", None)
citation_id = (
doc_meta.get("source", None)
or source.get("source", {}).get("id", None)
or "N/A"
)
if citation_id not in citation_idx:
citation_idx[citation_id] = len(citation_idx) + 1
context_string += (
f'<source id="{citation_idx[citation_id]}"'
+ (f' name="{source_name}"' if source_name else "")
+ f">{doc_context}</source>\n"
)

context_string = context_string.strip()
prompt = get_last_user_message(form_data["messages"])

if prompt is None:
raise Exception("No user message found")
if (
request.app.state.config.RELEVANCE_THRESHOLD == 0
and context_string.strip() == ""
):
log.debug(
f"With a 0 relevancy threshold for RAG, the context cannot be empty"
)

# Workaround for Ollama 2.0+ system prompt issue
# TODO: replace with add_or_update_system_message
if model.get("owned_by") == "ollama":
form_data["messages"] = prepend_to_first_user_message_content(
rag_template(
request.app.state.config.RAG_TEMPLATE, context_string, prompt
),
form_data["messages"],
)
else:
form_data["messages"] = add_or_update_system_message(
rag_template(
request.app.state.config.RAG_TEMPLATE, context_string, prompt
),
form_data["messages"],
)

# If there are citations, add them to the data_items
sources = [
source
for source in sources
if source.get("source", {}).get("name", "")
or source.get("source", {}).get("id", "")
]

if len(sources) > 0:
events.append({"sources": sources})#把

if model_knowledge:#如果有知识库，则发送知识查询请求到前端
await event_emitter(
{
"type": "status",
"data": {
"action": "knowledge_search",
"query": user_message,
"done": True,
"hidden": True,
},
}
)

return form_data, metadata, events

下面对chat_completion_handler函数进行分析。该方法基于原始的请求和process_chat_payload的处理结果，直接通过接口调用ollama中的大模型，获取大模型的应答。chat_completion_handler实际为generate_chat_complete。

流程如下：

1）根据表单中的model参数确定使用的大模型类型

2）评估处理

3）管道相关处理

4）请求数据格式适配openai->ollama，调用ollama api，应答数据格式适配ollama->openai并返回流式应答

async def generate_chat_completion(
request: Request,
form_data: dict,
user: Any,
bypass_filter: bool = False,
):
log.debug(f"generate_chat_completion: {form_data}")
if BYPASS_MODEL_ACCESS_CONTROL: #判断是否绕过模型访问进行控制，缺省为否
bypass_filter = True

if hasattr(request.state, "metadata"):#把request.state.metadata追加到表单中
if "metadata" not in form_data:
form_data["metadata"] = request.state.metadata
else:
form_data["metadata"] = {
**form_data["metadata"],
**request.state.metadata,
}

#确定使用的大模型类型

if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
models = {
request.state.model["id"]: request.state.model,
}
log.debug(f"direct connection to model: {models}")
else: #对接ollama
models = request.app.state.MODELS

model_id = form_data["model"]
if model_id not in models:
raise Exception("Model not found")

model = models[model_id]

if getattr(request.state, "direct", False):#前端直连大模型时使用，不予关注
return await generate_direct_chat_completion(
request, form_data, user=user, models=models
)
else:#前端未直连大模型走本分支
# 用户权限检查
if not bypass_filter and user.role == "user":
try:
check_model_access(user, model)
except Exception as e:
raise e

if model.get("owned_by") == "arena":#处于arena模式时，进入本分支，后继再分析。
model_ids = model.get("info", {}).get("meta", {}).get("model_ids")
filter_mode = model.get("info", {}).get("meta", {}).get("filter_mode")
if model_ids and filter_mode == "exclude":
model_ids = [
model["id"]
for model in list(request.app.state.MODELS.values())
if model.get("owned_by") != "arena" and model["id"] not in model_ids
]

selected_model_id = None
if isinstance(model_ids, list) and model_ids:
selected_model_id = random.choice(model_ids)
else:
model_ids = [
model["id"]
for model in list(request.app.state.MODELS.values())
if model.get("owned_by") != "arena"
]
selected_model_id = random.choice(model_ids)

form_data["model"] = selected_model_id

if form_data.get("stream") == True:

async def stream_wrapper(stream):
yield f"data: {json.dumps({'selected_model_id': selected_model_id})}\n\n"
async for chunk in stream:
yield chunk

response = await generate_chat_completion(
request, form_data, user, bypass_filter=True
)
return StreamingResponse(
stream_wrapper(response.body_iterator),
media_type="text/event-stream",
background=response.background,
)
else:
return {
**(
await generate_chat_completion(
request, form_data, user, bypass_filter=True
)
),
"selected_model_id": selected_model_id,
}

#管道处理，目前还没有分析清楚，且跟主流程关系不大，后继再分析

if model.get("pipe"):
# Below does not require bypass_filter because this is the only route the uses this function and it is already bypassing the filter
return await generate_function_chat_completion(
request, form_data, user=user, models=models
)
if model.get("owned_by") == "ollama": #重点在这里
# 调用 /ollama/api/chat，所以要把openai格式的请求表单转换为ollama格式的表单
form_data = convert_payload_openai_to_ollama(form_data)
#发送请求到ollama，接受ollama返回的大模型生成的应答

response = await generate_ollama_chat_completion(
request=request,
form_data=form_data,
user=user,
bypass_filter=bypass_filter,
)
if form_data.get("stream"):#流式应答。一般走这个分支
response.headers["content-type"] = "text/event-stream"
return StreamingResponse(

#把ollama的流式应答转换为openai流式应答
convert_streaming_response_ollama_to_openai(response),
headers=dict(response.headers),
background=response.background,
)
else:
return convert_response_ollama_to_openai(response)
else:
return await generate_openai_chat_completion(
request=request,
form_data=form_data,
user=user,
bypass_filter=bypass_filter,
)

下面对入口函数中调用的最后一个函数process_chat_response进行分析。该函数可以说是openwebui中逻辑最复杂的函数，1400多行代码，多层函数嵌套，读起来很吃力，所以首先把该函数内部嵌套声明和调用的函数关系梳理处理，具体如下图所示：

基于上图，按照从上到下的顺序process_chat_response方法进行拆解。为便于分析，在分析过程中把嵌套函数删除，仅保留调用。首先分析最上层的process_chat_response。

流程如下;

1)创建websocket发送工具和调用工具

2）非流式应答检查，不会用到

3）非标准应答处理

4）更新chat表聊天字段

5）创建异步任务执行post_response_handle方法

6）给前端返回{"status": True, "task_id": task_id}

async def process_chat_response(
request, response, form_data, user, metadata, model, events, tasks
):

'''

创建通过websocket给前端发送数据的方法示例，其中event_emitter仅发送数据，无需

应答，event_caller则可以得到前端的应答

'''

event_emitter = None
event_caller = None
if ( #因为在入口已经把chat_id，session_id和id设置到了metadata中，所以走本分支
"session_id" in metadata
and metadata["session_id"]
and "chat_id" in metadata
and metadata["chat_id"]
and "message_id" in metadata
and metadata["message_id"]
):
event_emitter = get_event_emitter(metadata)
event_caller = get_event_call(metadata)

#下面是非流式应答，基本用不到，所以不做忽略

if not isinstance(response, StreamingResponse):

……

#对于非标的应答，直接返回
if not any(

content_type in response.headers["Content-Type"]
for content_type in ["text/event-stream", "application/x-ndjson"]
):
return response

#设置extra_params，后面process_filter_functions方法中使用

extra_params = {
"__event_emitter__": event_emitter,
"__event_call__": event_caller,
"__user__": user.model_dump() if isinstance(user, UserModel) else {},
"__metadata__": metadata,
"__request__": request,
"__model__": model,
}

#获取过滤器函数列表，暂不用关注
filter_functions = [
Functions.get_function_by_id(filter_id)
for filter_id in get_sorted_filter_ids(
request, model, metadata.get("filter_ids", [])
)
]

#对应答进行流式响应处理

if event_emitter and event_caller:
task_id = str(uuid4()) # 用UUID生成任务ID
model_id = form_data.get("model", "")

#{"model":"deepseek-r1"}插入到当前对话的history:messages对应的消息中

Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
"model": model_id,
},
)

#创建异步任务执行post_response_handle，完成流式应答处理

task_id, _ = await create_task(
request, post_response_handler(response, events), id=metadata["chat_id"]
)
return {"status": True, "task_id": task_id}

else: #前后端没有websocket通信时走本路径，不予关注

return StreamingResponse(
stream_wrapper(response.body_iterator, events),
headers=dict(response.headers),
background=response.background,
)

下面分析 post_response_handler方法。

async def post_response_handler(response, events):

#根据消息ID从数据库获取当前消息数据

message = Chats.get_message_by_id_and_message_id(
metadata["chat_id"], metadata["message_id"]
)

tool_calls = []

#从表单messages中获取助手的应答内容，并设置到content_blocks中

last_assistant_message = None
try: #如果请求表单中最后一个消息是助手消息，则获取助手回复内容
if form_data["messages"][-1]["role"] == "assistant":
last_assistant_message = get_last_assistant_message(
form_data["messages"]
)
except Exception as e:
pass

'''如果数据库中存在该消息，则不管content是否存在，都赋值给content，否则使用请

求表单中携带的助手回复内容'''

content = (
message.get("content", "")
if message
else last_assistant_message if last_assistant_message else ""
)

content_blocks = [#用来汇总流式应答中的答复内容，用上面的content初始化
{
"type": "text",
"content": content,
}
]

# 以下三个常量用于支持推理、解决方案和代码解释，暂不必关注
DETECT_REASONING = True
DETECT_SOLUTION = True
DETECT_CODE_INTERPRETER = metadata.get("features", {}).get(
"code_interpreter", False
)

reasoning_tags = [
("think", "/think"),
("thinking", "/thinking"),
("reason", "/reason"),
("reasoning", "/reasoning"),
("thought", "/thought"),
("Thought", "/Thought"),
("|begin_of_thought|", "|end_of_thought|"),
]

code_interpreter_tags = [("code_interpreter", "/code_interpreter")]

solution_tags = [("|begin_of_solution|", "|end_of_solution|")]

try:
for event in events:
await event_emitter(#把event数据推送到前端
{
"type": "chat:completion",
"data": event,
}
)

'''

把process_chat_payload中返回的events插入到chat表

chat:history:message_id中

'''
Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
**event,
},
)#endfor

#调用stream_body_handler方法，生成工具调用列表，各种消息处理及通知前端

await stream_body_handler(response)

#以下代码进行函数调用，然后调用generate_chat_completion生成应答

MAX_TOOL_CALL_RETRIES = 10
tool_call_retries = 0

while len(tool_calls) > 0 and tool_call_retries < MAX_TOOL_CALL_RETRIES:
tool_call_retries += 1

response_tool_calls = tool_calls.pop(0)

content_blocks.append(
{
"type": "tool_calls",
"content": response_tool_calls,
}
)

await event_emitter(
{
"type": "chat:completion",
"data": {
"content": serialize_content_blocks(content_blocks),
},
}
)

tools = metadata.get("tools", {})

results = []
for tool_call in response_tool_calls:
tool_call_id = tool_call.get("id", "")
tool_name = tool_call.get("function", {}).get("name", "")

tool_function_params = {}
try:
# json.loads cannot be used because some models do not produce valid JSON
tool_function_params = ast.literal_eval(
tool_call.get("function", {}).get("arguments", "{}")
)
except Exception as e:
log.debug(e)
# Fallback to JSON parsing
try:
tool_function_params = json.loads(
tool_call.get("function", {}).get("arguments", "{}")
)
except Exception as e:
log.debug(
f"Error parsing tool call arguments: {tool_call.get('function', {}).get('arguments', '{}')}"
)

tool_result = None

if tool_name in tools:
tool = tools[tool_name]
spec = tool.get("spec", {})

try:
allowed_params = (
spec.get("parameters", {})
.get("properties", {})
.keys()
)

tool_function_params = {
k: v
for k, v in tool_function_params.items()
if k in allowed_params
}

if tool.get("direct", False):
tool_result = await event_caller(
{
"type": "execute:tool",
"data": {
"id": str(uuid4()),
"name": tool_name,
"params": tool_function_params,
"server": tool.get("server", {}),
"session_id": metadata.get(
"session_id", None
),
},
}
)

else:
tool_function = tool["callable"]
tool_result = await tool_function(
**tool_function_params
)

except Exception as e:
tool_result = str(e)

tool_result_files = []
if isinstance(tool_result, list):
for item in tool_result:
# check if string
if isinstance(item, str) and item.startswith("data:"):
tool_result_files.append(item)
tool_result.remove(item)

if isinstance(tool_result, dict) or isinstance(
tool_result, list
):
tool_result = json.dumps(tool_result, indent=2)

results.append(
{
"tool_call_id": tool_call_id,
"content": tool_result,
**(
{"files": tool_result_files}
if tool_result_files
else {}
),
}
)

content_blocks[-1]["results"] = results

content_blocks.append(
{
"type": "text",
"content": "",
}
)

await event_emitter(
{
"type": "chat:completion",
"data": {
"content": serialize_content_blocks(content_blocks),
},
}
)

try:
res = await generate_chat_completion(
request,
{
"model": model_id,
"stream": True,
"tools": form_data["tools"],
"messages": [
*form_data["messages"],
*convert_content_blocks_to_messages(content_blocks),
],
},
user,
)

if isinstance(res, StreamingResponse):
await stream_body_handler(res)
else:
break
except Exception as e:
log.debug(e)
break#end while

if DETECT_CODE_INTERPRETER:
MAX_RETRIES = 5
retries = 0

while (
content_blocks[-1]["type"] == "code_interpreter"
and retries < MAX_RETRIES
):
await event_emitter(
{
"type": "chat:completion",
"data": {
"content": serialize_content_blocks(content_blocks),
},
}
)

retries += 1
log.debug(f"Attempt count: {retries}")

output = ""
try:
if content_blocks[-1]["attributes"].get("type") == "code":
code = content_blocks[-1]["content"]

if (
request.app.state.config.CODE_INTERPRETER_ENGINE
== "pyodide"
):
output = await event_caller(
{
"type": "execute:python",
"data": {
"id": str(uuid4()),
"code": code,
"session_id": metadata.get(
"session_id", None
),
},
}
)
elif (
request.app.state.config.CODE_INTERPRETER_ENGINE
== "jupyter"
):
output = await execute_code_jupyter(
request.app.state.config.CODE_INTERPRETER_JUPYTER_URL,
code,
(
request.app.state.config.CODE_INTERPRETER_JUPYTER_AUTH_TOKEN
if request.app.state.config.CODE_INTERPRETER_JUPYTER_AUTH
== "token"
else None
),
(
request.app.state.config.CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD
if request.app.state.config.CODE_INTERPRETER_JUPYTER_AUTH
== "password"
else None
),
request.app.state.config.CODE_INTERPRETER_JUPYTER_TIMEOUT,
)
else:
output = {
"stdout": "Code interpreter engine not configured."
}

log.debug(f"Code interpreter output: {output}")

if isinstance(output, dict):
stdout = output.get("stdout", "")

if isinstance(stdout, str):
stdoutLines = stdout.split("\n")
for idx, line in enumerate(stdoutLines):
if "data:image/png;base64" in line:
image_url = ""
# Extract base64 image data from the line
image_data, content_type = (
load_b64_image_data(line)
)
if image_data is not None:
image_url = upload_image(
request,
image_data,
content_type,
metadata,
user,
)
stdoutLines[idx] = (
f"![Output Image]({image_url})"
)

output["stdout"] = "\n".join(stdoutLines)

result = output.get("result", "")

if isinstance(result, str):
resultLines = result.split("\n")
for idx, line in enumerate(resultLines):
if "data:image/png;base64" in line:
image_url = ""
# Extract base64 image data from the line
image_data, content_type = (
load_b64_image_data(line)
)
if image_data is not None:
image_url = upload_image(
request,
image_data,
content_type,
metadata,
user,
)
resultLines[idx] = (
f"![Output Image]({image_url})"
)
output["result"] = "\n".join(resultLines)
except Exception as e:
output = str(e)

content_blocks[-1]["output"] = output

content_blocks.append(
{
"type": "text",
"content": "",
}
)

await event_emitter(
{
"type": "chat:completion",
"data": {
"content": serialize_content_blocks(content_blocks),
},
}
)

try:
res = await generate_chat_completion(
request,
{
"model": model_id,
"stream": True,
"messages": [
*form_data["messages"],
{
"role": "assistant",
"content": serialize_content_blocks(
content_blocks, raw=True
),
},
],
},
user,
)

if isinstance(res, StreamingResponse):
await stream_body_handler(res)
else:
break
except Exception as e:
log.debug(e)
break#endwhile

#endif DETECT_CODE_INTERPRETER

#从数据库中根据chat_id获取当前会话的标题

title = Chats.get_chat_title_by_id(metadata["chat_id"])
data = {#组织完成应答后推送到前端的数据，包括状态、标题和完整的应答
"done": True,
"content": serialize_content_blocks(content_blocks),
"title": title,
}

#把大模型的应答更新到chat表chat字段的Assistant应答数据中

if not ENABLE_REALTIME_CHAT_SAVE:
# Save message in the database
Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
"content": serialize_content_blocks(content_blocks),
},
)

#如果用户为非活跃状态，则通过webhook通知用户
if not get_active_status_by_user_id(user.id):
webhook_url = Users.get_user_webhook_url_by_id(user.id)
if webhook_url:
post_webhook(
request.app.state.WEBUI_NAME,
webhook_url,
f"{title} - {request.app.state.config.WEBUI_URL}/c/{metadata['chat_id']}\n\n{content}",
{
"action": "chat",
"message": content,
"title": title,
"url": f"{request.app.state.config.WEBUI_URL}/c/{metadata['chat_id']}",
},
)

'''

以下代码通过websocket把应答结束数据data推送到前端，通知前端补足结束，比如

{
"done": true,
"content": "王夫之是中国古代文人墨客中的一位重要人物。他不仅是一位伟大

的文学家，更是一位具有深远历史意义的政治家。\n\n王夫之在诗歌

创作上非常出色，他的诗歌语言流畅，情感深沉，被誉为“诗仙”。此

外，王夫之还是中国现代史上一些重要的事件的参与者。\n\n总的来

说，王夫之是中国古代文人墨客中的一位重要人物。他不仅是一位

伟大的文学家，更是一位具有深远历史意义的政治家。",
"title": "your concise title here"
}

'''

await event_emitter(
{
"type": "chat:completion",
"data": data,
}
)

await background_tasks_handler()

except asyncio.CancelledError:
log.warning("Task was cancelled!")
await event_emitter({"type": "task-cancelled"})

if not ENABLE_REALTIME_CHAT_SAVE:
# Save message in the database
Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
"content": serialize_content_blocks(content_blocks),
},
)

if response.background is not None:
await response.background()

下面对stream_body_handler函数进行分析，该函数遍历从ollama返回符合openai格式的流，针对流中的每个chunk，从中提取应答内容，通过websocket发送到前端。为便于理解，把一个块内容贴到这里。

{

data:{
"choices": [
{
"delta": {
"content": "熹"
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"object": "chat.completion.chunk",
"usage": null,
"created": 1755398817,
"system_fingerprint": null,
"model": "qwen-max-0428",
"id": "chatcmpl-988e2281-f52d-9956-806d-e454c6cdcd68"
}

}

}

以下是stream_body_handler函数代码。

处理流程如下：

1）content和content_blocks声明为nonlocal，分别用于存放应答内容和内容块

2）遍历流中的块，进行如下处理

2.1）针对数据进行解码、防错和去前缀data:

2.2）过滤器处理

2.3）报错处理和usage处理，推送错误和usage数据到前端

2.4）tool_call处理

2.5）推理处理

2.6）从提取增量应答内容拼接到content中

2.7）把content追加到content_blocks最后一个块中并进行序列化

2.8）把最新的应答全量推送到前端

3）对content_blocks进行清理

4）其他处理

async def stream_body_handler(response):
nonlocal content #保存当前块之前所有块中content中的内容
nonlocal content_blocks

response_tool_calls = []

#以下循环迭代处理流中的块

async for line in response.body_iterator:

''''以下几行代码做解码、防错和去前缀处理'''

#如果做了UTF8编码，则先解码
line = line.decode("utf-8") if isinstance(line, bytes) else line
data = line

# 如果内容为空则不处理
if not data.strip():
continue

# 如果非'data:'则不处理
if not data.startswith("data:"):
continue

#把'data:'删除
data = data[len("data:") :].strip()

try:
data = json.loads(data)

data, _ = await process_filter_functions(#过滤器处理，暂不关注
request=request,
filter_functions=filter_functions,
filter_type="stream",
form_data=data,
extra_params=extra_params,
)

if data:
if "event" in data:#如果数据块中有事件，则通过websocket发送到前端
await event_emitter(data.get("event", {}))

if "selected_model_id" in data: #暂不分析
model_id = data["selected_model_id"]
Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
"selectedModelId": model_id,
},
)
else:
choices = data.get("choices", [])
if not choices:#数据中没有choices，往往是报错或者是单纯的usage数据
error = data.get("error", {})
if error: #如果是报错块，则把错误推送到前端
await event_emitter(
{
"type": "chat:completion",
"data": {
"error": error,
},
}
)
usage = data.get("usage", {})
if usage:#把使用情况推送到前端
await event_emitter(
{
"type": "chat:completion",
"data": {
"usage": usage,
},
}
)
continue

delta = choices[0].get("delta", {}) #从报文中提取delta内容
delta_tool_calls = delta.get("tool_calls", None) #从delta中提取tool_calls内容

if delta_tool_calls: #工具调用效果相关处理，暂不分析
for delta_tool_call in delta_tool_calls:
tool_call_index = delta_tool_call.get(
"index"
)

if tool_call_index is not None:
# Check if the tool call already exists
current_response_tool_call = None
for (
response_tool_call
) in response_tool_calls:
if (
response_tool_call.get("index")
== tool_call_index
):
current_response_tool_call = (
response_tool_call
)
break

if current_response_tool_call is None:
# Add the new tool call
delta_tool_call.setdefault(
"function", {}
)
delta_tool_call[
"function"
].setdefault("name", "")
delta_tool_call[
"function"
].setdefault("arguments", "")
response_tool_calls.append(
delta_tool_call
)
else:
# Update the existing tool call
delta_name = delta_tool_call.get(
"function", {}
).get("name")
delta_arguments = (
delta_tool_call.get(
"function", {}
).get("arguments")
)

if delta_name:
current_response_tool_call[
"function"
]["name"] += delta_name

if delta_arguments:
current_response_tool_call[
"function"
][
"arguments"
] += delta_arguments

value = delta.get("content")

reasoning_content = (
delta.get("reasoning_content")
or delta.get("reasoning")
or delta.get("thinking")
)
if reasoning_content:#推理处理，暂不分析
if (
not content_blocks
or content_blocks[-1]["type"] != "reasoning"
):
reasoning_block = {
"type": "reasoning",
"start_tag": "think",
"end_tag": "/think",
"attributes": {
"type": "reasoning_content"
},
"content": "",
"started_at": time.time(),
}
content_blocks.append(reasoning_block)
else:
reasoning_block = content_blocks[-1]

reasoning_block["content"] += reasoning_content

data = {
"content": serialize_content_blocks(
content_blocks
)
}#endif

if value:
if (
content_blocks
and content_blocks[-1]["type"]
== "reasoning"
and content_blocks[-1]
.get("attributes", {})
.get("type")
== "reasoning_content"
):#推理处理，暂不分析
reasoning_block = content_blocks[-1]
reasoning_block["ended_at"] = time.time()
reasoning_block["duration"] = int(
reasoning_block["ended_at"]
- reasoning_block["started_at"]
)

content_blocks.append(
{
"type": "text",
"content": "",
}
)

''''以下是无推理情况下收集分块中内容的关键代码'''

content = f"{content}{value}" #把当前块content中的内容拼接到content中
if not content_blocks:
content_blocks.append(
{
"type": "text",
"content": "",
}
)

#把当前块content中的内容拼接到content_blocks最后一个块的cotent中

content_blocks[-1]["content"] = (
content_blocks[-1]["content"] + value
)

if DETECT_REASONING: #推理处理
content, content_blocks, _ = (
tag_content_handler(
"reasoning",
reasoning_tags,
content,
content_blocks,
)
)

if DETECT_CODE_INTERPRETER:#代码解释
content, content_blocks, end = (
tag_content_handler(
"code_interpreter",
code_interpreter_tags,
content,
content_blocks,
)
)

if end:
break

if DETECT_SOLUTION: #解答
content, content_blocks, _ = (
tag_content_handler(
"solution",
solution_tags,
content,
content_blocks,
)
)

#是否把当前块content中的内容拼接到content_blocks

if ENABLE_REALTIME_CHAT_SAVE: #缺省不实时落地
# Save message in the database
Chats.upsert_message_to_chat_by_id_and_message_id(
metadata["chat_id"],
metadata["message_id"],
{
"content": serialize_content_blocks(
content_blocks
),
},
)
else:#把数据序列化
data = {
"content": serialize_content_blocks(
content_blocks
),
}

'''

把前一步序列化后的内容推送到前端，比如以下是连续三个推送到前端数据：

{"type": "chat:completion","data": { "content": "王夫之是中国古代文人"}}

{"type": "chat:completion","data": { "content": "王夫之是中国古代文人墨"}}

{"type": "chat:completion","data": { "content": "王夫之是中国古代文人墨客"}}

推送到前端的内容是截止当前块所有的内容，并非增量内容

'''

await event_emitter(
{
"type": "chat:completion",
"data": data,
}
)
except Exception as e:
done = "data: [DONE]" in line
if done:
pass
else:
log.debug("Error: ", e)
continue

if content_blocks:#迭代完成流处理后的清理工作
# 如果最后一个块类型为'text'，则删除开头和结尾的空格
if content_blocks[-1]["type"] == "text":
content_blocks[-1]["content"] = content_blocks[-1][
"content"
].strip()

#如果当前块内容为空，则从content_blocks中删除

if not content_blocks[-1]["content"]:
content_blocks.pop()

#如果content_blocks是空数组，则增加一个内容为空的，类类型为text的块

if not content_blocks:
content_blocks.append(
{
"type": "text",
"content": "",
}
)

if response_tool_calls:#工具调用处理，暂不分析
tool_calls.append(response_tool_calls)

if response.background:#后台任务处理，暂不分析
await response.background()

三、结束处理

前端接受完一次请求的所有数据后，发起补足完成。入口地址为： http://{ip:port}/api/chat/completed，对应入口函数为chat_completed。请求数据如下：

{
"model": "qwen:0.5b",
"messages": [
{
"id": "b4e671e4-1ce1-43cc-a041-9a8e3c3904c4",
"role": "user",
"content": "你好",
"timestamp": 1755409340
},
{
"id": "2268a3da-9a78-4bd9-8cae-d399bed04f2a",
"role": "assistant",
"content": "你好！很高兴能为你提供帮助。如果你有任何问题或需要其他帮助，请随时告诉我，我会尽我所能来帮助你。",
"timestamp": 1755409340,
"usage": {
"response_token/s": 23.56,
"prompt_token/s": 74.05,
"total_duration": 1365798040,
"load_duration": 53633983,
"prompt_eval_count": 9,
"prompt_tokens": 9,
"prompt_eval_duration": 121541610,
"eval_count": 28,
"completion_tokens": 28,
"eval_duration": 1188287448,
"approximate_total": "0h0m1s",
"total_tokens": 37,
"completion_tokens_details": {
"reasoning_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
}
}
],
"model_item": {
"id": "qwen:0.5b",
"name": "qwen:0.5b",
"object": "model",
"created": 1755409331,
"owned_by": "ollama",
"ollama": {
"name": "qwen:0.5b",
"model": "qwen:0.5b",
"modified_at": "2025-08-17T05:40:18.859598053Z",
"size": 394998579,
"digest": "b5dc5e784f2a3ee1582373093acf69a2f4e2ac1710b253a001712b86a61f88bb",
"details": {
"parent_model": "",
"format": "gguf",
"family": "qwen2",
"families": ["qwen2"],
"parameter_size": "620M",
"quantization_level": "Q4_0"
},
"connection_type": "local",
"urls": [0],
"expires_at": 1755409520
},
"connection_type": "local",
"tags": [],
"actions": [],
"filters": []
},
"chat_id": "4ce79017-9cc5-499d-9b1f-c9e37048e763",
"session_id": "pT8ZygWewqvn9ugvAAAd",
"id": "2268a3da-9a78-4bd9-8cae-d399bed04f2a"
}

本方法非常精简，在使用ollama时直接调用chat_completed_handler方法，代码如下：

@app.post("/api/chat/completed")
async def chat_completed(
request: Request, form_data: dict, user=Depends(get_verified_user)
):
try:

#从表单中弹出model_item，所以在应答中没有该项
model_item = form_data.pop("model_item", {})

if model_item.get("direct", False):
request.state.direct = True
request.state.model = model_item

return await chat_completed_handler(request, form_data, user)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=str(e),
)

chat_completed_handler方法对应的是chat.py文件中的chat_completed方法，所以下面分析以下chat_completed。chat_completed中完成流水线处理和过滤器处理，当前分析中均没有处理，所以把请求原封不动的返回到前端，因为在 chat_completed已经把表单中的model_item删除，所以应答中不包含model_item内容。

async def chat_completed(request: Request, form_data: dict, user: Any):
if not request.app.state.MODELS:
await get_all_models(request, user=user)

#直连处理，不必关注

if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
models = {
request.state.model["id"]: request.state.model,
}
else:
models = request.app.state.MODELS

data = form_data
model_id = data["model"]
if model_id not in models:
raise Exception("Model not found")

model = models[model_id] #获取使用的模型

try: #流水线处理，暂不关注
data = await process_pipeline_outlet_filter(request, data, user, models)
except Exception as e:
return Exception(f"Error: {e}")

metadata = {
"chat_id": data["chat_id"],
"message_id": data["id"],
"filter_ids": data.get("filter_ids", []),
"session_id": data["session_id"],
"user_id": user.id,
}

extra_params = {
"__event_emitter__": get_event_emitter(metadata),
"__event_call__": get_event_call(metadata),
"__user__": user.model_dump() if isinstance(user, UserModel) else {},
"__metadata__": metadata,
"__request__": request,
"__model__": model,
}

try:
filter_functions = [
Functions.get_function_by_id(filter_id)
for filter_id in get_sorted_filter_ids(
request, model, metadata.get("filter_ids", [])
)
]

result, _ = await process_filter_functions(#过滤器处理，暂不关注
request=request,
filter_functions=filter_functions,
filter_type="outlet",
form_data=data,
extra_params=extra_params,
)
return result#数据原封不动返回
except Exception as e:
return Exception(f"Error: {e}")

应答数据为：

{
"model": "qwen:0.5b",
"messages": [
{
"id": "191489de-55b9-4d65-8ac6-7ae3f21b6802",
"role": "user",
"content": "你好",
"timestamp": 1755409340
},
{
"id": "d4ce313b-07ba-4941-81a1-0b114df1babe",
"role": "assistant",
"content": "你好！很高兴能为你提供帮助。如果你有任何问题或需要其他帮助，请随时告诉我，我会尽我所能来帮助你。",
"timestamp": 1755409340,
"usage": {
"response_token/s": 23.56,
"prompt_token/s": 74.05,
"total_duration": 1365798040,
"load_duration": 53633983,
"prompt_eval_count": 9,
"prompt_tokens": 9,
"prompt_eval_duration": 121541610,
"eval_count": 28,
"completion_tokens": 28,
"eval_duration": 1188287448,
"approximate_total": "0h0m1s",
"total_tokens": 37,
"completion_tokens_details": {
"reasoning_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
}
}
],
"chat_id": "b0e898f4-4299-49bd-87b7-27bd67dfc317",
"session_id": "pT8ZygWewqvn9ugvAAAd",
"id": "d4ce313b-07ba-4941-81a1-0b114df1babe"
}