目录
前言
本篇章将会完善对之前聊天请求的操作,升级为能对文件进行问答操作的对话接口,此操作不是对文件进行增删改查,而是基于文件的内容来进行回答。
此操作可以快速统计一些零散的数据,或者在长篇的文件中快速查找关键词,主题和内容。
若没有之前的基础对话接口,请参考该链接进行学习参考:
AI智能体(Agent)大模型入门【6】--编写FastAPI后端请求接口实现页面聊天-CSDN博客
代码优化升级
原本的代码
@app.post("/chat_post")
async def chat_post(session_id: int | None = Depends(parse_session_id),
                    message: str = Form(...)):
    """
    Basic chat endpoint: send the user's message to the LLM and persist the exchange.

    File uploads are not supported here. Each call is single-turn: only the
    current message is sent to the model, stored history is not replayed.

    :param session_id: id of the ChatList row to append to (as resolved by
        parse_session_id), or None to create a new session
    :param message: text typed by the user
    :return: ChatResponse carrying the session id and the model's reply
    """
    with closing(get_conn()) as conn:
        with conn.cursor() as cur:
            # --- 1. session ---
            # A new session only needs its ChatList row; the message itself is
            # stored by the shared path below, so the model is called exactly
            # once and the exchange is written exactly once.
            if session_id is None:
                title = message[:15] + "..." if len(message) > 15 else message
                cur.execute("INSERT INTO ChatList (title) VALUES (%s)", (title,))
                session_id = cur.lastrowid

            # --- 2. seq ---
            # MAX() yields NULL for a session without history, hence the `or 0`.
            cur.execute("SELECT MAX(msg_seq) mx FROM ChatHistory WHERE chat_list_id=%s", (session_id,))
            seq = (cur.fetchone()["mx"] or 0) + 1

            # --- 3. user message ---
            cur.execute(
                "INSERT INTO ChatHistory (chat_list_id, msg_seq, role, message) "
                "VALUES (%s,%s,'user',%s)",
                (session_id, seq, message)
            )

            # --- 4. call the model (awaited so the event loop is not blocked) ---
            msg = ChatMessage(role="user", content=message)
            response = await llm.achat([msg])
            reply = response.message.content

            # --- 5. assistant message ---
            cur.execute(
                "INSERT INTO ChatHistory (chat_list_id, msg_seq, role, message) "
                "VALUES (%s,%s,'assistant',%s)",
                (session_id, seq + 1, reply)
            )
            cur.execute(
                "UPDATE ChatList SET updated_at = NOW() WHERE id = %s",
                (session_id,)
            )
        # Nothing is committed unless every step above succeeded.
        conn.commit()
    return ChatResponse(session_id=session_id, reply=reply)
升级过程
为接口的函数签名添加一个可选的文件参数
async def chat_post(session_id: int | None = Depends(parse_session_id),
message: str = Form(...),
file: UploadFile | None = File(None)):
优化成这样子,增加文件参数
然后添加判断文件函数
file_text = ""
if file and file.size:
try:
file_text =await any_uploadfile_to_text(file)
except Exception as e:
print(e)
关于 any_uploadfile_to_text 这个函数,我就直接把代码给大家了,记得要写在utils.py工具模块里面,这个函数本质上就是对文件的解析操作:
先提取文件后缀名,再按文件类型读取内容(图片类文件通过ocr识别),最后以文本形式返回
def _decode_utf8(data: bytes) -> str:
    """Decode bytes as UTF-8, dropping a leading BOM and any undecodable bytes."""
    return data.decode("utf-8-sig", "ignore")


def _csv_to_text(data: bytes) -> str:
    """Re-emit CSV content as one comma-joined row per line (keeps the table shape)."""
    # newline="" lets the csv module do its own line-ending handling.
    reader = csv.reader(io.StringIO(_decode_utf8(data), newline=""))
    return "\n".join(",".join(row) for row in reader)


def _read_via_temp_file(data: bytes, suffix: str, reader) -> str:
    """Write *data* to a temp file, return ``reader(path)``, and always delete the file."""
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # Close the handle before handing the path to the reader.
        with tmp:
            tmp.write(data)
        return reader(tmp.name)
    finally:
        os.remove(tmp.name)


def _antiword_to_text(path: str) -> str:
    """Convert a legacy .doc file to text with the external ``antiword`` tool."""
    import subprocess

    try:
        return subprocess.check_output(["antiword", path], text=True, stderr=subprocess.DEVNULL)
    except FileNotFoundError as exc:
        raise HTTPException(501, "服务器未安装 antiword,无法解析 .doc 文件") from exc


async def any_uploadfile_to_text(file: UploadFile) -> str:
    """
    Extract the textual content of an uploaded file, chosen by its extension.

    Supported: .txt/.text/.md/.csv (decoded as UTF-8), .rtf and .pdf (fitz),
    .doc (external antiword), .docx, .xlsx, .pptx, and .png/.jpg/.jpeg/.tiff/.bmp
    (OCR through ocr_file_to_text_llm_kimi).

    :param file: the upload; its whole content is read into memory
    :return: the extracted text
    :raises HTTPException: 415 for an unsupported (or missing) extension,
        501 when a .doc file is uploaded but antiword is not installed
    """
    # filename can be missing; treat that as "no extension" -> 415 below.
    ext = Path(file.filename or "").suffix.lower()
    data = await file.read()  # read everything in one await

    # 1. Plain text
    if ext == ".csv":
        return _csv_to_text(data)
    if ext in {".txt", ".text", ".md"}:
        return _decode_utf8(data)

    # 2. RTF / PDF through fitz (text layer only; a scanned PDF yields little or no text)
    # NOTE(review): confirm that the installed PyMuPDF really accepts filetype="rtf".
    if ext in {".rtf", ".pdf"}:
        with fitz.open(stream=data, filetype=ext[1:]) as doc:
            return "\n".join(page.get_text() for page in doc)

    # 3. Legacy Word .doc: needs the antiword binary on the server
    if ext == ".doc":
        return _read_via_temp_file(data, ".doc", _antiword_to_text)

    # 4. Office 2007+
    if ext == ".docx":
        # Paragraph text only.
        return "\n".join(p.text for p in Document(io.BytesIO(data)).paragraphs)
    if ext == ".xlsx":
        wb = load_workbook(io.BytesIO(data), data_only=True)
        return "\n".join(
            " ".join(str(v) for v in row if v is not None)
            for ws in wb.worksheets
            for row in ws.iter_rows(values_only=True)
        )
    if ext == ".pptx":
        return "\n".join(
            shape.text
            for slide in Presentation(io.BytesIO(data)).slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )

    # 5. Images -> OCR (the OCR helper takes a file path, hence the temp file)
    if ext in {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}:
        return _read_via_temp_file(data, ext, ocr_file_to_text_llm_kimi)

    raise HTTPException(415, f"不支持的文件类型: {ext}")
代码的话基本上应该都能理解,这里就不做过多的阐述了,希望自己来消化。
然后对文件和用户的消息进行拼接
# Put the extracted file content in front of the question only when there is
# some; otherwise the prompt is just the user's message.
user_message = (
    f"用户上传文件的内容为:{file_text}\n\n用户的问题为:{message}"
    if file_text
    else message
)
后面代码逻辑保持不变
接下来给完整的请求接口代码
@app.post("/chat_post")
async def chat_post(session_id: int | None = Depends(parse_session_id),
                    message: str = Form(...),
                    file: UploadFile | None = File(None)):
    """
    Chat endpoint that can also answer questions about an uploaded file.

    The upload is turned into text by any_uploadfile_to_text (which also
    covers images such as png/jpeg via OCR) and that text is put in front of
    the question in the prompt. Only the raw question is written to
    ChatHistory; the file content is used for this answer only.

    :param session_id: id of the ChatList row to append to, or None to create
        a new session
    :param message: the user's question
    :param file: optional uploaded file to answer from
    :return: ChatResponse carrying the session id and the model's reply
    :raises HTTPException: propagated from any_uploadfile_to_text (e.g. 415
        for an unsupported file type)
    """
    import logging

    from fastapi import HTTPException

    file_text = ""
    if file and file.size:
        try:
            file_text = await any_uploadfile_to_text(file)
        except HTTPException:
            # Deliberate HTTP errors from the converter must reach the client
            # instead of being swallowed.
            raise
        except Exception:
            # Other parsing failures stay best-effort: log with traceback and
            # answer from the message alone.
            logging.getLogger(__name__).exception(
                "failed to extract text from uploaded file %r", file.filename
            )

    if file_text:
        user_message = f"用户上传文件的内容为:{file_text}\n\n用户的问题为:{message}"
    else:
        user_message = message

    with closing(get_conn()) as conn:
        with conn.cursor() as cur:
            # --- 1. session ---
            if session_id is None:
                title = message[:15] + "..." if len(message) > 15 else message
                cur.execute("INSERT INTO ChatList (title) VALUES (%s)", (title,))
                session_id = cur.lastrowid

            # --- 2. seq ---
            # MAX() yields NULL for a session without history, hence the `or 0`.
            cur.execute("SELECT MAX(msg_seq) mx FROM ChatHistory WHERE chat_list_id=%s", (session_id,))
            seq = (cur.fetchone()["mx"] or 0) + 1

            # --- 3. user message (the question only, not the file text) ---
            cur.execute(
                "INSERT INTO ChatHistory (chat_list_id, msg_seq, role, message) "
                "VALUES (%s,%s,'user',%s)",
                (session_id, seq, message)
            )

            # --- 4. call the model (async) ---
            msg = ChatMessage(role="user", content=user_message)
            response = await llm.achat([msg])
            reply = response.message.content

            # --- 5. assistant message ---
            cur.execute(
                "INSERT INTO ChatHistory (chat_list_id, msg_seq, role, message) "
                "VALUES (%s,%s,'assistant',%s)",
                (session_id, seq + 1, reply)
            )
            cur.execute(
                "UPDATE ChatList SET updated_at = NOW() WHERE id = %s",
                (session_id,)
            )
        # Nothing is committed unless every step above succeeded.
        conn.commit()
    return ChatResponse(session_id=session_id, reply=reply)
运行演示

如图所示,按照图片中的方式传入参数并发送请求
后端接口输出

后言
这里面的代码不一定是完全正确或者完整的,需要结合专栏文章和后续编写的文章进行查漏补缺和升级,也可以借助ai辅助完善代码,我只提供参考
1027

被折叠的 条评论
为什么被折叠?



