# NOTE(review): stray chat-prompt text ("为我写方式三的全代码" — "write the full code for approach three") left at the top of the file; converted to a comment so the module can parse. Consider deleting it.
"""
【通义千问 Qwen】API集成模块
用于意图理解和任务处理
"""
import json
import re
import logging
import dashscope
from dashscope import Generation
from database import config
from Progress.utils.logger_utils import log_time, log_step, log_var, log_call
from Progress.utils.logger_config import setup_logger
# --- Initialize the module logger ---
logger = logging.getLogger("ai_assistant")
# API key and model name are supplied by the project-level config module.
DASHSCOPE_API_KEY = config.api_key
DASHSCOPE_MODEL = config.model
class QWENAssistant:
    """Client for the DashScope Qwen chat API.

    Responsibilities:
      * turn a recognised voice command into a validated, structured JSON
        task plan (``process_voice_command``);
      * general-purpose text generation, summarisation and translation.

    Raises:
        ValueError: on construction when no API key is configured.
    """

    def __init__(self):
        # Fail fast: without a key every subsequent API call would fail anyway.
        if not DASHSCOPE_API_KEY:
            raise ValueError("缺少 DASHSCOPE_API_KEY,请检查配置文件")
        dashscope.api_key = DASHSCOPE_API_KEY
        self.model_name = DASHSCOPE_MODEL or 'qwen-max'
        logger.info(f"✅ QWENAssistant 初始化完成,使用模型: {self.model_name}")
        self.conversation_history = []  # rolling list of user/assistant turns
        self.system_prompt = """
你是一个智能语音控制助手,能够理解用户的自然语言指令,并将其转化为可执行的任务计划。
你的职责是:
- 准确理解用户意图;
- 若涉及多个动作,需拆解为【执行计划】;
- 输出一个严格符合规范的 JSON 对象,供系统解析执行;
- 所有回复必须使用中文(仅限于 response_to_user 字段);
🎯 输出格式要求(必须遵守):
{
"intent": "system_control", // 意图类型:"system_control"
"task_type": "start_background_tasks",// 任务类型的简要描述(动态生成)
"execution_plan": [ // 执行步骤列表(每个步骤包含 operation, parameters, description)
{
"operation": "函数名", // 必须是已知操作之一
"parameters": { ... }, // 参数对象(按需提供)
"description": "该步骤的目的说明"
}
],
"response_to_user": "你要对用户说的话(用中文)",
"requires_confirmation": false, // 是否需要用户确认后再执行
"mode": "parallel" // 执行模式:"parallel"(并行)或 "serial"(串行)
}
📌 已知 operation 列表(不可拼写错误):
- play_music(music_path: str)
- stop_music()
- pause_music()
- resume_music()
- open_application(app_name: str)
- create_file(file_name: str, content?: str)
- read_file(file_name: str)
- write_file(file_name: str, content: str)
- set_reminder(reminder_time: str, message: str)
- exit()
📌 规则说明:
1. 只有当用户明确要求执行系统级任务时,才设置 intent="system_control";
否则设为 intent="chat"(例如闲聊、问天气、讲笑话等)。
2. execution_plan 中的每一步都必须与用户需求直接相关;
❌ 禁止添加无关操作(如随便加 speak_response 或 play_music)!
3. mode 决定执行方式:
- 如果各步骤互不依赖 → "parallel"
- 如果有先后依赖(如先打开再写入)→ "serial"
4. response_to_user 是你对用户的自然语言反馈,必须简洁友好,使用中文。
5. requires_confirmation:
- 涉及删除、覆盖文件、长时间运行任务 → true
- 普通操作(打开应用、播放音乐)→ false
⚠️ 重要警告:
- 绝不允许照搬示例中的参数或路径!必须根据用户输入提取真实信息。
- 不得虚构不存在的 operation 或 parameter 名称。
- 不得省略任何字段,所有 key 都必须存在。
- 不得输出额外文本(如解释、注释、
```json
``` 包裹符),只输出纯 JSON 对象。
✅ 正确行为示例:
用户说:“帮我写一份自我介绍到 D:/intro.txt,并打开看看”
→ 应返回包含 write_file 和 read_file 的 serial 计划。
用户说:“播放 C:/Music/background.mp3 并告诉我准备好了”
→ 可以并行执行 play_music 和 speak_response。
用户说:“今天过得怎么样?”
→ intent="chat",response_to_user="我很好,谢谢!"
🚫 错误行为:
- 把所有指令都变成和示例一样的操作组合;
- 在没有请求的情况下自动添加 speak_response;
- 使用未定义的操作如 run_script、send_email。
现在,请根据用户的最新指令生成对应的 JSON 响应。
"""

    @log_time
    @log_step("处理语音指令")
    def process_voice_command(self, voice_text):
        """Send a recognised utterance to Qwen and return a task-plan dict.

        Falls back to a plain-chat response when the input is empty, the API
        call fails, or the model output cannot be parsed as a valid plan.

        Args:
            voice_text: raw recognised text from the speech module.

        Returns:
            dict: a validated plan (see the system prompt schema) or a
            fallback response produced by ``_create_fallback_response``.
        """
        log_var("原始输入", voice_text)
        if not voice_text.strip():
            return self._create_fallback_response("我没有听清楚,请重新说话。")
        self.conversation_history.append({"role": "user", "content": voice_text})
        try:
            messages = [{"role": "system", "content": self.system_prompt}]
            messages.extend(self.conversation_history[-10:])  # keep only recent context
            response = Generation.call(
                model=self.model_name,
                messages=messages,
                temperature=0.5,
                top_p=0.8,
                max_tokens=1024
            )
            if response.status_code != 200:
                logger.error(f"Qwen API 调用失败: {response.status_code}, {response.message}")
                return self._create_fallback_response(f"服务暂时不可用: {response.message}")
            ai_output = response.output['text'].strip()
            log_var("模型输出", ai_output)
            self.conversation_history.append({"role": "assistant", "content": ai_output})
            # Try to parse the model output as a structured execution plan.
            parsed = self._extract_and_validate_json(ai_output)
            if parsed:
                return parsed
            else:
                # Not a valid plan -> degrade to a plain conversational reply.
                return self._create_fallback_response(ai_output)
        except Exception as e:
            logger.exception("处理语音指令时发生异常")
            return self._create_fallback_response("抱歉,我遇到了一些技术问题,请稍后再试。")

    def _extract_and_validate_json(self, text: str):
        """Extract a JSON object from *text* and validate its plan structure.

        Returns:
            The validated dict, or ``None`` when nothing parseable/valid is found.
        """
        try:
            # Attempt 1: the whole text is already a JSON document.
            data = json.loads(text)
            return self._validate_plan_structure(data)
        except json.JSONDecodeError:
            pass
        # Attempt 2: grab the first {...} span (models often wrap JSON in prose).
        match = re.search(r'\{[\s\S]*\}', text)
        if not match:
            return None
        try:
            data = json.loads(match.group())
            return self._validate_plan_structure(data)
        except (json.JSONDecodeError, TypeError):
            # Narrowed from a bare `except`: only parse failures are expected here.
            return None

    def _validate_plan_structure(self, data: dict):
        """Check *data* against the required multi-task plan schema.

        Returns the (possibly defaulted) dict on success, ``None`` otherwise.
        """
        required_top_level = ["intent", "task_type", "execution_plan", "response_to_user", "requires_confirmation"]
        for field in required_top_level:
            if field not in data:
                logger.warning(f"缺少必要字段: {field}")
                return None
        valid_operations = {
            "play_music", "stop_music", "pause_music", "resume_music",
            "open_application", "create_file", "read_file", "write_file",
            "set_reminder", "speak_response", "exit"
        }
        # Model output is untrusted: a non-list execution_plan (e.g. a string)
        # would otherwise crash the step loop below.
        if not isinstance(data["execution_plan"], list):
            logger.warning(f"execution_plan 必须是列表: {data['execution_plan']}")
            return None
        for step in data["execution_plan"]:
            if not isinstance(step, dict):
                logger.warning(f"无效步骤: {step}")
                return None
            op = step.get("operation")
            params = step.get("parameters", {})
            if not op or op not in valid_operations:
                logger.warning(f"无效操作: {op}")
                return None
            if not isinstance(params, dict):
                logger.warning(f"parameters 必须是对象: {params}")
                return None
        # Default execution mode when the model omitted it.
        if "mode" not in data:
            data["mode"] = "parallel"
        return data

    def _create_fallback_response(self, message: str):
        """Build a plain-chat response dict for non-structured output."""
        return {
            "intent": "chat",
            "task_type": "reply",
            "response_to_user": message,
            "requires_confirmation": False,
            "execution_plan": [],
            "mode": "serial"
        }

    def _create_response(self, intent, action, parameters, response, needs_confirmation):
        """Build a legacy-format response dict (different keys from the plan schema).

        NOTE(review): not called anywhere in this file; kept for external callers.
        """
        resp = {"intent": intent, "action": action, "parameters": parameters, "response": response, "needs_confirmation": needs_confirmation}
        log_var("返回响应", resp)
        return resp

    @log_time
    def generate_text(self, prompt, task_type="general"):
        """Generate free-form text for *prompt*.

        Args:
            prompt: the user requirement.
            task_type: short label injected into the system prompt.

        Returns:
            str: generated text, or an error message on failure.
        """
        log_var("任务类型", task_type)
        log_var("提示词长度", len(prompt))
        try:
            system_prompt = f"""
你是一个专业的文本生成助手。根据用户的要求生成高质量的文本内容。
任务类型:{task_type}
要求:{prompt}
请生成相应的文本内容,确保内容准确、有逻辑、语言流畅。
"""
            response = Generation.call(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.8,
                max_tokens=2000
            )
            if response.status_code == 200:
                result = response.output['text']
                log_var("生成结果长度", len(result))
                return result
            else:
                error_msg = f"文本生成失败: {response.message}"
                logger.error(error_msg)
                return error_msg
        except Exception as e:
            logger.exception("文本生成出错")
            return f"抱歉,生成文本时遇到错误:{str(e)}"

    @log_time
    def summarize_text(self, text):
        """Summarise *text*; returns the summary or an error message."""
        log_var("待总结文本长度", len(text))
        try:
            prompt = f"请总结以下文本的主要内容:\n\n{text}"
            response = Generation.call(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=500
            )
            if response.status_code == 200:
                result = response.output['text']
                log_var("总结结果长度", len(result))
                return result
            else:
                error_msg = f"总结失败: {response.message}"
                logger.error(error_msg)
                return error_msg
        except Exception as e:
            logger.exception("文本总结出错")
            return f"抱歉,总结文本时遇到错误:{str(e)}"

    @log_time
    def translate_text(self, text, target_language="英文"):
        """Translate *text* into *target_language* (default: English)."""
        log_var("目标语言", target_language)
        log_var("原文长度", len(text))
        try:
            prompt = f"请将以下文本翻译成{target_language}:\n\n{text}"
            response = Generation.call(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=1000
            )
            if response.status_code == 200:
                result = response.output['text']
                log_var("翻译结果长度", len(result))
                return result
            else:
                error_msg = f"翻译失败: {response.message}"
                logger.error(error_msg)
                return error_msg
        except Exception as e:
            logger.exception("文本翻译出错")
            return f"抱歉,翻译文本时遇到错误:{str(e)}"
# Module-level singleton shared by the rest of the application.
assistant = QWENAssistant()
from functools import wraps
import inspect
import logging
from Progress.app.qwen_assistant import assistant
# --- Global registries populated by the @ai_callable decorator ---
REGISTERED_FUNCTIONS = {}  # func name -> metadata dict (func, description, params, ...)
FUNCTION_SCHEMA = []       # flat list of {name, description, parameters} entries
FUNCTION_MAP = {}          # (intent, action) -> function name
logger = logging.getLogger("ai_assistant")


def ai_callable(
    *,
    description: str,
    params: dict,
    intent: str = None,
    action: str = None,
    concurrent: bool = False  # whether the function may run concurrently
):
    """Decorator factory that registers a function as AI-callable.

    Registration side effects:
      * stores metadata in ``REGISTERED_FUNCTIONS`` (keyed by function name);
      * appends a schema entry to ``FUNCTION_SCHEMA``;
      * when both *intent* and *action* are given, maps ``(intent, action)``
        to the function name in ``FUNCTION_MAP``.

    Args:
        description: human/LLM-readable purpose of the function.
        params: parameter schema dict exposed to the model.
        intent: optional intent name for semantic dispatch.
        action: optional action name for semantic dispatch.
        concurrent: whether the function is safe to execute in parallel.

    Raises:
        ValueError: when ``(intent, action)`` is already claimed by another
        function (duplicate semantic registration).

    Returns:
        A decorator whose wrapper passes all calls straight through and
        carries the metadata on ``wrapper._ai_metadata``.
    """
    def decorator(func):
        func_name = func.__name__
        metadata = {
            "func": func,
            "description": description,
            "params": params,
            "intent": intent,
            "action": action,
            "signature": str(inspect.signature(func)),
            "concurrent": concurrent  # recorded so the executor can parallelise
        }
        REGISTERED_FUNCTIONS[func_name] = metadata
        FUNCTION_SCHEMA.append({
            "name": func_name,
            "description": description,
            "parameters": params
        })
        if intent and action:
            key = (intent, action)
            # Refuse silent overwrites: two functions claiming the same
            # semantic slot would make dispatch ambiguous.
            if key in FUNCTION_MAP:
                raise ValueError(f"冲突:语义 ({intent}, {action}) 已被函数 {FUNCTION_MAP[key]} 占用")
            FUNCTION_MAP[key] = func_name

        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        wrapper._ai_metadata = metadata
        return wrapper
    return decorator
"""
【语音识别模块】Speech Recognition (Offline)
使用麦克风进行实时语音识别,基于 Vosk 离线模型
支持单次识别 & 持续监听模式
音量可视化、模型路径检查、资源安全释放
"""
import random
import threading
import time
import logging
import json
import os
from vosk import Model, KaldiRecognizer
import pyaudio
from database import config
from Progress.utils.logger_utils import log_time, log_step, log_var, log_call
from Progress.utils.logger_config import setup_logger
# --- Configuration parameters ---
VOICE_TIMEOUT = config.timeout # Maximum time to wait for speech input (seconds)
VOICE_PHRASE_TIMEOUT = config.phrase_timeout # Maximum recording time for a single phrase (seconds)
VOSK_MODEL_PATH = "./vosk-model-small-cn-0.22"
# --- Initialize the module logger ---
logger = logging.getLogger("ai_assistant")
# Minimum peak amplitude treated as speech (tune for the local environment)
MIN_VOLUME_THRESHOLD = 600 # adjustable (test against ambient noise)
class SpeechRecognizer:
    """Offline speech recogniser built on Vosk + PyAudio.

    Provides a one-shot adaptive recognition (``listen_and_recognize``) and a
    background continuous-listening mode that delivers text to a callback.
    Recognition is suppressed while the TTS engine reports it is playing, so
    the assistant does not transcribe its own voice.
    """

    def __init__(self):
        self.current_timeout = 10  # may be adjusted dynamically by callers
        self.model = None
        self.recognizer = None
        self.audio = None
        self.is_listening = False
        self.callback = None  # user-registered callback: callback(text)
        self._last_text = ""
        self._listen_thread = None
        self.sample_rate = 16000  # Vosk requires 16 kHz input
        self.chunk_size = 1600  # recommended frame size (~100 ms)
        # 🔒 TTS playback flag, set by the TTS module via set_tts_playing().
        self._is_tts_playing = False
        self._tts_lock = threading.Lock()
        self._load_model()
        self._init_audio_system()

    @property
    def is_tts_playing(self) -> bool:
        # Lock-protected read so the background thread sees consistent state.
        with self._tts_lock:
            return self._is_tts_playing

    def set_tts_playing(self, status: bool):
        """Called by the TTS module to signal whether playback is active."""
        with self._tts_lock:
            self._is_tts_playing = status
            if not status:
                logger.debug("🟢 TTS 播放结束,语音识别恢复")

    @log_step("加载 Vosk 离线模型")
    @log_time
    def _load_model(self):
        """Load the local Vosk model from VOSK_MODEL_PATH.

        Raises:
            FileNotFoundError: when the model directory is missing.
            RuntimeError: when the model exists but fails to load.
        """
        if not os.path.exists(VOSK_MODEL_PATH):
            # Bug fix: the original passed two arguments to FileNotFoundError,
            # producing a tuple-style message; join them into one string.
            raise FileNotFoundError(
                f"❌ Vosk 模型路径不存在: {VOSK_MODEL_PATH}\n"
                "请从 https://alphacephei.com/vosk/models 下载中文小模型并解压至此路径"
            )
        try:
            logger.info(f"📦 正在加载模型: {VOSK_MODEL_PATH}")
            self.model = Model(VOSK_MODEL_PATH)
            log_call("✅ 模型加载成功")
        except Exception as e:
            logger.critical(f"🔴 加载 Vosk 模型失败: {e}")
            raise RuntimeError("Failed to load Vosk model") from e

    @log_step("初始化音频系统")
    @log_time
    def _init_audio_system(self):
        """Initialise PyAudio and create the shared recogniser instance."""
        try:
            self.audio = pyaudio.PyAudio()
            # Default recogniser (can be Reset() before each utterance).
            self.recognizer = KaldiRecognizer(self.model, self.sample_rate)
            logger.debug("✅ 音频系统初始化完成")
        except Exception as e:
            logger.exception("❌ 初始化音频系统失败")
            raise

    @property
    def last_text(self) -> str:
        # Public read-only view of the last recognised utterance.
        return self._last_text

    def is_available(self) -> bool:
        """Check whether a microphone stream can be opened."""
        if not self.audio:
            return False
        try:
            stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size
            )
            stream.close()
            return True
        except Exception as e:
            logger.error(f"🔴 麦克风不可用或无权限: {e}")
            return False

    @log_step("执行单次语音识别(自适应超时)")
    @log_time
    def listen_and_recognize(self, initial_timeout=10, long_speech_threshold=3.0, post_speech_long_wait=15, post_speech_short_wait=5) -> str:
        """Adaptive one-shot recognition.

        Waits up to *initial_timeout* for speech to start; once speech is
        detected, the end-of-utterance silence window is chosen based on how
        long the user has been speaking.

        Args:
            initial_timeout: max seconds to wait for speech to begin.
            long_speech_threshold: minimum speech duration (s) considered a "long" utterance.
            post_speech_long_wait: silence window (s) after a long utterance.
            post_speech_short_wait: silence window (s) after a short utterance or pause.

        Returns:
            The recognised text, or ``""`` when nothing was recognised.
        """
        from array import array  # local stdlib import: decode 16-bit PCM for level metering

        start_time = time.time()
        speech_start_time = None   # when speech was first detected
        last_speech_time = None    # last time any speech fragment was heard
        in_speech = False          # currently inside an utterance?
        final_result_text = ""
        silence_start_time = None  # start of the current silence span
        logger.debug(f"🎙️ 开始自适应语音识别 (初始等待={initial_timeout}s)...")
        if self.is_tts_playing:
            logger.info("🔇 TTS 正在播放,跳过本次识别")
            return ""
        logger.info("🔊 请说话...")
        stream = None
        try:
            recognizer = KaldiRecognizer(self.model, self.sample_rate)
            stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size
            )
            while True:
                current_time = time.time()
                # Abort when TTS playback starts mid-recognition.
                if self.is_tts_playing:
                    logger.info("🔇 TTS 开始播放,中断识别")
                    break
                # Initial wait exceeded with no speech -> give up.
                if not in_speech and (current_time - start_time) > initial_timeout:
                    logger.info("💤 初始等待超时,未检测到语音输入")
                    break
                # Read one audio frame.
                data = stream.read(self.chunk_size, exception_on_overflow=False)
                # Bug fix: the original used max(data) over raw bytes (0-255),
                # which could never exceed MIN_VOLUME_THRESHOLD (600), so
                # speech was always classified as silence. Decode the buffer
                # as signed 16-bit samples and take the peak amplitude.
                # NOTE(review): assumes native little-endian int16 frames from
                # paInt16 — confirm on the target platform.
                samples = array('h', data)
                audio_level = max((abs(s) for s in samples), default=0)
                is_speaking = audio_level > MIN_VOLUME_THRESHOLD
                # Feed Vosk and harvest results.
                if recognizer.AcceptWaveform(data):
                    result_json = recognizer.FinalResult()
                    text = json.loads(result_json).get("text", "").strip()
                    if text:
                        final_result_text = text
                        last_speech_time = current_time
                        in_speech = True
                        if speech_start_time is None:
                            speech_start_time = current_time
                        logger.debug(f"✅ 完整句子识别: '{text}'")
                    recognizer.Reset()  # ready for the next sentence
                else:
                    partial_json = recognizer.PartialResult()
                    partial_text = json.loads(partial_json).get("partial", "").strip()
                    if partial_text:
                        last_speech_time = current_time
                        if not in_speech:
                            in_speech = True
                            speech_start_time = current_time
                            logger.debug(f"🎤 检测到语音开始: '{partial_text}'")
                # === Decide dynamically whether the utterance has ended ===
                if in_speech and last_speech_time:
                    elapsed_since_last_speech = current_time - last_speech_time
                    # Choose the silence window based on utterance length so far.
                    if speech_start_time and (current_time - speech_start_time) > long_speech_threshold:
                        # Long utterance: allow a longer pause.
                        wait_duration = post_speech_long_wait
                    else:
                        # Short utterance or early pause: wrap up quickly.
                        wait_duration = post_speech_short_wait
                    # Track the start of the current silence span.
                    if is_speaking:
                        silence_start_time = None
                    else:
                        if silence_start_time is None:
                            silence_start_time = current_time
                        elif (current_time - silence_start_time) >= wait_duration:
                            logger.info(f"🔚 静默超过 {wait_duration}s,判定语音结束")
                            break
                time.sleep(0.05)  # small delay to reduce CPU usage
            # Report the final result.
            if final_result_text:
                self._last_text = final_result_text
                logger.info(f"🎯 识别结果: '{final_result_text}'")
                return final_result_text
            else:
                logger.info("❓ 未识别到有效内容")
                self._last_text = ""
                return ""
        except Exception as e:
            logger.exception("🔴 执行语音识别时发生异常")
            self._last_text = ""
            return ""
        finally:
            if stream:
                try:
                    stream.stop_stream()
                    stream.close()
                except Exception as e:
                    logger.warning(f"⚠️ 关闭音频流失败: {e}")

    @log_step("启动持续语音监听")
    def start_listening(self, callback=None, language=None):
        """Start a daemon thread that listens continuously.

        :param callback: callable taking one ``str`` argument (recognised text)
        :param language: language code (ignored; the loaded model decides)
        """
        if self.is_listening:
            logger.warning("⚠️ 已在监听中,忽略重复启动")
            return
        if not callable(callback):
            logger.error("🔴 回调函数无效,请传入可调用对象")
            return
        self.callback = callback
        self.is_listening = True
        self._listen_thread = threading.Thread(target=self._background_listen, args=(language,), daemon=True)
        self._listen_thread.start()
        logger.info("🟢 已启动后台语音监听")

    @log_step("停止语音监听")
    def stop_listening(self):
        """Stop the background listener and join its thread safely."""
        if not self.is_listening:
            return
        self.is_listening = False
        logger.info("🛑 正在停止语音监听...")
        if self._listen_thread and self._listen_thread != threading.current_thread():
            self._listen_thread.join(timeout=3)
            if self._listen_thread.is_alive():
                logger.warning("🟡 监听线程未能及时退出(可能阻塞)")
        elif self._listen_thread == threading.current_thread():
            # Joining the current thread would deadlock.
            logger.error("❌ 无法在当前线程中 join 自己!请检查调用栈")
        else:
            logger.debug("No thread to join")
        logger.info("✅ 语音监听已停止")

    def _background_listen(self, language=None):
        """Background loop: read audio, recognise, invoke the callback."""
        logger.debug("🎧 后台监听线程已启动")
        stream = None
        try:
            stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size
            )
        except Exception as e:
            logger.error(f"🔴 无法打开音频流: {e}")
            return
        try:
            while self.is_listening:
                # 🔴 Skip reads while TTS is playing (avoid self-transcription).
                if self.is_tts_playing:
                    time.sleep(0.1)  # reduce CPU usage
                    continue
                try:
                    data = stream.read(self.chunk_size, exception_on_overflow=False)
                    if self.recognizer.AcceptWaveform(data):
                        result_json = self.recognizer.Result()
                        result_dict = json.loads(result_json)
                        text = result_dict.get("text", "").strip()
                        if text and self.callback:
                            logger.info(f"🔔 回调触发: '{text}'")
                            self.callback(text)
                        self.recognizer.Reset()
                    else:
                        partial = json.loads(self.recognizer.PartialResult())
                        partial_text = partial.get("partial", "")
                        if partial_text.strip():
                            logger.debug(f"🗣️ 当前语音片段: '{partial_text}'")
                except Exception as e:
                    logger.exception("Background listening error")
                time.sleep(0.05)
        finally:
            if stream:
                stream.stop_stream()
                stream.close()
            logger.debug("🔚 后台监听线程退出")
# Module-level singleton recogniser shared by the application.
recognizer = SpeechRecognizer()
"""
【AI语音助手】主程序入口
集成语音识别、Qwen 意图理解、TTS 与动作执行
✅ 已修复:不再访问 _last_text 私有字段
✅ 增强:异常防护、类型提示、唤醒词预留接口
"""
import random
import sys
import time
import logging
# --- 导入日志工具 ---
from Progress.utils.logger_config import setup_logger
from Progress.utils.logger_utils import log_time, log_step, log_var, log_call
# --- 显式导入各模块核心类 ---
from Progress.app.voice_recognizer import recognizer
from Progress.app.qwen_assistant import assistant
from Progress.app.text_to_speech import tts_engine
from Progress.app.system_controller import executor
from database import config
# --- Initialize the global application logger ---
logger = logging.getLogger("ai_assistant")
@log_step("处理一次语音交互(AI动态控制等待)")
@log_time
def handle_single_interaction():
    """Run one listen -> understand -> execute -> speak cycle.

    Also adjusts ``recognizer.current_timeout`` for the next cycle based on
    whether the AI decision expects a follow-up utterance.
    """
    text = recognizer.listen_and_recognize(recognizer.current_timeout)
    if not text:
        logger.info("🔇 未检测到语音")
        return
    logger.info(f"🗣️ 用户说: '{text}'")
    # The AI decision may flag that a follow-up utterance is expected.
    decision = assistant.process_voice_command(text)
    expect_follow_up = decision.get("expect_follow_up", False)
    # Execute the plan.
    result = executor.execute_task_plan(decision)
    # NOTE(review): build_reply is neither defined nor imported in this file —
    # confirm it is provided by one of the imported modules.
    ai_reply = build_reply(result)
    # Adapt the next listening window to the AI's expectation.
    if expect_follow_up:
        recognizer.current_timeout = random.uniform(10, 20)
        logger.debug("🧠 AI 预期后续提问,延长等待时间")
    else:
        recognizer.current_timeout = 5
        logger.debug("🔚 AI 认为对话结束,缩短等待")
    # Speak the reply.
    logger.info(f"🤖 回复: {ai_reply}")
    tts_engine.speak(ai_reply)
@log_step("启动 AI 语音助手")
@log_time
def main():
    """Program entry point: start TTS, then loop over voice interactions.

    Exits the loop when the executor's last result requests exit, when the
    user speaks an exit keyword after a failed interaction, or on Ctrl+C.
    """
    logger.info("🚀 正在启动 AI 语音助手系统...")
    try:
        tts_engine.start()
        log_call("✅ 所有模块初始化完成,进入监听循环")
        log_call("\n" + "—" * 50)
        log_call("🎙️ 语音助手已就绪")
        log_call("💡 说出你的命令,例如:'打开浏览器'、'写一篇春天的文章'")
        log_call("🛑 说出‘退出’、‘关闭’、‘停止’或‘拜拜’来结束程序")
        log_call("—" * 50 + "\n")
        while True:
            try:
                handle_single_interaction()
                # 🚩 Stop when the last executed plan requested an exit.
                last_result = executor.last_result  # assumes TaskOrchestrator records last_result
                if last_result and last_result.get("should_exit"):
                    logger.info("🎯 接收到退出指令,即将终止程序...")
                    break  # leave the loop and run the cleanup path
            except KeyboardInterrupt:
                logger.info("🛑 用户主动中断 (Ctrl+C),准备退出...")
                raise  # re-raise so the outer handler performs shutdown
            except Exception as e:
                logger.exception("⚠️ 单次交互过程中发生异常,已降级处理")
                error_msg = "抱歉,我在处理刚才的操作时遇到了一点问题。"
                logger.info(f"🗣️ 回复: {error_msg}")
                tts_engine.speak(error_msg)
                # Even after a failure, honour a spoken exit request.
                last_text = recognizer.last_text.lower()
                exit_keywords = ['退出', '关闭', '停止', '拜拜', '再见']
                if any(word in last_text for word in exit_keywords):
                    logger.info("🎯 用户请求退出,程序即将终止")
                    break
            time.sleep(0.5)
        tts_engine.stop()
        logger.info("👋 语音助手已安全退出")
    except KeyboardInterrupt:
        logger.info("🛑 用户通过 Ctrl+C 中断程序")
        print("\n👋 再见!")
    except Exception as e:
        logger.exception("❌ 主程序运行时发生未预期异常")
        print(f"\n🚨 程序异常终止:{e}")
        sys.exit(1)
if __name__ == "__main__":
    # Configure logging only when the root logger has no handlers yet, so an
    # embedding application keeps its own logging setup.
    if not logging.getLogger().handlers:
        setup_logger(name="ai_assistant", log_dir="logs", level=logging.INFO)
    main()