# 优化后的代码,仍需继续优化。
import os
import time
import json
import pyautogui
import pygetwindow as gw
from pynput.keyboard import Controller, Key
from vosk import Model, KaldiRecognizer
import pyaudio
# ================== Configuration ==================
# Path that is checked for existence and then handed to vosk.Model.
MODEL_PATH: str = "vosk-model-cn-0.22"
# Window-title strings passed to pygetwindow when looking up the target apps.
WORD_WINDOW_TITLE: str = "Microsoft Word"
WECHAT_WINDOW_TITLE: str = "微信"
# Pause, in seconds, after each simulated key stroke.
INPUT_DELAY: float = 0.02
# Seconds without newly recognised text before the buffer is typed out.
BUFFER_TIMEOUT: float = 1.0
# ===================================================
class VoiceAssistant:
    """Voice dictation helper that types recognised speech into Word.

    Audio is read from the default PyAudio input device and fed to a Vosk
    ``KaldiRecognizer``. Recognised tokens are translated through
    ``punctuation_map`` (spoken punctuation names and spoken commands),
    buffered, and typed into the Word window once no new text has arrived
    for ``BUFFER_TIMEOUT`` seconds.
    """

    # Sample rate shared by the recogniser and the audio stream.
    SAMPLE_RATE = 16000
    # Buffer size requested when opening the audio stream.
    FRAMES_PER_BUFFER = 8192
    # Number of frames pulled from the stream per loop iteration.
    READ_FRAMES = 4096

    def __init__(self):
        """Load the speech model, open the microphone and build the map.

        Raises:
            FileNotFoundError: if ``MODEL_PATH`` does not exist.
        """
        self.keyboard = Controller()
        self.text_buffer = []
        self.last_input_time = time.time()

        # Speech recognition.
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"未找到语音模型:{MODEL_PATH}")
        self.model = Model(MODEL_PATH)
        self.recognizer = KaldiRecognizer(self.model, self.SAMPLE_RATE)

        # Audio input. If the stream cannot be opened, release PyAudio
        # before propagating the error so it is not leaked.
        self.p = pyaudio.PyAudio()
        try:
            self.stream = self.p.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.SAMPLE_RATE,
                input=True,
                frames_per_buffer=self.FRAMES_PER_BUFFER,
            )
        except Exception:
            self.p.terminate()
            raise

        # Spoken word -> replacement text, or ("cmd", callable) for commands.
        self.punctuation_map = {
            "逗号": ",",
            "句号": "。",
            "问号": "?",
            "感叹号": "!",
            "冒号": ":",
            "分号": ";",
            "顿号": "、",
            "省略号": "……",
            "空格": " ",
            "换行": "\n",
            "微信发送": ("cmd", self.send_wechat),
            "输入文本": ("cmd", self.activate_word),
        }

    @staticmethod
    def _join_tokens(tokens):
        """Concatenate tokens, spacing only adjacent ASCII alphanumerics.

        Chinese words and punctuation are joined directly so the typed text
        does not contain a space between every recognised token; a single
        space is kept between two neighbouring ASCII words/numbers.
        Empty tokens are ignored.
        """
        def is_ascii_alnum(ch):
            return ord(ch) < 128 and ch.isalnum()

        pieces = []
        for token in tokens:
            if not token:
                continue
            if (
                pieces
                and is_ascii_alnum(pieces[-1][-1])
                and is_ascii_alnum(token[0])
            ):
                pieces.append(" ")
            pieces.append(token)
        return "".join(pieces)

    def activate_app(self, window_title):
        """Bring the first window matching ``window_title`` to the front.

        The window is restored if minimised, activated, and then clicked in
        its centre.

        Returns:
            True on success; False if no window matched or activation
            raised (the error is printed, not propagated).
        """
        windows = gw.getWindowsWithTitle(window_title)
        if not windows:
            print(f"未找到窗口:{window_title}")
            return False

        target_window = windows[0]
        try:
            # A minimised window has no usable on-screen centre to click.
            if target_window.isMinimized:
                target_window.restore()
            target_window.activate()
            time.sleep(0.5)
            # Click the centre of the window.
            x = target_window.left + target_window.width // 2
            y = target_window.top + target_window.height // 2
            pyautogui.click(x, y)
            time.sleep(0.2)
            return True
        except Exception as e:
            print(f"激活窗口失败:{e}")
            return False

    def activate_word(self):
        """Activate the Word window; return True on success."""
        return self.activate_app(WORD_WINDOW_TITLE)

    def activate_wechat(self):
        """Activate the WeChat window; return True on success."""
        return self.activate_app(WECHAT_WINDOW_TITLE)

    def send_wechat(self):
        """Activate WeChat and press Enter (does nothing if activation fails)."""
        if self.activate_wechat():
            self.keyboard.press(Key.enter)
            self.keyboard.release(Key.enter)
            time.sleep(0.1)

    def type_text(self, text):
        """Type ``text`` one character at a time with ``INPUT_DELAY`` pauses.

        Newline / carriage return and tab are sent as the Enter and Tab
        keys, because a raw control character is not guaranteed to produce
        a line break or tab in the target application.
        """
        for char in text:
            if char in ("\n", "\r"):
                key = Key.enter
            elif char == "\t":
                key = Key.tab
            else:
                key = char
            self.keyboard.press(key)
            self.keyboard.release(key)
            time.sleep(INPUT_DELAY)

    def process_text(self, text):
        """Split recognised text into typed text and commands.

        ``text`` is treated as whitespace-separated tokens. Tokens found in
        ``punctuation_map`` are replaced by their mapped string, or, for
        ``("cmd", callable)`` entries, collected as commands and removed
        from the text.

        Returns:
            ``(typed_text, commands)`` where ``commands`` is a list of
            zero-argument callables in spoken order.
        """
        commands = []
        processed = []
        for word in text.split():
            item = self.punctuation_map.get(word, word)
            if isinstance(item, tuple) and item[0] == "cmd":
                commands.append(item[1])
            else:
                processed.append(str(item))
        return self._join_tokens(processed), commands

    def flush_buffer(self):
        """Type the buffered text into Word and clear the buffer.

        The buffer is cleared before typing, so the text is dropped if the
        Word window cannot be activated. Typing errors are printed, not
        raised.
        """
        if not self.text_buffer:
            return
        combined_text = self._join_tokens(self.text_buffer)
        self.text_buffer = []
        try:
            if self.activate_word():
                self.type_text(combined_text)
        except Exception as e:
            print(f"输入失败:{e}")

    def _handle_final_result(self):
        """Consume a final recognition result: run commands, buffer text."""
        result = json.loads(self.recognizer.Result())
        text = result.get("text", "").strip()
        if not text:
            return
        print(f"完整识别:{text}")
        processed_text, commands = self.process_text(text)
        for cmd in commands:
            cmd()
        if processed_text:
            self.text_buffer.append(processed_text)
            self.last_input_time = time.time()

    def _show_partial_result(self):
        """Print the recogniser's current partial hypothesis, if any."""
        partial = json.loads(self.recognizer.PartialResult())
        partial_text = partial.get("partial", "")
        if partial_text:
            print(f"实时识别:{partial_text}")

    def close(self):
        """Stop and close the audio stream, then terminate PyAudio."""
        try:
            self.stream.stop_stream()
            self.stream.close()
        finally:
            # Always release PyAudio, even if stopping the stream failed.
            self.p.terminate()

    def run(self):
        """Run the recognition loop until interrupted with Ctrl+C.

        Audio resources are released when the loop exits for any reason.
        """
        print("语音输入已启动,请开始说话...")
        try:
            while True:
                data = self.stream.read(
                    self.READ_FRAMES, exception_on_overflow=False
                )
                if self.recognizer.AcceptWaveform(data):
                    self._handle_final_result()
                else:
                    self._show_partial_result()
                # Submit buffered text once no new text arrived for a while.
                if time.time() - self.last_input_time > BUFFER_TIMEOUT:
                    self.flush_buffer()
        except KeyboardInterrupt:
            print("\n正在关闭...")
        finally:
            self.close()
if __name__ == "__main__":
    # Script entry point: build the assistant and run its loop; any
    # exception raised while constructing or running it is reported on
    # stdout instead of producing a traceback.
    try:
        assistant = VoiceAssistant()
        assistant.run()
    except Exception as exc:
        print(f"启动失败:{exc!s}")
# 如需模型文件,请私信联系。