# 请对程序作如下修改:1.大文本框中的内容比较多时,自动滚动到最后;大文本框中的字号加大
import sys
import os
import time
import base64
import threading
import cv2
import numpy as np
from PIL import Image
import speech_recognition as sr
import pyttsx3
from PyQt5.QtWidgets import (QApplication, QMainWindow, QTabWidget, QWidget, QVBoxLayout, QHBoxLayout,
QTextEdit, QPushButton, QLineEdit, QLabel, QSlider, QProgressBar,
QFileDialog, QComboBox, QGroupBox, QCheckBox)
from PyQt5.QtGui import QPixmap, QImage, QFont
from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal
import ollama
from io import BytesIO
# 定义默认模型常量
DEFAULT_MODEL = "llama3.2-vision"
class ModelWorker(QThread):
responseSignal = pyqtSignal(str)
progressSignal = pyqtSignal(int)
completed = pyqtSignal()
def __init__(self, prompt, model, images=None, max_tokens=4096, temperature=0.7):
super().__init__()
self.prompt = prompt
self.model = model
self.images = images
self.max_tokens = max_tokens
self.temperature = temperature
self._is_running = True
def run(self):
try:
options = {
'num_predict': self.max_tokens,
'temperature': self.temperature
}
if self.images:
images_base64 = [self.image_to_base64(img) for img in self.images]
response = ollama.chat(
model=self.model,
messages=[{
'role': 'user',
'content': self.prompt,
'images': images_base64
}],
stream=True,
options=options
)
else:
response = ollama.chat(
model=self.model,
messages=[{'role': 'user', 'content': self.prompt}],
stream=True,
options=options
)
full_response = ""
for chunk in response:
if not self._is_running:
break
content = chunk.get('message', {}).get('content', '')
if content:
full_response += content
self.responseSignal.emit(full_response)
self.progressSignal.emit(25)
self.completed.emit()
except Exception as e:
self.responseSignal.emit(f"错误: {str(e)}")
self.completed.emit()
def image_to_base64(self, image_path):
with Image.open(image_path) as img:
img = img.convert('RGB')
buffered = BytesIO()
img.save(buffered, format="JPEG")
return base64.b64encode(buffered.getvalue()).decode('utf-8')
def stop(self):
self._is_running = False
class SpeechWorker(QThread):
finished = pyqtSignal(str)
def run(self):
r = sr.Recognizer()
with sr.Microphone() as source:
audio = r.listen(source)
try:
text = r.recognize_google(audio, language='zh-CN')
self.finished.emit(text)
except Exception as e:
self.finished.emit(f"语音识别错误: {str(e)}")
class MultiModalApp(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle("多模态智能助手 - Gemma3:27b")
self.setGeometry(100, 100, 1200, 800)
self.setFont(QFont("Microsoft YaHei", 10))
# 主布局
self.tabs = QTabWidget()
self.setCentralWidget(self.tabs)
# 初始化默认模型
self.default_model = DEFAULT_MODEL
# 初始化模型列表
self.model_list = self.get_ollama_models()
# 创建功能页
self.create_chat_tab()
self.create_image_tab()
self.create_voice_tab()
self.create_video_tab()
self.create_settings_tab()
# 状态变量
self.current_images = []
self.model_worker = None
self.speech_worker = None
self.video_capture = None
self.video_timer = QTimer()
self.video_timer.timeout.connect(self.update_video_frame)
# 初始化语音引擎
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
self.engine.setProperty('volume', 0.9)
self.engine.setProperty('voice', 'zh') # 设置中文语音
# 检查Ollama连接
self.check_ollama_connection()
def get_ollama_models(self):
"""从Ollama服务器获取可用模型列表"""
try:
client = ollama.Client()
models = client.list()
# 确保我们正确处理API返回的数据结构
if 'models' in models:
model_names = [model['model'] for model in models['models']]
return model_names
else:
print(f"警告: Ollama返回的数据不包含模型列表: {models}")
return self.get_fallback_models()
except Exception as e:
print(f"获取模型列表失败: {e}")
return self.get_fallback_models()
def get_fallback_models(self):
"""获取后备模型列表"""
return [self.default_model, "llama3", "mistral", "gemma:2b"]
def create_chat_tab(self):
tab = QWidget()
layout = QVBoxLayout()
# 聊天历史
self.chat_history = QTextEdit()
self.chat_history.setReadOnly(True)
self.chat_history.setStyleSheet("background-color: #f0f8ff;")
# 参数控制
param_group = QGroupBox("模型参数")
param_layout = QHBoxLayout()
self.max_tokens = QSlider(Qt.Horizontal)
self.max_tokens.setRange(128, 8192)
self.max_tokens.setValue(2048)
self.max_tokens_label = QLabel("最大Token数: 2048")
self.temperature = QSlider(Qt.Horizontal)
self.temperature.setRange(0, 100)
self.temperature.setValue(70)
self.temperature_label = QLabel("温度: 0.7")
param_layout.addWidget(QLabel("最大Token:"))
param_layout.addWidget(self.max_tokens)
param_layout.addWidget(self.max_tokens_label)
param_layout.addWidget(QLabel("温度:"))
param_layout.addWidget(self.temperature)
param_layout.addWidget(self.temperature_label)
param_group.setLayout(param_layout)
# 输入区域
input_layout = QHBoxLayout()
self.input_field = QLineEdit()
self.input_field.setPlaceholderText("输入您的问题...")
self.send_button = QPushButton("发送")
self.clear_button = QPushButton("清空对话")
input_layout.addWidget(self.input_field, 4)
input_layout.addWidget(self.send_button, 1)
input_layout.addWidget(self.clear_button, 1)
# 进度条
self.progress_bar = QProgressBar()
self.progress_bar.setVisible(False)
# 组装布局
layout.addWidget(self.chat_history, 6)
layout.addWidget(param_group)
layout.addLayout(input_layout)
layout.addWidget(self.progress_bar)
tab.setLayout(layout)
self.tabs.addTab(tab, "文本对话")
# 信号连接
self.send_button.clicked.connect(self.send_chat)
self.clear_button.clicked.connect(self.clear_chat)
self.input_field.returnPressed.connect(self.send_chat)
self.max_tokens.valueChanged.connect(self.update_max_tokens_label)
self.temperature.valueChanged.connect(self.update_temperature_label)
def create_image_tab(self):
tab = QWidget()
layout = QVBoxLayout()
# 图像显示
image_layout = QHBoxLayout()
self.image_label = QLabel()
self.image_label.setAlignment(Qt.AlignCenter)
self.image_label.setStyleSheet("border: 2px solid gray; min-height: 300px;")
self.image_label.setText("图片预览区域")
image_layout.addWidget(self.image_label)
# 图像处理按钮
btn_layout = QHBoxLayout()
self.load_image_btn = QPushButton("加载图片")
self.analyze_image_btn = QPushButton("分析图片")
self.generate_image_btn = QPushButton("生成图片描述")
btn_layout.addWidget(self.load_image_btn)
btn_layout.addWidget(self.analyze_image_btn)
btn_layout.addWidget(self.generate_image_btn)
# 结果区域
self.image_result = QTextEdit()
self.image_result.setReadOnly(True)
self.image_result.setStyleSheet("background-color: #fffaf0;")
# 组装布局
layout.addLayout(image_layout, 3)
layout.addLayout(btn_layout)
layout.addWidget(self.image_result, 2)
tab.setLayout(layout)
self.tabs.addTab(tab, "图像分析")
# 信号连接
self.load_image_btn.clicked.connect(self.load_image)
self.analyze_image_btn.clicked.connect(self.analyze_image)
self.generate_image_btn.clicked.connect(self.generate_image_description)
def create_voice_tab(self):
tab = QWidget()
layout = QVBoxLayout()
# 语音控制
voice_layout = QHBoxLayout()
self.record_btn = QPushButton("开始录音")
self.play_btn = QPushButton("播放回复")
self.stop_btn = QPushButton("停止")
voice_layout.addWidget(self.record_btn)
voice_layout.addWidget(self.play_btn)
voice_layout.addWidget(self.stop_btn)
# 语音识别结果
self.voice_input = QTextEdit()
self.voice_input.setPlaceholderText("语音识别结果将显示在这里...")
# 语音合成设置
tts_group = QGroupBox("语音设置")
tts_layout = QHBoxLayout()
self.voice_speed = QSlider(Qt.Horizontal)
self.voice_speed.setRange(100, 300)
self.voice_speed.setValue(150)
self.voice_speed_label = QLabel("语速: 150")
self.voice_volume = QSlider(Qt.Horizontal)
self.voice_volume.setRange(0, 100)
self.voice_volume.setValue(90)
self.voice_volume_label = QLabel("音量: 90%")
tts_layout.addWidget(QLabel("语速:"))
tts_layout.addWidget(self.voice_speed)
tts_layout.addWidget(self.voice_speed_label)
tts_layout.addWidget(QLabel("音量:"))
tts_layout.addWidget(self.voice_volume)
tts_layout.addWidget(self.voice_volume_label)
tts_group.setLayout(tts_layout)
# 组装布局
layout.addLayout(voice_layout)
layout.addWidget(self.voice_input, 2)
layout.addWidget(tts_group)
tab.setLayout(layout)
self.tabs.addTab(tab, "语音交互")
# 信号连接
self.record_btn.clicked.connect(self.start_recording)
self.play_btn.clicked.connect(self.play_response)
self.stop_btn.clicked.connect(self.stop_audio)
self.voice_speed.valueChanged.connect(self.update_voice_speed)
self.voice_volume.valueChanged.connect(self.update_voice_volume)
def create_video_tab(self):
tab = QWidget()
layout = QVBoxLayout()
# 视频控制
video_control = QHBoxLayout()
self.load_video_btn = QPushButton("加载视频")
self.play_video_btn = QPushButton("播放")
self.pause_video_btn = QPushButton("暂停")
self.analyze_video_btn = QPushButton("分析视频")
video_control.addWidget(self.load_video_btn)
video_control.addWidget(self.play_video_btn)
video_control.addWidget(self.pause_video_btn)
video_control.addWidget(self.analyze_video_btn)
# 视频显示
video_layout = QHBoxLayout()
self.video_label = QLabel()
self.video_label.setAlignment(Qt.AlignCenter)
self.video_label.setStyleSheet("border: 2px solid gray; min-height: 300px;")
self.video_label.setText("视频预览区域")
video_layout.addWidget(self.video_label)
# 分析结果
self.video_result = QTextEdit()
self.video_result.setReadOnly(True)
self.video_result.setStyleSheet("background-color: #f5f5f5;")
# 组装布局
layout.addLayout(video_control)
layout.addLayout(video_layout, 3)
layout.addWidget(self.video_result, 2)
tab.setLayout(layout)
self.tabs.addTab(tab, "视频分析")
# 信号连接
self.load_video_btn.clicked.connect(self.load_video)
self.play_video_btn.clicked.connect(self.play_video)
self.pause_video_btn.clicked.connect(self.pause_video)
self.analyze_video_btn.clicked.connect(self.analyze_video)
def create_settings_tab(self):
tab = QWidget()
layout = QVBoxLayout()
# Ollama设置
ollama_group = QGroupBox("Ollama设置")
ollama_layout = QVBoxLayout()
self.ollama_host = QLineEdit("http://localhost:11434")
self.ollama_model = QComboBox()
# 添加从Ollama获取的模型列表
self.ollama_model.addItems(self.model_list)
# 设置默认模型
if self.default_model in self.model_list:
index = self.model_list.index(self.default_model)
self.ollama_model.setCurrentIndex(index)
elif self.model_list: # 如果模型列表不为空
self.default_model = self.model_list[0]
self.ollama_model.setCurrentIndex(0)
else: # 如果模型列表为空
self.ollama_model.addItem(self.default_model)
self.ollama_model.setCurrentIndex(0)
ollama_layout.addWidget(QLabel("服务器地址:"))
ollama_layout.addWidget(self.ollama_host)
ollama_layout.addWidget(QLabel("模型选择:"))
ollama_layout.addWidget(self.ollama_model)
ollama_group.setLayout(ollama_layout)
# 系统设置
system_group = QGroupBox("系统设置")
system_layout = QVBoxLayout()
self.auto_play = QCheckBox("自动播放语音回复")
self.stream_output = QCheckBox("启用流式输出")
self.stream_output.setChecked(True)
self.save_logs = QCheckBox("保存对话日志")
system_layout.addWidget(self.auto_play)
system_layout.addWidget(self.stream_output)
system_layout.addWidget(self.save_logs)
system_group.setLayout(system_layout)
# 组装布局
layout.addWidget(ollama_group)
layout.addWidget(system_group)
layout.addStretch()
tab.setLayout(layout)
self.tabs.addTab(tab, "系统设置")
# 信号连接
self.ollama_host.textChanged.connect(self.update_ollama_host)
self.ollama_model.currentIndexChanged.connect(self.update_ollama_model)
# 实现之前省略的方法
def update_max_tokens_label(self, value):
self.max_tokens_label.setText(f"最大Token数: {value}")
def update_temperature_label(self, value):
self.temperature_label.setText(f"温度: {value / 100:.1f}")
def update_voice_speed(self, value):
self.voice_speed_label.setText(f"语速: {value}")
self.engine.setProperty('rate', value)
def update_voice_volume(self, value):
self.voice_volume_label.setText(f"音量: {value}%")
self.engine.setProperty('volume', value / 100)
def send_chat(self):
prompt = self.input_field.text().strip()
if not prompt:
return
self.progress_bar.setVisible(True)
self.progress_bar.setRange(0, 0) # 不确定进度
self.chat_history.append(f"<b>用户:</b> {prompt}")
self.input_field.clear()
# 创建模型工作线程
self.model_worker = ModelWorker(
prompt=prompt,
model=self.ollama_model.currentText(),
max_tokens=self.max_tokens.value(),
temperature=self.temperature.value() / 100
)
self.model_worker.responseSignal.connect(self.update_chat_response)
self.model_worker.completed.connect(self.on_chat_completed)
self.model_worker.start()
def update_chat_response(self, response):
# 获取当前文本并移除旧的助手回复
current_text = self.chat_history.toPlainText()
if "助手:" in current_text:
current_text = current_text[:current_text.rfind("助手:")]
# 添加新回复
self.chat_history.setPlainText(f"{current_text}<b>助手:</b> {response}")
self.chat_history.moveCursor(self.chat_history.textCursor().End)
def on_chat_completed(self):
self.progress_bar.setVisible(False)
if self.auto_play.isChecked():
self.play_response()
def clear_chat(self):
self.chat_history.clear()
def load_image(self):
file_path, _ = QFileDialog.getOpenFileName(
self, "选择图片", "",
"图片文件 (*.png *.jpg *.jpeg *.bmp)"
)
if file_path:
self.current_images = [file_path]
pixmap = QPixmap(file_path)
pixmap = pixmap.scaled(600, 400, Qt.KeepAspectRatio)
self.image_label.setPixmap(pixmap)
def analyze_image(self):
if not self.current_images:
self.image_result.setText("请先加载图片")
return
self.progress_bar.setVisible(True)
self.progress_bar.setRange(0, 0)
prompt = "详细分析这张图片的内容,包括场景、对象、颜色和可能的含义"
self.model_worker = ModelWorker(
prompt=prompt,
model=self.ollama_model.currentText(),
images=self.current_images,
max_tokens=self.max_tokens.value(),
temperature=self.temperature.value() / 100
)
self.model_worker.responseSignal.connect(self.update_image_result)
self.model_worker.completed.connect(lambda: self.progress_bar.setVisible(False))
self.model_worker.start()
def update_image_result(self, response):
self.image_result.setText(response)
def generate_image_description(self):
if not self.current_images:
self.image_result.setText("请先加载图片")
return
self.progress_bar.setVisible(True)
self.progress_bar.setRange(0, 0)
prompt = "为这张图片生成一个简洁的描述,适合用于社交媒体分享"
self.model_worker = ModelWorker(
prompt=prompt,
model=self.ollama_model.currentText(),
images=self.current_images,
max_tokens=self.max_tokens.value(),
temperature=self.temperature.value() / 100
)
self.model_worker.responseSignal.connect(self.update_image_result)
self.model_worker.completed.connect(lambda: self.progress_bar.setVisible(False))
self.model_worker.start()
def start_recording(self):
self.record_btn.setEnabled(False)
self.voice_input.setText("正在录音...")
self.speech_worker = SpeechWorker()
self.speech_worker.finished.connect(self.on_speech_recognition_complete)
self.speech_worker.start()
def on_speech_recognition_complete(self, text):
self.record_btn.setEnabled(True)
self.voice_input.setText(text)
# 如果设置了自动播放,直接发送到聊天
if self.auto_play.isChecked():
self.input_field.setText(text)
self.send_chat()
def play_response(self):
if not self.chat_history.toPlainText():
return
# 获取最后一条助手的回复
full_text = self.chat_history.toPlainText()
last_response = full_text.split("助手:")[-1].strip()
# 播放语音
self.engine.say(last_response)
self.engine.runAndWait()
def stop_audio(self):
self.engine.stop()
def load_video(self):
file_path, _ = QFileDialog.getOpenFileName(
self, "选择视频", "",
"视频文件 (*.mp4 *.avi *.mov)"
)
if file_path:
self.video_path = file_path
self.video_capture = cv2.VideoCapture(file_path)
self.play_video()
def play_video(self):
if not hasattr(self, 'video_capture') or not self.video_capture.isOpened():
return
self.video_timer.start(30) # 约30fps
def pause_video(self):
self.video_timer.stop()
def update_video_frame(self):
ret, frame = self.video_capture.read()
if not ret:
self.video_timer.stop()
return
# 转换OpenCV BGR图像为Qt RGB图像
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
h, w, ch = frame.shape
bytes_per_line = ch * w
q_img = QImage(frame.data, w, h, bytes_per_line, QImage.Format_RGB888)
pixmap = QPixmap.fromImage(q_img)
pixmap = pixmap.scaled(800, 450, Qt.KeepAspectRatio)
self.video_label.setPixmap(pixmap)
def analyze_video(self):
if not hasattr(self, 'video_path'):
self.video_result.setText("请先加载视频")
return
self.progress_bar.setVisible(True)
self.progress_bar.setRange(0, 0)
prompt = "分析这段视频的主要内容,包括场景、关键动作和可能的主题"
self.model_worker = ModelWorker(
prompt=prompt,
model=self.ollama_model.currentText(),
max_tokens=self.max_tokens.value(),
temperature=self.temperature.value() / 100
)
self.model_worker.responseSignal.connect(self.update_video_result)
self.model_worker.completed.connect(lambda: self.progress_bar.setVisible(False))
self.model_worker.start()
def update_video_result(self, response):
self.video_result.setText(response)
def update_ollama_host(self, host):
os.environ["OLLAMA_HOST"] = host
self.check_ollama_connection()
def update_ollama_model(self, index):
# 模型切换逻辑
pass
def check_ollama_connection(self):
try:
client = ollama.Client(host=self.ollama_host.text())
models = client.list()
self.statusBar().showMessage("Ollama连接成功", 3000)
return True
except:
self.statusBar().showMessage("无法连接Ollama服务器,请检查设置", 5000)
return False
if __name__ == "__main__":
app = QApplication(sys.argv)
window = MultiModalApp()
window.show()
sys.exit(app.exec_())