p=pyaudio.PyAudio()出错

本次讨论聚焦于Python社区邮件列表中关于编程实践与技术问题的深入交流。参与者分享了关于Python编程的各种见解,包括最佳实践、常见错误及解决方案。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

""" 语音交互式DeepSeek问答系统 通过语音输入调用DeepSeek API回答问题 """ import json import os from vosk import Model, KaldiRecognizer import pyaudio import requests import mysql.connector from mysql.connector import Error from datetime import datetime from api.config import API_CONFIGS from doubaotts.doubaotts import VolcanoTTS # 初始化语音识别 model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model', 'vosk-model-cn-0.22', 'vosk-model-cn-0.22') model = Model(model_path) rec = KaldiRecognizer(model, 16000) p = pyaudio.PyAudio() # 本地ollama模型配置 ollama_model = "deepseek-r1:7b" # 初始化音频输入 def init_audio(): # 列出可用音频设备 for i in range(p.get_device_count()): print(p.get_device_info_by_index(i)) # 使用默认输入设备 stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000, input_device_index=None) stream.start_stream() return stream # 调用本地ollama模型 def ask_deepseek(question): try: # 检查是否是数据库查询指令 if question.strip().lower().startswith("查询数据库:"): parts = question[len("查询数据库:"):].strip().split("|") if len(parts) == 2: db_name = parts[0].strip() query = parts[1].strip() result = query_other_db(db_name, query) if result is not None: return f"查询结果:\n{json.dumps(result, indent=2, ensure_ascii=False)}" else: return "查询失败,请检查数据库名称和查询语句" else: return "查询格式错误,请使用'查询数据库:数据库名|SQL查询语句'格式" # 普通问题处理 response = requests.post( "http://localhost:11434/api/generate", json={ "model": ollama_model, "prompt": question, "stream": False } ) if response.status_code == 200: return response.json()['response'].split('\n')[-1] # 只返回最后一行结果 else: return f"ollama模型错误: {response.status_code}" except Exception as e: return f"调用ollama模型时发生错误: {str(e)}" # 初始化MySQL连接 def init_db(): try: # 从配置中获取数据库连接参数 db_config = API_CONFIGS['mysql'] connection = mysql.connector.connect( host=db_config['host'], database=db_config['database'], user=db_config['user'], password=db_config['password'], port=db_config['port'], charset=db_config['charset'], connection_timeout=db_config['connection_timeout'] ) if 
connection.is_connected(): # 创建对话记录表 cursor = connection.cursor() cursor.execute(""" CREATE TABLE IF NOT EXISTS conversations ( id INT AUTO_INCREMENT PRIMARY KEY, question TEXT NOT NULL, answer TEXT NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """) connection.commit() return connection except Error as e: print(f"数据库连接错误: {e}") return None # 查询其他MySQL数据库 def query_other_db(database_name, query): try: # 从配置中获取基础连接参数 db_config = API_CONFIGS['mysql'] connection = mysql.connector.connect( host=db_config['host'], database=database_name, user=db_config['user'], password=db_config['password'], port=db_config['port'], charset=db_config['charset'], connection_timeout=db_config['connection_timeout'] ) if connection.is_connected(): cursor = connection.cursor(dictionary=True) cursor.execute(query) result = cursor.fetchall() connection.close() return result except Error as e: print(f"查询数据库{database_name}错误: {e}") return None # 保存对话到数据库 def save_conversation(connection, question, answer): try: cursor = connection.cursor() query = "INSERT INTO conversations (question, answer) VALUES (%s, %s)" cursor.execute(query, (question, answer)) connection.commit() except Error as e: print(f"保存对话失败: {e}") # 主程序 def main(): # 初始化数据库连接 db_connection = init_db() if not db_connection: print("警告: 无法连接到数据库,对话将不会被保存") stream = init_audio() print("请说出您的问题(按Ctrl+C退出)...") try: question = "" # 初始化question变量 while True: try: data = stream.read(4000, exception_on_overflow=False) if len(data) == 0: break if rec.AcceptWaveform(data): result = json.loads(rec.Result()) question = result['text'] if question.strip(): print(f"您的问题: {question}") answer = ask_deepseek(question) print(f"DeepSeek回答: {answer}") # 语音播报回答 from voice_management.voice_manager import VoiceManager voice_manager = VoiceManager() voice_manager.create_voice(answer) # 保存对话到数据库 if db_connection: save_conversation(db_connection, question, answer) question = "" # 重置question变量避免重复处理 else: partial = json.loads(rec.PartialResult()) if 
'partial' in partial: print(f"正在识别: {partial['partial']}", end='\r') except OSError as e: if e.errno == -9981: # Input overflowed continue else: raise except KeyboardInterrupt: print("\n程序结束") finally: try: if stream.is_active(): stream.stop_stream() if not stream._closed: stream.close() except: pass p.terminate() # 关闭数据库连接 if db_connection and db_connection.is_connected(): db_connection.close() if __name__ == "__main__": main()能正常运行但是无法自动播放音频
07-16
有下述代码,想要实现的功能是实现多人通过麦克风说话能够识别出不同的识别人,并将其标识出来(例如SPK-1、SPK-2等),但是通过代码中的方法进行操作时,即当我运行代码mic_client.py后通过麦克风说话,发现不管是几个人说话,最终的结果都没有识别出不同的说话人,请问文档中的代码如何修改?可有什么方法能够实现实时语音识别说话人,并分离?例如"连续声纹识别"、"模型预热"等方法,请根据文档代码为基础,实现上述功能,并写出完整的代码、步骤以及最终可能呈现的结果!若是有多种方法能实现,请完整的写出每一种实现方法: 以下是在 Windows 服务器上部署 FunASR 实现实时双人语音识别分离的完整方案,包含环境配置、安装步骤、完整代码和效果演示: ### 一、安装步骤与依赖 #### 2. 创建Python环境 ```bash # 安装miniconda # 创建虚拟环境 conda create -n funasr python=3.10 -y conda activate funasr ``` #### 3. 安装Python依赖 ```bash # 安装PyTorch (CUDA 12.1兼容版本) pip install torch==2.1.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118 # 安装FunASR及其他依赖 pip install "funasr[modelscope]" --upgrade pip install pyaudio websockets soundfile ``` --- ### 二、部署方案架构 ``` 麦克风输入 → PyAudio采集 → WebSocket传输 → FunASR服务端 → 说话人分离 → 结果返回 ``` --- ### 三、完整实现代码 #### 1. 服务端代码 (`funasr_server.py`) ```python from funasr import AutoModel import numpy as np import websockets import asyncio import json import time import traceback # 添加错误追踪 print("开始加载模型...") try: # 加载模型 - 使用在线模型避免路径问题 model = AutoModel( model="iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn", vad_threshold=0.3, punc=True, device="cuda", diar_config={ "enable": True, "max_speakers": 2, "linkage_threshold": 0.75 }, disable_update=True ) print("✅ 模型加载成功") except Exception as e: print(f"❌ 模型加载失败: {str(e)}") raise SystemExit("无法加载模型,请检查配置") async def recognize_speech(websocket): """处理实时音频流并进行识别""" audio_buffer = [] last_process_time = time.time() chunk_count = 0 print(f"新客户端连接: {websocket.remote_address}") try: async for message in websocket: chunk_count += 1 current_time = time.time() # 接收16kHz 16bit mono PCM音频 audio_int16 = np.frombuffer(message, dtype=np.int16) # 关键修复:转换为模型需要的float32格式 audio_chunk = audio_int16.astype(np.float32) / 32768.0 # 归一化到[-1.0, 1.0] audio_buffer.append(audio_chunk) # 每1秒处理一次音频或达到10个块 if len(audio_buffer) >= 10 or current_time - last_process_time >= 1.0: full_audio = np.concatenate(audio_buffer) print(f"处理音频: {len(full_audio)}采样点 
({len(audio_buffer)}个块)") print(f"音频数据类型: {full_audio.dtype}, 范围: [{np.min(full_audio):.4f}, {np.max(full_audio):.4f}]") try: results = model.generate( input=full_audio, is_final=False, batch_size_token=5000 ) # 发送识别结果 if results and 'text' in results[0]: response = { "speaker": results[0].get("speaker", "UNKNOWN"), "text": results[0]["text"], "start_time": results[0].get("start", 0), "end_time": results[0].get("end", 0) } await websocket.send(json.dumps(response)) print(f"发送结果: {response['text']}") else: print("未检测到有效语音") except Exception as e: print(f"处理错误: {str(e)}") traceback.print_exc() # 打印完整错误堆栈 # 重置缓冲区和计时器 audio_buffer = [] last_process_time = current_time print(f"已处理 {chunk_count} 个音频块") except websockets.exceptions.ConnectionClosed: print(f"客户端断开连接: {websocket.remote_address}") except Exception as e: print(f"处理异常: {str(e)}") traceback.print_exc() async def main(): """启动WebSocket服务器""" server = await websockets.serve( recognize_speech, "0.0.0.0", 8765, ping_timeout=None ) print("FunASR服务已启动,监听端口 8765...") await server.wait_closed() if __name__ == "__main__": asyncio.run(main()) ``` #### 2. 
客户端代码 (`mic_client.py`) ```python import pyaudio import websockets import asyncio import json import sys # 音频参数 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 16000 CHUNK = 1600 # 100ms音频数据 async def recv_results(websocket): """单独处理结果接收""" try: while True: response = await websocket.recv() try: result = json.loads(response) print(f"[Speaker {result.get('speaker', 'UNKNOWN')}] {result.get('text', '')}") except json.JSONDecodeError: print(f"收到非JSON响应: {response[:50]}...") except websockets.exceptions.ConnectionClosed: print("服务端连接已关闭") async def stream_microphone(): """采集麦克风音频并发送到服务器""" p = pyaudio.PyAudio() # 打开麦克风流 stream = p.open( format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK ) print("麦克风已启动,按下Ctrl+C停止...") try: async with websockets.connect("ws://127.0.0.1:8765") as websocket: # 创建独立任务处理结果接收 recv_task = asyncio.create_task(recv_results(websocket)) try: while True: # 读取音频数据 data = stream.read(CHUNK, exception_on_overflow=False) # 发送音频数据 await websocket.send(data) # 添加短暂延迟避免CPU占用过高 await asyncio.sleep(0.01) except KeyboardInterrupt: print("停止采集") recv_task.cancel() finally: stream.stop_stream() stream.close() p.terminate() print("音频设备已释放") if __name__ == "__main__": try: asyncio.run(stream_microphone()) except KeyboardInterrupt: sys.exit(0) ``` --- ### 四、部署与运行 #### 1. 启动服务端 ```bash conda activate funasr_env python funasr_server.py ``` **输出:** ``` INFO:__main__:FunASR服务已启动,监听端口 8765... ``` #### 2. 启动客户端(在另一终端) ```bash # 修改your-server-ip为实际IP python mic_client.py ``` **输出:** ``` 麦克风已启动,按下Ctrl+C停止... ``` #### 3. 测试效果 当两人通过麦克风对话时: ``` [Speaker UNKNOWN] 我们今天的会议议程有三项 [Speaker UNKNOWN] 首先讨论项目进度 [Speaker UNKNOWN] 后端开发已经完成80% [Speaker UNKNOWN] 前端还需要两周时间 ``` ### 五、高级优化建议 1. **声纹预注册**(提升区分准确率): ```python # 在服务端启动前添加 spk1_emb = model.speaker_embed("speaker1_sample.wav") model.add_hotword(name="张三", voiceprint=spk1_emb) spk2_emb = model.speaker_embed("speaker2_sample.wav") model.add_hotword(name="李四", voiceprint=spk2_emb) ``` 2. 
**性能调优参数**: ```python model = AutoModel( quantize=True, # 量化模型减小显存 batch_size_token=8000, # 增大批处理 vad_silence_duration=200 # 静默检测时长(ms) ) ```
最新发布
07-16
package com.example.demoapplication;

import android.Manifest;                          // runtime permissions
import android.content.pm.PackageManager;         // permission check results
import android.media.AudioFormat;                 // audio format constants
import android.media.AudioManager;                // audio stream types
import android.media.AudioRecord;                 // microphone capture
import android.media.AudioTrack;                  // PCM playback
import android.media.MediaRecorder;               // audio source constants
import android.os.Bundle;                         // saved state
import android.os.Handler;                        // main-thread messaging
import android.os.Looper;                         // main-thread looper
import android.os.Message;                        // handler messages
import android.speech.tts.TextToSpeech;           // text-to-speech
import android.util.Base64;                       // Base64 encode/decode
import android.util.Log;                          // logging
import android.widget.Button;                     // UI buttons
import android.widget.Toast;                      // short notifications
import androidx.annotation.NonNull;               // non-null annotation
import androidx.appcompat.app.AppCompatActivity;  // activity base class
import androidx.core.app.ActivityCompat;          // permission requests
import androidx.core.content.ContextCompat;       // context helpers
import org.json.JSONException;                    // JSON errors
import org.json.JSONObject;                       // JSON packets
import java.io.BufferedReader;
import java.io.BufferedWriter;                    // buffered socket writes
import java.io.ByteArrayInputStream;
import java.io.IOException;                       // I/O errors
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;                // socket output writer
import java.net.ServerSocket;                     // server-side socket
import java.net.Socket;                           // client socket
import java.util.LinkedList;                      // queue backing store
import java.util.Locale;                          // TTS locale
import java.util.Queue;                           // audio-chunk queues
import java.util.concurrent.ExecutorService;      // worker thread pool
import java.util.concurrent.Executors;            // executor factory
import java.util.concurrent.ScheduledExecutorService; // periodic capture task
import java.util.concurrent.TimeUnit;             // time units
import java.util.concurrent.atomic.AtomicBoolean; // thread-safe flags

/**
 * Main activity implementing microphone recording, PCM playback, a JSON-over-TCP
 * control channel (the activity acts as a socket *server* on port 30000), and
 * TTS voice prompts. Audio is exchanged with the remote peer as Base64-encoded
 * 16 kHz mono PCM chunks inside newline-delimited JSON packets.
 */
public class MainActivity extends AppCompatActivity implements TextToSpeech.OnInitListener {

    // Log tag
    private static final String TAG = "AudioRecorder";

    // UI controls
    private Button startRecordButton, stopRecordButton;
    private Button playSoundButton, pauseSoundButton, stopSoundButton,
            resumeSoundButton, clearSoundsButton;

    private AudioRecord audioRecord;  // microphone capture object

    // Audio configuration constants
    private static final int SAMPLE_RATE = 16000;  // sample rate (Hz)
    private static final int BUFFER_SIZE;          // capture buffer size (bytes)

    // Static initializer: compute the capture buffer size once.
    // Uses the platform minimum but never less than 4096 bytes.
    static {
        int minBufferSize = AudioRecord.getMinBufferSize(
                SAMPLE_RATE,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT
        );
        BUFFER_SIZE = Math.max(minBufferSize, 4096);
    }

    // Threading and state management
    private ScheduledExecutorService scheduler;                         // periodic capture task runner
    private AtomicBoolean isRecording = new AtomicBoolean(false);       // recording flag
    private static final int PERMISSION_REQUEST_CODE = 1;               // permission request code
    private final ExecutorService executorService = Executors.newCachedThreadPool(); // general worker pool

    // Networking state
    private ServerSocket serverSocket;                 // listening server socket
    private volatile boolean isServerRunning = true;   // server loop flag
    private volatile Socket clientSocket;              // currently connected client
    private volatile BufferedWriter socketWriter;      // writer to the client socket

    // TTS and playback control
    private TextToSpeech ttsEngine;            // text-to-speech engine
    private boolean isTtsInitialized = false;  // TTS ready flag
    private AudioTrack audioTrack;             // PCM playback track

    // Queues holding recorded PCM chunks
    private final Queue<byte[]> recordingQueue = new LinkedList<>(); // captured audio
    private final Queue<byte[]> pausedQueue = new LinkedList<>();    // stashed remainder while paused
    private final Queue<byte[]> playbackQueue = new LinkedList<>();  // chunks pending playback

    // Atomic flags for thread-safe playback state
    private final AtomicBoolean isPlaying = new AtomicBoolean(false); // playback active
    private final AtomicBoolean isPaused = new AtomicBoolean(false);  // playback paused
    private volatile boolean isPlaybackThreadActive = false;          // playback thread alive

    // Locks guarding shared resources
    private final Object audioTrackLock = new Object();     // guards audioTrack
    private final Object playbackQueueLock = new Object();  // guards playbackQueue
    private final Object recordingQueueLock = new Object(); // guards recordingQueue

    // Main-thread handler: UI updates and dispatch of control events.
    // Message codes 0x11..0x24 are the app's internal event protocol.
    private final Handler handler = new Handler(Looper.getMainLooper()) {
        @Override
        public void handleMessage(@NonNull Message msg) {
            switch (msg.what) {
                case 0x11: // client connected
                    Toast.makeText(MainActivity.this, "客户端已连接", Toast.LENGTH_SHORT).show();
                    break;
                case 0x12: // recording started
                    Toast.makeText(MainActivity.this, "开始录音", Toast.LENGTH_SHORT).show();
                    sendJsonPacket("startRecorder", null);
                    playTts("开始录音");
                    break;
                case 0x14: // recording stopped
                    Toast.makeText(MainActivity.this, "停止录音", Toast.LENGTH_SHORT).show();
                    sendJsonPacket("stopRecorder", null);
                    playTts("停止录音");
                    break;
                case 0x16: // error
                    Toast.makeText(MainActivity.this, "错误: " + msg.obj, Toast.LENGTH_LONG).show();
                    break;
                case 0x17: // playback finished
                    Toast.makeText(MainActivity.this, "播放完成", Toast.LENGTH_SHORT).show();
                    isPlaying.set(false);
                    isPlaybackThreadActive = false;
                    updatePlayButtonsState();
                    break;
                case 0x18: // appended to playback queue
                    Toast.makeText(MainActivity.this, "已添加到播放队列", Toast.LENGTH_SHORT).show();
                    break;
                case 0x19: // playback-state change → refresh buttons
                    updatePlayButtonsState();
                    break;
                case 0x20: // playback paused
                    sendJsonPacket("pauseSound", null);
                    playTts("播放暂停");
                    break;
                case 0x21: // playback stopped
                    sendJsonPacket("stopSound", null);
                    playTts("播放停止");
                    break;
                case 0x22: // playback resumed
                    sendJsonPacket("resumeSound", null);
                    playTts("继续播放");
                    break;
                case 0x23: // clear all recordings
                    sendJsonPacket("clearSounds", null);
                    playTts("清空所有录音");
                    break;
                case 0x24: // incoming play command carrying Base64 audio
                    String base64Data = (String) msg.obj;
                    try {
                        byte[] decodedData = Base64.decode(base64Data, Base64.DEFAULT);
                        addBase64ToPlaybackQueue(decodedData);
                    } catch (IllegalArgumentException e) {
                        Log.e(TAG, "Base64解码失败", e);
                        sendErrorMessage("无效的Base64数据");
                    }
                    break;
            }
        }
    };

    /**
     * Activity creation: wire up TTS, views, listeners, permissions and the
     * TCP server (port 30000) plus its reader thread.
     *
     * @param savedInstanceState saved state
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // Initialize the TTS engine (onInit is the readiness callback).
        ttsEngine = new TextToSpeech(this, this);
        initViews();
        setupClickListeners();
        checkPermissions();
        startServer(30000);
        startSocketListener(); // start the socket reader loop
    }

    /**
     * Bind UI controls and set their initial enabled state.
     */
    private void initViews() {
        // Bind buttons
        startRecordButton = findViewById(R.id.startRecordButton);
        stopRecordButton = findViewById(R.id.stopRecordButton);
        playSoundButton = findViewById(R.id.playSoundButton);
        pauseSoundButton = findViewById(R.id.pauseSoundButton);
        stopSoundButton = findViewById(R.id.stopSoundButton);
        resumeSoundButton = findViewById(R.id.resumeSoundButton);
        clearSoundsButton = findViewById(R.id.clearSoundsButton);
        // Initial button state
        stopRecordButton.setEnabled(false);
        pauseSoundButton.setEnabled(false);
        stopSoundButton.setEnabled(false);
        resumeSoundButton.setEnabled(false);
    }

    /**
     * Attach click listeners to the buttons.
     */
    private void setupClickListeners() {
        startRecordButton.setOnClickListener(v -> startRecording());
        stopRecordButton.setOnClickListener(v -> stopRecording());
        playSoundButton.setOnClickListener(v -> addToPlaybackQueue());
        pauseSoundButton.setOnClickListener(v -> {
            pausePlayback();
            handler.sendEmptyMessage(0x20);
        });
        stopSoundButton.setOnClickListener(v -> {
            stopPlayback();
            handler.sendEmptyMessage(0x21);
        });
        resumeSoundButton.setOnClickListener(v -> {
            if (isPaused.get() && !playbackQueue.isEmpty()) {
                resumePlayback();
                handler.sendEmptyMessage(0x22);
            }
        });
        clearSoundsButton.setOnClickListener(v -> {
            clearAllRecordings();
            handler.sendEmptyMessage(0x23);
        });
    }

    // ==================== Recording ====================

    /**
     * Start microphone capture: set up AudioRecord and a 100 ms periodic
     * capture task that feeds both the local queue and the socket peer.
     */
    private void startRecording() {
        // Check RECORD_AUDIO permission
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            sendErrorMessage("没有录音权限");
            return;
        }
        // If already recording, release resources first
        if (isRecording.get()) {
            releaseAudioResources();
        }
        try {
            // Initialize the recorder
            audioRecord = new AudioRecord(
                    MediaRecorder.AudioSource.MIC,
                    SAMPLE_RATE,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    BUFFER_SIZE
            );
            if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
                throw new IllegalStateException("AudioRecord 初始化失败");
            }
            // Start capturing
            audioRecord.startRecording();
            isRecording.set(true);
            // Drop any previously captured audio
            synchronized (recordingQueueLock) {
                recordingQueue.clear();
            }
            pausedQueue.clear();
            // Update UI state
            startRecordButton.setEnabled(false);
            stopRecordButton.setEnabled(true);
            updatePlayButtonsState();
            // Launch the periodic capture task (every 100 ms)
            if (scheduler != null && !scheduler.isShutdown()) {
                scheduler.shutdownNow();
            }
            scheduler = Executors.newSingleThreadScheduledExecutor();
            scheduler.scheduleAtFixedRate(this::captureAudioData, 0, 100, TimeUnit.MILLISECONDS);
            // Notify: recording started
            handler.sendEmptyMessage(0x12);
        } catch (Exception e) {
            Log.e(TAG, "录音启动失败", e);
            sendErrorMessage("录音启动失败: " + e.getMessage());
            releaseAudioResources();
        }
    }

    /**
     * Stop microphone capture and release capture resources.
     */
    private void stopRecording() {
        if (!isRecording.get()) return;
        isRecording.set(false);
        releaseAudioResources();
        // Update UI state
        stopRecordButton.setEnabled(false);
        startRecordButton.setEnabled(true);
        updatePlayButtonsState();
        // Notify: recording stopped
        handler.sendEmptyMessage(0x14);
    }

    /**
     * Read one buffer of audio, store a copy in the recording queue and send
     * it to the socket peer as a Base64 "recording" packet. Runs on the
     * scheduler thread every 100 ms while recording.
     */
    private void captureAudioData() {
        if (!isRecording.get() || audioRecord == null) return;
        byte[] buffer = new byte[BUFFER_SIZE];
        try {
            int bytesRead = audioRecord.read(buffer, 0, BUFFER_SIZE);
            if (bytesRead > 0) {
                // Keep a copy of the captured chunk locally
                synchronized (recordingQueueLock) {
                    recordingQueue.offer(buffer.clone());
                }
                // Forward the chunk to the peer
                // NOTE(review): the full buffer is encoded even when
                // bytesRead < BUFFER_SIZE — trailing bytes are stale/zero.
                String base64Data = Base64.encodeToString(buffer, Base64.DEFAULT);
                sendJsonPacket("recording", base64Data);
            }
        } catch (Exception e) {
            Log.e(TAG, "音频采集失败", e);
        }
    }

    // ==================== End of recording ====================

    // ==================== Playback ====================

    /**
     * Copy the current recording into the playback queue and start playback
     * if nothing is playing yet.
     */
    private void addToPlaybackQueue() {
        if (recordingQueue.isEmpty()) {
            Toast.makeText(this, "没有可播放的录音", Toast.LENGTH_SHORT).show();
            return;
        }
        // Deep-copy the recorded chunks
        Queue<byte[]> recordingCopy = new LinkedList<>();
        synchronized (recordingQueueLock) {
            for (byte[] data : recordingQueue) {
                recordingCopy.offer(data.clone());
            }
        }
        // Append to the playback queue
        synchronized (playbackQueueLock) {
            playbackQueue.addAll(recordingCopy);
        }
        // Start playback immediately if idle, otherwise just notify
        if (!isPlaybackThreadActive && !isPlaying.get()) {
            executorService.execute(this::playRecordingQueue);
        } else {
            handler.sendEmptyMessage(0x18);
        }
    }

    /**
     * Split decoded audio bytes into 4096-byte chunks, enqueue them for
     * playback, and start playback if idle.
     *
     * @param decodedData decoded PCM bytes (from a Base64 "playSound" packet)
     */
    private void addBase64ToPlaybackQueue(byte[] decodedData) {
        if (decodedData == null || decodedData.length == 0) {
            Log.w(TAG, "无效的音频数据");
            return;
        }
        ByteArrayInputStream inputStream = new ByteArrayInputStream(decodedData);
        byte[] buffer;
        int bytesRead;
        // Read through a fixed-size buffer
        buffer = new byte[4096]; // fixed chunk size
        try {
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                if (bytesRead > 0) {
                    byte[] dataChunk = new byte[bytesRead];
                    System.arraycopy(buffer, 0, dataChunk, 0, bytesRead);
                    synchronized (playbackQueueLock) {
                        playbackQueue.offer(dataChunk);
                    }
                }
            }
        } catch (IOException e) {
            Log.e(TAG, "读取音频数据失败", e);
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                Log.e(TAG, "关闭输入流失败", e);
            }
        }
        // Start playback immediately if idle, otherwise just notify
        if (!isPlaybackThreadActive && !isPlaying.get()) {
            executorService.execute(this::playRecordingQueue);
        } else {
            handler.sendEmptyMessage(0x18); // notify: appended to queue
        }
    }

    /**
     * Playback loop: create an AudioTrack and stream every queued chunk to it,
     * honoring the pause flag. Runs on a worker thread.
     */
    private void playRecordingQueue() {
        isPlaybackThreadActive = true;
        isPlaying.set(true);
        isPaused.set(false);
        handler.sendEmptyMessage(0x19); // refresh button state
        // Determine the playback buffer size
        int bufferSize = AudioTrack.getMinBufferSize(
                SAMPLE_RATE,
                AudioFormat.CHANNEL_OUT_MONO,
                AudioFormat.ENCODING_PCM_16BIT
        );
        // (Re)create the AudioTrack
        synchronized (audioTrackLock) {
            if (audioTrack != null) {
                try {
                    audioTrack.stop();
                    audioTrack.release();
                } catch (Exception e) {
                    Log.e(TAG, "释放AudioTrack失败", e);
                }
            }
            try {
                audioTrack = new AudioTrack(
                        AudioManager.STREAM_MUSIC,
                        SAMPLE_RATE,
                        AudioFormat.CHANNEL_OUT_MONO,
                        AudioFormat.ENCODING_PCM_16BIT,
                        bufferSize,
                        AudioTrack.MODE_STREAM
                );
                audioTrack.play();
            } catch (IllegalStateException e) {
                Log.e(TAG, "创建AudioTrack失败", e);
                stopPlayback();
                return;
            }
        }
        // Drain the playback queue
        while (isPlaying.get() && !playbackQueue.isEmpty()) {
            if (isPaused.get()) {
                // Paused: poll until resumed
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break;
                }
                continue;
            }
            byte[] audioData;
            synchronized (playbackQueueLock) {
                audioData = playbackQueue.poll();
            }
            if (audioData != null) {
                synchronized (audioTrackLock) {
                    if (audioTrack != null && audioTrack.getState() == AudioTrack.STATE_INITIALIZED) {
                        try {
                            audioTrack.write(audioData, 0, audioData.length);
                        } catch (IllegalStateException e) {
                            Log.e(TAG, "音频写入失败: " + e.getMessage());
                            break;
                        }
                    } else {
                        Log.w(TAG, "AudioTrack不可用,停止播放");
                        break;
                    }
                }
            }
        }
        // Release the track once playback is done
        try {
            synchronized (audioTrackLock) {
                if (audioTrack != null) {
                    if (audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {
                        audioTrack.stop();
                    }
                    audioTrack.release();
                    audioTrack = null;
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "播放完成后释放资源失败", e);
        }
        // Playback finished
        stopPlayback();
        handler.sendEmptyMessage(0x17);
    }

    /**
     * Pause playback: stash the remaining queue into pausedQueue and pause
     * the AudioTrack.
     */
    private void pausePlayback() {
        if (!isPlaying.get() || isPaused.get()) return;
        isPaused.set(true);
        // Stash the remaining chunks
        synchronized (playbackQueueLock) {
            pausedQueue.clear();
            pausedQueue.addAll(playbackQueue);
            playbackQueue.clear();
        }
        // Pause the track
        synchronized (audioTrackLock) {
            if (audioTrack != null && audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {
                try {
                    audioTrack.pause();
                } catch (IllegalStateException e) {
                    Log.e(TAG, "暂停播放失败: " + e.getMessage());
                }
            }
        }
        handler.sendEmptyMessage(0x19); // refresh button state
        runOnUiThread(() ->
                Toast.makeText(MainActivity.this, "播放已暂停", Toast.LENGTH_SHORT).show()
        );
    }

    /**
     * Resume playback: restore the stashed chunks and restart the track.
     */
    private void resumePlayback() {
        if (!isPaused.get() || pausedQueue.isEmpty()) {
            return;
        }
        isPaused.set(false);
        isPlaying.set(true);
        // Restore the playback position
        synchronized (playbackQueueLock) {
            playbackQueue.clear();
            playbackQueue.addAll(pausedQueue);
            pausedQueue.clear();
        }
        // Resume the track
        synchronized (audioTrackLock) {
            if (audioTrack != null && audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PAUSED) {
                try {
                    audioTrack.play();
                } catch (IllegalStateException e) {
                    Log.e(TAG, "恢复播放失败: " + e.getMessage());
                }
            }
        }
        handler.sendEmptyMessage(0x19); // refresh button state
        runOnUiThread(() ->
                Toast.makeText(MainActivity.this, "继续播放", Toast.LENGTH_SHORT).show()
        );
    }

    /**
     * Stop playback, release the AudioTrack and clear both playback queues.
     */
    private void stopPlayback() {
        isPlaying.set(false);
        isPaused.set(false);
        synchronized (audioTrackLock) {
            if (audioTrack != null) {
                try {
                    if (audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING ||
                            audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PAUSED) {
                        audioTrack.stop();
                    }
                    audioTrack.release();
                } catch (IllegalStateException e) {
                    Log.e(TAG, "停止播放失败: " + e.getMessage());
                } finally {
                    audioTrack = null;
                }
            }
        }
        synchronized (playbackQueueLock) {
            playbackQueue.clear();
        }
        pausedQueue.clear();
        runOnUiThread(() -> {
                    handler.sendEmptyMessage(0x19); // refresh button state
                    Toast.makeText(MainActivity.this, "播放已停止", Toast.LENGTH_SHORT).show();
                }
        );
    }

    // Stop playback and drop every stored recording.
    private void clearAllRecordings() {
        stopPlayback();
        synchronized (recordingQueueLock) {
            recordingQueue.clear();
        }
        pausedQueue.clear();
        synchronized (playbackQueueLock) {
            playbackQueue.clear();
        }
        handler.sendEmptyMessage(0x19); // refresh button state
        runOnUiThread(() ->
                Toast.makeText(MainActivity.this, "所有录音已清除", Toast.LENGTH_SHORT).show()
        );
    }

    // ==================== End of playback ====================

    // ==================== Helpers ====================

    /**
     * Recompute the enabled state of the playback buttons from the current
     * recording/playback flags (always on the UI thread).
     */
    private void updatePlayButtonsState() {
        runOnUiThread(() -> {
            boolean hasRecordings = !recordingQueue.isEmpty() || !pausedQueue.isEmpty();
            boolean isPlayingState = isPlaying.get() && !isPaused.get();
            playSoundButton.setEnabled(hasRecordings && !isPlayingState);
            pauseSoundButton.setEnabled(isPlayingState);
            stopSoundButton.setEnabled(isPlaying.get() || isPaused.get());
            resumeSoundButton.setEnabled(!playbackQueue.isEmpty() && isPaused.get());
            clearSoundsButton.setEnabled(hasRecordings);
        });
    }

    /**
     * Speak the given text via TTS (no-op until the engine is initialized).
     *
     * @param text text to speak
     */
    private void playTts(String text) {
        if (isTtsInitialized) {
            ttsEngine.speak(text, TextToSpeech.QUEUE_FLUSH, null);
        }
    }

    /**
     * Release the AudioRecord and shut down the capture scheduler.
     */
    private void releaseAudioResources() {
        if (audioRecord != null) {
            try {
                if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
                    audioRecord.stop();
                }
                audioRecord.release();
            } catch (IllegalStateException e) {
                Log.e(TAG, "停止录音失败", e);
            } finally {
                audioRecord = null;
            }
        }
        if (scheduler != null) {
            try {
                scheduler.shutdownNow();
                if (!scheduler.awaitTermination(500, TimeUnit.MILLISECONDS)) {
                    Log.w(TAG, "录音线程池未正常关闭");
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                scheduler = null;
            }
        }
    }

    /**
     * Send a JSON packet to the connected client. Packets are delimited by a
     * blank line ("\n\n") on the wire.
     *
     * @param type packet type
     * @param data payload (may be null)
     */
    private void sendJsonPacket(String type, Object data) {
        if (clientSocket == null || clientSocket.isClosed() || socketWriter == null) {
            return;
        }
        try {
            JSONObject packet = new JSONObject();
            packet.put("type", type);
            if (data != null) {
                packet.put("data", data);
            }
            synchronized (this) {
                if (socketWriter != null) {
                    socketWriter.write(packet.toString());
                    socketWriter.write("\n\n");
                    socketWriter.flush();
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "发送数据包失败: " + type, e);
        }
    }

    /**
     * Post an error message to the UI (shown as a Toast by the handler).
     *
     * @param message error text
     */
    private void sendErrorMessage(String message) {
        handler.obtainMessage(0x16, message).sendToTarget();
    }

    /**
     * Request the RECORD_AUDIO permission if not yet granted.
     */
    private void checkPermissions() {
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSION_REQUEST_CODE);
        }
    }

    /**
     * Start the TCP server on a worker thread and accept clients in a loop.
     * Only the most recently accepted client is kept.
     *
     * @param port listening port
     */
    private void startServer(int port) {
        executorService.execute(() -> {
            try {
                serverSocket = new ServerSocket(port);
                Log.i(TAG, "服务器启动: " + port);
                while (isServerRunning) {
                    try {
                        Socket socket = serverSocket.accept();
                        clientSocket = socket;
                        synchronized (this) {
                            socketWriter = new BufferedWriter(
                                    new OutputStreamWriter(socket.getOutputStream(), "UTF-8"));
                        }
                        handler.sendEmptyMessage(0x11);
                    } catch (IOException e) {
                        if (isServerRunning) Log.e(TAG, "接受连接失败", e);
                    }
                }
            } catch (IOException e) {
                Log.e(TAG, "服务器启动失败", e);
                runOnUiThread(() -> Toast.makeText(this,
                        "服务器启动失败: " + e.getMessage(), Toast.LENGTH_LONG).show());
            } finally {
                closeServerSocket();
            }
        });
    }

    /**
     * Reader loop: parse blank-line-delimited JSON packets from the client
     * and dispatch them to the handler as control messages.
     */
    private void startSocketListener() {
        executorService.execute(() -> {
            while (true) {
                if (clientSocket != null && !clientSocket.isClosed()) {
                    try {
                        BufferedReader reader = new BufferedReader(
                                new InputStreamReader(clientSocket.getInputStream(), "UTF-8"));
                        StringBuilder packetBuilder = new StringBuilder();
                        String line;
                        while ((line = reader.readLine()) != null) {
                            if (line.isEmpty()) {
                                // Blank line terminates one packet
                                if (packetBuilder.length() > 0) {
                                    String packet = packetBuilder.toString();
                                    Log.d(TAG, "收到数据包: " + packet);
                                    try {
                                        JSONObject command = new JSONObject(packet);
                                        String type = command.getString("type");
                                        switch (type) {
                                            case "playSound":
                                                String base64Data = command.getString("data");
                                                Message msg = handler.obtainMessage(0x24, base64Data);
                                                handler.sendMessage(msg);
                                                break;
                                            case "pauseSound":
                                                handler.sendEmptyMessage(0x20);
                                                break;
                                            case "stopSound":
                                                handler.sendEmptyMessage(0x21);
                                                break;
                                            case "resumeSound":
                                                handler.sendEmptyMessage(0x22);
                                                break;
                                            case "clearSounds":
                                                handler.sendEmptyMessage(0x23);
                                                break;
                                            default:
                                                Log.w(TAG, "未知指令类型: " + type);
                                        }
                                    } catch (JSONException e) {
                                        Log.e(TAG, "JSON解析失败", e);
                                    }
                                    // Reset for the next packet
                                    packetBuilder.setLength(0);
                                }
                            } else {
                                // Accumulate the current packet
                                packetBuilder.append(line);
                            }
                        }
                    } catch (IOException e) {
                        Log.e(TAG, "Socket读取失败", e);
                    }
                } else {
                    try {
                        Thread.sleep(500);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            }
        });
    }

    /**
     * Close the listening server socket, ignoring close errors.
     */
    private void closeServerSocket() {
        try {
            if (serverSocket != null && !serverSocket.isClosed()) {
                serverSocket.close();
            }
        } catch (IOException e) {
            Log.w(TAG, "关闭ServerSocket失败", e);
        }
    }

    /**
     * TTS initialization callback: enables TTS only when Chinese is available.
     *
     * @param status initialization status
     */
    @Override
    public void onInit(int status) {
        if (status == TextToSpeech.SUCCESS) {
            int result = ttsEngine.setLanguage(Locale.CHINESE);
            if (result == TextToSpeech.LANG_MISSING_DATA ||
                    result == TextToSpeech.LANG_NOT_SUPPORTED) {
                Log.e(TAG, "TTS语言不支持中文");
            } else {
                isTtsInitialized = true;
            }
        }
    }

    /**
     * Activity teardown: stop the server, recording, playback, TTS, and shut
     * down the thread pool gracefully.
     */
    @Override
    protected void onDestroy() {
        super.onDestroy();
        isServerRunning = false;
        if (ttsEngine != null) {
            ttsEngine.stop();
            ttsEngine.shutdown();
        }
        closeServerSocket();
        closeSocket(clientSocket);
        // Stop all recording and playback
        stopRecording();
        stopPlayback();
        // Graceful thread-pool shutdown
        executorService.shutdown();
        try {
            if (!executorService.awaitTermination(800, TimeUnit.MILLISECONDS)) {
                executorService.shutdownNow();
            }
        } catch (InterruptedException e) {
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
        }
        // Make sure all audio resources are released
        releaseAudioResources();
    }

    /**
     * Close a socket and, if it is the active client, clear the shared
     * client/writer references.
     *
     * @param socket socket to close
     */
    private void closeSocket(Socket socket) {
        try {
            if (socket != null && !socket.isClosed()) {
                socket.close();
            }
        } catch (IOException e) {
            Log.w(TAG, "关闭Socket失败", e);
        }
        if (socket == clientSocket) {
            clientSocket = null;
            synchronized (this) {
                socketWriter = null;
            }
        }
    }
}
// (Trailing text from the original post: a request to "generate a python script".)
07-04
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额 3.43,前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值