in query_vcvarsall raise ValueError(str(list(result.keys()))) ValueError: [u'path'] 解决

本文介绍了在 Windows 的 Python 2.7 环境下使用 pip 安装需要编译的包时遇到的一个错误,并给出了详细的错误信息。该错误的原因是 distutils 未能获取到完整的 Visual C++ 编译环境(query_vcvarsall 只取到了 path 变量),通过下载并安装 VCForPython27 可以解决此问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

执行 pip install ****** 时,报异常 :

File "D:\Program Files (x86)\python27\lib\distutils\msvc9compiler.py", line 299, in query_vcvarsall raise ValueError(str(list(result.keys()))) ValueError: [u'path']    

----------------------------------------
    Command ""D:\Program Files (x86)\python27\python.exe" -c "import setuptools,
 tokenize;__file__='c:\\users\\meng.li\\appdata\\local\\temp\\pip-build-g9fu3q\\
storm\\setup.py';exec(compile(getattr(tokenize, 'open', open)(__file__).read().r
eplace('\r\n', '\n'), __file__, 'exec'))" install --record c:\users\meng.li\appd
ata\local\temp\pip-hqg7m5-record\install-record.txt --single-version-externally-
managed --compile" failed with error code 1 in c:\users\meng.li\appdata\local\te
mp\pip-build-g9fu3q\storm

解决方法:

下载:VCForPython27(Microsoft Visual C++ Compiler for Python 2.7),地址:http://www.microsoft.com/en-us/download/details.aspx?id=44266

安装完成后,重新执行 pip install 命令即可解决。

我正在编辑【python】代码,遇到了 【Traceback (most recent call last): File "D:\python\python3.11.5\Lib\site-packages\langchain\_api\module_import.py", line 69, in import_by_name module = importlib.import_module(new_module) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\python\python3.11.5\Lib\importlib\__init__.py", line 126, in import_module return _bootstrap._gcd_import(name[level:], package, level) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "<frozen importlib._bootstrap>", line 1204, in _gcd_import File "<frozen importlib._bootstrap>", line 1176, in _find_and_load File "<frozen importlib._bootstrap>", line 1126, in _find_and_load_unlocked File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed File "<frozen importlib._bootstrap>", line 1204, in _gcd_import File "<frozen importlib._bootstrap>", line 1176, in _find_and_load File "<frozen importlib._bootstrap>", line 1140, in _find_and_load_unlocked ModuleNotFoundError: No module named 'langchain_community' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "D:\code\nepu_spider\rag\rag_sys.py", line 11, in <module> from langchain.embeddings import HuggingFaceEmbeddings File "D:\python\python3.11.5\Lib\site-packages\langchain\embeddings\__init__.py", line 167, in __getattr__ return _import_attribute(name) ^^^^^^^^^^^^^^^^^^^^^^^ File "D:\python\python3.11.5\Lib\site-packages\langchain\_api\module_import.py", line 72, in import_by_name raise ModuleNotFoundError( ModuleNotFoundError: Module langchain_community.embeddings not found. Please install langchain-community to access this module. 
You can install it using `pip install -U langchain-community` 】 ,请帮我检查并改正错误点。我的原始代码如下: 【import os import json import pickle from langchain_core.language_models import LLM import install_faiss import numpy as np import requests from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings import HuggingFaceEmbeddings from langchain.vectorstores import FAISS from langchain.chains import RetrievalQA from langchain.prompts import PromptTemplate from langchain.llms import OpenAI from langchain.docstore.document import Document from typing import List, Dict, Any, Tuple, Optional # 自定义智谱AI的LangChain包装器 class ZhipuAILLM(LLM): def __init__(self, api_key: str, model: str = "glm-4", temperature: float = 0.7, zhipuai=None): """初始化智谱AI模型""" self.api_key = api_key self.model = model self.temperature = temperature zhipuai.api_key = api_key # 设置API密钥 def _call(self, prompt: str, stop: Optional[List[str]] = None, zhipuai=None) -> str: """调用智谱AI API生成文本""" try: response = zhipuai.model_api.invoke( model=self.model, prompt=[{"role": "user", "content": prompt}], temperature=self.temperature, top_p=0.7, ) if response["code"] == 200: return response["data"]["choices"][0]["content"] else: raise ValueError(f"智谱AI API错误: {response[&#39;msg&#39;]}") except Exception as e: raise RuntimeError(f"调用智谱AI失败: {str(e)}") @property def _llm_type(self) -> str: return "zhipuai" class RAGSystem: def __init__(self, config: Dict[str, Any]): """初始化RAG系统""" self.config = config self.embedding_model = self._load_embedding_model() self.llm = self._load_llm() self.vector_db = None self.doc_id_map = {} # 自动加载或创建索引 self._initialize_index() def _load_embedding_model(self) -> HuggingFaceEmbeddings: """加载嵌入模型""" model_name = self.config.get("embedding_model", "sentence-transformers/all-mpnet-base-v2") return HuggingFaceEmbeddings( model_name=model_name, model_kwargs={"device": self.config.get("embedding_device", "cpu")} ) def _load_llm(self): """加载大语言模型 - 使用智谱AI""" llm_provider = 
self.config.get("llm_provider", "zhipuai") if llm_provider == "zhipuai": return ZhipuAILLM( api_key=self.config["zhipuai_api_key"], # 使用智谱AI密钥 model=self.config.get("llm_model", "glm-4"), temperature=self.config.get("temperature", 0.7) ) elif llm_provider == "openai": # 保留OpenAI支持 from langchain.llms import OpenAI return OpenAI( api_key=self.config["openai_api_key"], model_name=self.config.get("llm_model", "gpt-3.5-turbo"), temperature=self.config.get("temperature", 0.7) ) else: raise ValueError(f"不支持的LLM提供者: {llm_provider}") def _initialize_index(self): """初始化索引:加载现有或创建新索引""" index_path = self.config["index_path"] if os.path.exists(index_path): print(f"加载现有索引: {index_path}") self._load_vector_index() else: print(f"创建新索引: {index_path}") self._create_new_index() def _create_new_index(self): """创建新索引""" data_dir = self.config["data_dir"] if not os.path.exists(data_dir): print(f"数据目录不存在: {data_dir}") if self.config.get("auto_download", False): self._download_sample_data() else: raise FileNotFoundError(f"数据目录不存在: {data_dir}") documents = self._load_and_process_documents() self._create_vector_index(documents) def _download_sample_data(self): """下载示例数据""" print("下载示例数据...") data_dir = self.config["data_dir"] os.makedirs(data_dir, exist_ok=True) sample_urls = [ "https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs_skeleton.json" ] for url in sample_urls: response = requests.get(url) filename = os.path.basename(url) file_path = os.path.join(data_dir, filename) with open(file_path, "wb") as f: f.write(response.content) print(f"下载完成: {filename}") def _load_and_process_documents(self) -> List[Document]: """加载并处理文档""" documents = [] data_dir = self.config["data_dir"] # 支持多种文件格式 for filename in os.listdir(data_dir): file_path = os.path.join(data_dir, filename) if filename.endswith(".json") or filename.endswith(".jsonl"): documents.extend(self._load_json_documents(file_path)) elif filename.endswith(".txt"): 
documents.extend(self._load_text_documents(file_path)) if not documents: raise ValueError(f"在 {data_dir} 中没有找到可处理的文档") # 文本分块 return self._split_documents(documents) def _load_json_documents(self, file_path: str) -> List[Document]: """加载JSON或JSONL文档""" documents = [] with open(file_path, "r") as f: if file_path.endswith(".jsonl"): # 处理JSONL文件 for line in f: try: data = json.loads(line) doc = self._create_document_from_data(data) documents.append(doc) except json.JSONDecodeError: print(f"跳过无效的JSON行: {line.strip()}") else: # 处理JSON文件 try: data = json.load(f) if isinstance(data, list): for item in data: doc = self._create_document_from_data(item) documents.append(doc) elif isinstance(data, dict): doc = self._create_document_from_data(data) documents.append(doc) except json.JSONDecodeError: print(f"无效的JSON文件: {file_path}") return documents def _load_text_documents(self, file_path: str) -> List[Document]: """加载纯文本文档""" with open(file_path, "r", encoding="utf-8") as f: content = f.read() return [Document( page_content=content, metadata={ "source": file_path, "title": os.path.basename(file_path), "category": "text" } )] def _create_document_from_data(self, data: Dict) -> Document: """从数据创建文档对象""" return Document( page_content=data.get("content", data.get("text", "")), metadata={ "source": data.get("url", data.get("source", "")), "title": data.get("title", ""), "category": data.get("category", "unknown"), "timestamp": data.get("timestamp", "") } ) def _split_documents(self, documents: List[Document]) -> List[Document]: """分割文档为块""" text_splitter = RecursiveCharacterTextSplitter( chunk_size=self.config.get("chunk_size", 1000), chunk_overlap=self.config.get("chunk_overlap", 200), length_function=len ) return text_splitter.split_documents(documents) def _create_vector_index(self, documents: List[Document]): """创建FAISS向量索引""" # 创建向量数据库 self.vector_db = FAISS.from_documents( documents=documents, embedding=self.embedding_model ) # 保存索引 
os.makedirs(os.path.dirname(self.config["index_path"]), exist_ok=True) self.vector_db.save_local(self.config["index_path"]) # 创建文档ID映射 for idx, doc in enumerate(documents): self.doc_id_map[idx] = { "source": doc.metadata["source"], "title": doc.metadata["title"], "category": doc.metadata["category"] } # 保存映射表 map_path = os.path.join(os.path.dirname(self.config["index_path"]), "doc_id_map.pkl") with open(map_path, "wb") as f: pickle.dump(self.doc_id_map, f) print(f"✅ 向量索引已创建并保存至 {self.config[&#39;index_path&#39;]}") def _load_vector_index(self): """加载现有的FAISS向量索引""" index_path = self.config["index_path"] # 加载向量数据库 self.vector_db = FAISS.load_local( folder_path=index_path, embeddings=self.embedding_model ) # 加载文档映射表 map_path = os.path.join(os.path.dirname(index_path), "doc_id_map.pkl") if os.path.exists(map_path): with open(map_path, "rb") as f: self.doc_id_map = pickle.load(f) print(f"✅ 文档映射表已加载") else: print("⚠️ 文档映射表未找到,将使用空映射") def ask_question(self, question: str, history: Optional[List] = None) -> Dict: """提问并获取答案""" if self.vector_db is None: raise ValueError("向量数据库未初始化") # 创建检索器 retriever = self.vector_db.as_retriever( search_kwargs={ "k": self.config.get("retrieval_top_k", 5), "score_threshold": self.config.get("score_threshold", 0.4) } ) # 创建问答链 qa_chain = self._create_qa_chain(retriever) # 执行问答 result = qa_chain({"query": question}) # 提取源文档 source_docs = result["source_documents"] doc_ids = [doc.metadata.get("doc_id", idx) for idx, doc in enumerate(source_docs)] # 获取完整上下文 full_contexts = [self.doc_id_map.get(did, {"title": "未知", "source": ""}) for did in doc_ids] return { "question": question, "answer": result["result"], "source_documents": source_docs, "full_contexts": full_contexts } def _create_qa_chain(self, retriever) -> Any: """创建问答链""" # 自定义提示模板 prompt_template = """ 基于以下上下文信息,请以专业、准确的方式回答用户的问题。如果上下文信息不足以回答问题,请如实告知用户。 上下文信息: {context} 问题: {question} 请提供详细的回答: """ QA_PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", 
"question"] ) # 创建问答链 return RetrievalQA.from_chain_type( llm=self.llm, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs={"prompt": QA_PROMPT} ) def add_document(self, content: str, metadata: Dict): """添加单个文档到索引""" if self.vector_db is None: raise ValueError("向量数据库未初始化") # 创建文档对象 doc = Document(page_content=content, metadata=metadata) # 分割文档 split_docs = self._split_documents([doc]) # 添加到索引 self.vector_db.add_documents(split_docs) # 更新文档映射 start_idx = max(self.doc_id_map.keys()) + 1 if self.doc_id_map else 0 for idx, doc in enumerate(split_docs): self.doc_id_map[start_idx + idx] = { "source": doc.metadata["source"], "title": doc.metadata["title"], "category": doc.metadata["category"] } print(f"✅ 添加了 {len(split_docs)} 个文档块") def save_index(self): """保存索引到磁盘""" if self.vector_db is None: raise ValueError("向量数据库未初始化") # 保存索引 self.vector_db.save_local(self.config["index_path"]) # 保存映射表 map_path = os.path.join(os.path.dirname(self.config["index_path"]), "doc_id_map.pkl") with open(map_path, "wb") as f: pickle.dump(self.doc_id_map, f) print(f"✅ 索引已保存至 {self.config[&#39;index_path&#39;]}") def create_default_config() -> Dict: """创建默认配置 - 使用智谱AI""" return { "data_dir": "data", "index_path": "index/faiss_index", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_device": "cpu", "llm_provider": "zhipuai", # 默认使用智谱AI "zhipuai_api_key": "1fc6d23e95224503aa94bfcca6a31903.FXoiEbfDgymrE9FA", # 您的智谱AI密钥 "llm_model": "glm-4", # 智谱AI的GLM-4模型 "temperature": 0.7, "chunk_size": 1000, "chunk_overlap": 200, "retrieval_top_k": 5, "score_threshold": 0.4, "auto_download": True } def interactive_cli(): """交互式命令行界面 - 适配智谱AI""" config = create_default_config() # 设置智谱AI API密钥 api_key = input("请输入智谱AI API密钥(或直接回车使用默认值): ") if api_key.strip(): config["zhipuai_api_key"] = api_key # 选择模型 model_choice = input("请选择模型 (1=GLM-4, 2=GLM-3-Turbo, 回车使用GLM-4): ") if model_choice == "2": config["llm_model"] = "glm-3-turbo" # 初始化RAG系统 rag = 
RAGSystem(config) # 交互问答 print("\nRAG系统已就绪(使用智谱AI),输入问题开始查询(输入&#39;q&#39;退出)") history = [] while True: question = input("\n>>> 问题: ") if question.lower() in [&#39;q&#39;, &#39;quit&#39;, &#39;exit&#39;]: break try: result = rag.ask_question(question, history) # 显示结果 print(f"\n💡 答案: {result[&#39;answer&#39;]}") if result["full_contexts"]: print("\n📚 信息来源:") for i, ctx in enumerate(result["full_contexts"]): print(f" {i + 1}. {ctx[&#39;title&#39;]} ({ctx[&#39;category&#39;]})") print(f" 来源: {ctx[&#39;source&#39;]}") # 添加到历史 history.append({"question": question, "answer": result["answer"]}) except Exception as e: print(f"❌ 错误: {str(e)}") # 保存索引 rag.save_index() print("\n索引已保存,再见!") if __name__ == "__main__": interactive_cli() 】
07-08
请问这个类中的get_current_activity()方法,在其他用例文件,都有那些调用方式,方法如下: # !/usr/bin/env python # -*- coding: utf-8 -*- from typing import Optional import threading import subprocess import openpyxl import re from hytest import * import time import random from appium import webdriver from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import WebDriverException import os import zipfile import tempfile import shutil from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException import cv2 import numpy as np from lib.common import ADBHelper, get_app_driver, get_sup_app_driver, Operate from selenium.webdriver.common.by import By from PIL import Image import statistics from openpyxl import load_workbook from hytest import * from appium import webdriver import sys import os import re import time import platform import subprocess from appium.webdriver.common.multi_action import MultiAction from appium.webdriver.common.touch_action import TouchAction from selenium.webdriver.common.by import By from appium.webdriver.common.appiumby import AppiumBy from multiprocessing.dummy import Process from selenium.webdriver import ActionChains import yaml, ruamel.yaml import hytest from selenium.common.exceptions import ( TimeoutException, ElementNotInteractableException, StaleElementReferenceException, NoSuchElementException, WebDriverException ) ############################################################################### CURRENT_TIME = datetime.now().strftime(&#39;%Y%m%d%H%M%S&#39;) GLOBAL_REPORT_FOLDER = f"./reports/task_{CURRENT_TIME}_report" os.makedirs(GLOBAL_REPORT_FOLDER, exist_ok=True) class AppiumAutomationUtils: def __init__(self, device_name, platform_version, is_large_install=False): self.is_large_install = is_large_install # 根据是否为大文件安装场景设置超时 self.timeout = 300 if is_large_install else 30 self.desired_caps = { "platformName": 
"Android", "deviceName": device_name, "appium:platformVersion": platform_version, "appium:automationName": "UiAutomator2", "newCommandTimeout": self.timeout, "appium:adbExecTimeout": 60000 } self.driver = webdriver.Remote(&#39;http://localhost:4723/wd/hub&#39;, self.desired_caps) self.last_activity_time = time.time() self.folder_name = GLOBAL_REPORT_FOLDER self.report_file = os.path.join(self.folder_name, "report.xlsx") self.report_files = os.path.join(self.folder_name, "downapp_report.xlsx") self.report_checkfile = os.path.join(self.folder_name, "checkapp_report.xlsx") def restart_appium_server(self): # 启动Appium服务 start_cmd = &#39;appium -a 127.0.0.1 -p 4723 --session-override --allow-insecure=adb_shell&#39; subprocess.Popen(start_cmd, shell=True) time.sleep(10) @property def get_driver(self): return self.driver def start_app(self, package_name): crash_count, anr_count = 0, 0 flower_count, black_count, white_count = 0, 0, 0 try: self.driver.activate_app(package_name) time.sleep(5) start_time = time.time() while time.time() - start_time < 10: status = self.driver.query_app_state(package_name) if status != 2: if self.check_app_crash(): crash_count += 1 break if self.check_app_anr(): anr_count += 1 is_flower, is_black, is_white = self.verify_screen() if is_flower: flower_count += 1 if is_black: black_count += 1 if is_white: white_count += 1 time.sleep(3) except Exception as e: if self.check_app_crash(): crash_count += 1 elif self.check_app_anr(): anr_count += 1 raise RuntimeError(f"应用启动失败: {str(e)}") return crash_count, anr_count, flower_count, black_count, white_count def check_screen(self): timestamp = int(time.time() * 1000) temp_dir = "D:/problem/temp_screenshots/" os.makedirs(temp_dir, exist_ok=True) temp_path = os.path.join(temp_dir, f"temp_{timestamp}.png") try: screenshot = self.driver.get_screenshot_as_png() with open(temp_path, &#39;wb&#39;) as f: f.write(screenshot) except Exception as e: return False, False, False, None img_cv = cv2.imread(temp_path) if 
img_cv is None: os.remove(temp_path) return False, False, False, None height, width, _ = img_cv.shape total_pixels = height * width aspect_ratio = width / height if 1.5 <= aspect_ratio <= 2.0: num_rows, num_cols = 4, 8 elif aspect_ratio > 2.0: num_rows, num_cols = 3, 12 else: num_rows, num_cols = 3, 6 block_h, block_w = height // num_rows, width // num_cols block_list = [] FEATURE_THRESHOLDS = {&#39;variance&#39;: 150, &#39;entropy&#39;: 2.2, &#39;edge_density&#39;: 0.08, &#39;contrast&#39;: 40} for row in range(num_rows): for col in range(num_cols): y_start = max(0, row * block_h) y_end = min(height, (row + 1) * block_h) x_start = max(0, col * block_w) x_end = min(width, (col + 1) * block_w) block = img_cv[y_start:y_end, x_start:x_end] gray = cv2.cvtColor(block, cv2.COLOR_BGR2GRAY) h, w = gray.shape block_area = h * w variance = np.var(gray) hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten() prob = hist / (block_area + 1e-7) entropy = -np.sum(prob * np.log(prob + 1e-7)) edges = cv2.Canny(gray, 50, 150) edge_pixels = np.count_nonzero(edges) edge_density = edge_pixels / block_area contrast = np.max(gray) - np.min(gray) block_list.append({ &#39;variance&#39;: variance, &#39;entropy&#39;: entropy, &#39;edge_density&#39;: edge_density, &#39;contrast&#39;: contrast, &#39;area&#39;: block_area, &#39;coords&#39;: (x_start, y_start, x_end, y_end) }) # 原花屏检测逻辑 all_variances = [b[&#39;variance&#39;] for b in block_list] all_entropies = [b[&#39;entropy&#39;] for b in block_list] global_variance_mean = statistics.mean(all_variances) global_entropy_mean = statistics.mean(all_entropies) dynamic_thresholds = { &#39;variance&#39;: global_variance_mean - 1.5 * np.std(all_variances), &#39;entropy&#39;: global_entropy_mean - 1.5 * np.std(all_entropies) } flower_blocks = [b for b in block_list if sum([b[&#39;variance&#39;] < dynamic_thresholds[&#39;variance&#39;], b[&#39;entropy&#39;] < dynamic_thresholds[&#39;entropy&#39;], b[&#39;edge_density&#39;] > 
FEATURE_THRESHOLDS[&#39;edge_density&#39;], b[&#39;contrast&#39;] < FEATURE_THRESHOLDS[&#39;contrast&#39;]]) >= 3 and b[&#39;area&#39;] >= (total_pixels * 0.01)] is_flower_screen = (sum(b[&#39;area&#39;] for b in flower_blocks) / total_pixels) > 0.1 # 黑白屏检测逻辑 img_pil = Image.open(temp_path).convert(&#39;RGBA&#39;) color, total = (0, 0, 0), 0 for count, (r, g, b, a) in img_pil.getcolors(img_pil.size[0] * img_pil.size[1]): if a != 0: color = (color[0] + r * count, color[1] + g * count, color[2] + b * count) total += count if total > 0: dominant_color = (int(color[0] / total), int(color[1] / total), int(color[2] / total)) mean_brightness = statistics.mean(dominant_color) is_black_screen = mean_brightness < 10 is_white_screen = mean_brightness > 254 else: is_black_screen, is_white_screen = True, False return is_flower_screen, is_black_screen, is_white_screen, temp_path def verify_screen(self): """ 二次检测屏幕异常,减少检测屏幕异常误报率 """ save_dir = "D:/problem/problemphoto/" os.makedirs(save_dir, exist_ok=True) temp_files = [] # 第一次检测 first_flower, first_black, first_white, first_temp = self.check_screen() if first_temp: temp_files.append(first_temp) if not (first_flower or first_black or first_white): self.clean_temp_files(temp_files) return False, False, False time.sleep(0.8) second_flower, second_black, second_white, second_temp = self.check_screen() if second_temp: temp_files.append(second_temp) # 最终判定:两次同类异常才保存 final_flower = first_flower and second_flower final_black = first_black and second_black final_white = first_white and second_white # 截图保存逻辑 if final_flower or final_black or final_white: timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime()) anomaly_types = [] if final_flower: anomaly_types.append("flower") if final_black: anomaly_types.append("black") if final_white: anomaly_types.append("white") filename = f"{timestamp}_{&#39;_&#39;.join(anomaly_types)}.png" # 保存二次检测的临时截图到目标目录 shutil.copy(second_temp, os.path.join(save_dir, filename)) self.log_error() # 
清理所有临时文件(无论是否保存) self.clean_temp_files(temp_files) return final_flower, final_black, final_white def clean_temp_files(self, temp_files): """ 辅助方法:安全删除临时文件 """ for path in temp_files: if os.path.exists(path): try: os.remove(path) except Exception as e: pass def install_app(self, apk_path): """使用 os.system 执行 adb 命令""" command = f"adb install -d -r -g {apk_path}" exit_code = os.system(command) if exit_code == 0: print("应用安装成功") else: print("安装失败") def random_operation(self, duration,package_name,timeout=25): crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count = 0, 0, 0, 0, 0 # 获取设备屏幕尺寸 screen = self.driver.get_window_size() screen_width = screen[&#39;width&#39;] screen_height = screen[&#39;height&#39;] start_time = time.time() last_operation_time = time.time() # 记录最后一次成功操作的时间 while time.time() - start_time < duration: # 检测全局超时 if time.time() - last_operation_time > timeout: print(f"操作超时({timeout}秒无响应),结束测试") break #驻留检测 try: currentt_package = self.get_current_package().lower() target_package = package_name.lower() if currentt_package != target_package: print(f"当前包名不匹配目标!!!") self.driver.activate_app(package_name) time.sleep(3) last_operation_time = time.time() except Exception as e: print(f"驻留检测失败:{e}") try: # 随机选择操作类型(点击/滑动/输入) operation = random.choice([&#39;click&#39;, &#39;swipe&#39;, &#39;input&#39;]) if operation == &#39;click&#39;: # 随机坐标点击 x = random.randint(0, screen_width) y = random.randint(0, screen_height) self.driver.tap([(x, y)], duration=50) time.sleep(0.3) elif operation == &#39;swipe&#39;: # 随机方向滑动(上下左右) direction = random.choice([&#39;up&#39;, &#39;down&#39;, &#39;left&#39;, &#39;right&#39;]) if direction == &#39;up&#39;: self.driver.swipe(screen_width // 2, screen_height * 3 // 4, screen_width // 2, screen_height // 4, 500) elif direction == &#39;down&#39;: self.driver.swipe(screen_width // 2, screen_height // 4, screen_width // 2, screen_height * 3 // 4, 500) elif direction == &#39;left&#39;: 
self.driver.swipe(screen_width * 3 // 4, screen_height // 2, screen_width // 4, screen_height // 2, 500) elif direction == &#39;right&#39;: self.driver.swipe(screen_width // 4, screen_height // 2, screen_width * 3 // 4, screen_height // 2, 500) time.sleep(0.5) elif operation == &#39;input&#39;: input_elements = self.driver.find_elements(AppiumBy.CLASS_NAME, "android.widget.EditText") if input_elements: input_element = random.choice(input_elements) input_element.click() random_digits = &#39;&#39;.join(str(random.randint(0, 9)) for _ in range(random.randint(1, 10))) input_element.send_keys(random_digits) time.sleep(0.8) if self.check_app_crash(): crash_count += 1 if self.check_app_anr(): anr_count += 1 is_flower, is_black, is_white = self.verify_screen() if is_flower: flower_screen_count += 1 if is_black: black_screen_count += 1 if is_white: white_screen_count += 1 last_operation_time = time.time() except Exception as e: pass return (crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count) def restart_app(self, times, package_name): """ 带重试逻辑的应用重启方法,支持 Activity 动态获取 :param times: 重启循环次数 :param package_name: 目标应用包名 """ crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count = 0, 0, 0, 0, 0 for _ in range(times): try: # 步骤1:终止应用并等待 self.driver.terminate_app(package_name) time.sleep(5) # 步骤2:尝试激活应用 self.driver.activate_app(package_name) time.sleep(5) except Exception as e: self.last_activity_time = time.time() retry_count = 0 while retry_count < 2: try: self.driver.activate_app(package_name) time.sleep(5) current_package = self.driver.current_package if package_name == current_package: break else: print(f"第{retry_count + 1}次启动未启动成功") retry_count += 1 time.sleep(5) except Exception as retry_e: continue if self.check_app_crash(): crash_count += 1 if self.check_app_anr(): anr_count += 1 is_flower, is_black, is_white = self.verify_screen() if is_flower: flower_screen_count += 1 if is_black: black_screen_count += 1 if 
is_white: white_screen_count += 1 return crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count def uninstall_app(self, package_name): try: self.driver.terminate_app(package_name) os.system(f"adb uninstall {package_name}") return True except: return False pass def generate_report(self, app_name, package_name, crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count, app_version, install_result, start_result,uninstall_result): if not os.path.exists(self.report_file): wb = openpyxl.Workbook() sheet = wb.active sheet.append(["序号", "应用名称", "包名", "应用版本号", "安装应用", "启动应用", "闪退次数", "ANR次数", "花屏次数", "黑屏次数", "白屏次数", "卸载结果","统计"]) row_number = 1 else: wb = openpyxl.load_workbook(self.report_file) sheet = wb.active row_number = len(sheet[&#39;A&#39;]) install_result_str = "成功" if install_result else "失败" start_result_str = "成功" if start_result else "失败" uninstall_result_str = "成功" if uninstall_result else "失败" has_failure = (not install_result) or (not start_result) or \ (crash_count > 0 or anr_count > 0 or flower_screen_count > 0 or black_screen_count > 0 or white_screen_count > 0) status = "fail" if has_failure else "pass" sheet.append([ row_number, app_name, package_name, app_version, install_result_str, start_result_str, crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count,uninstall_result_str, status ]) # 保存文件 wb.save(self.report_file) def generate_report_even_failed(self, app_name, package_name, crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count, app_version, install_result, start_result,uninstall_result): try: self.generate_report(app_name, package_name, crash_count, anr_count, flower_screen_count, black_screen_count, white_screen_count, app_version, install_result, start_result,uninstall_result) except Exception as e: print(f"生成报告时出错:{str(e)}") def log_error(self): current_timestamp = datetime.now().strftime(&#39;%Y%m%d%H%M%S&#39;) log_folder = 
f"D:/problem/logs/{current_timestamp}" os.makedirs(log_folder, exist_ok=True) adb_pull_command = f"pull /data/log/hilogs {log_folder}" ADBHelper().adb(adb_pull_command) time.sleep(10) adb_pull_command = f"pull /data/log/dropbox {log_folder}" ADBHelper().adb(adb_pull_command) time.sleep(10) def quit_driver(self): self.driver.quit() def click_element_by_texts(self, texts): """循环匹配文本点击页面文本元素""" screenshot_dir = "D:/problem/clickscreenshot" if not os.path.exists(screenshot_dir): os.makedirs(screenshot_dir) print(f"已创建截图文件夹:{screenshot_dir}") for text in texts: try: element = self.driver.find_element(AppiumBy.ANDROID_UIAUTOMATOR, f&#39;new UiSelector().text("{text}")&#39;) if element.is_enabled() and element.is_displayed(): element.click() return True except: continue else: screenshot_name = f"{int(time.time())}.png" screenshot_path = os.path.join(screenshot_dir, screenshot_name) self.driver.save_screenshot(screenshot_path) return False def get_app_version(self, package_name): """获取当前应用版本""" try: result = os.popen(f"adb shell dumpsys package {package_name} | findstr versionName").read().strip() if result: parts = result.split(&#39;=&#39;) if len(parts) > 1: return parts[1] return "未知版本" except: return "未知版本" def check_app_crash(self): """检测应用是否闪退""" try: current_activity = self.get_current_activity() home_activity = "com.huawei.android.launcher.unihome.UniHomeLauncher" if current_activity == home_activity: print(f"应用发生闪退!!!") self.scrennphoto_problem(problem_type="crash") self.log_error() return True else: return False except Exception as e: print(f"检测闪退时出错: {e}") return None def scrennphoto_problem(self, problem_type: str): """封装:截图并记录日志(根据问题类型生成不同目录)""" base_dir = "D:/problem/" screenshot_dir = os.path.join(base_dir, f"{problem_type}photo/") os.makedirs(screenshot_dir, exist_ok=True) timestamp = time.strftime(&#39;%Y%m%d_%H%M%S&#39;) screenshot_path = os.path.join(screenshot_dir, f"{problem_type}_{timestamp}.png") if self.driver.save_screenshot(screenshot_path): 
print(f"截图保存成功:{screenshot_path}") else: print(f"截图保存失败:{screenshot_path}") # def check_app_anr(self): # """判断当前页面是否存在ANR问题""" # try: # # 执行ANR检测命令 # result = subprocess.check_output( # "adb shell logcat -d | grep -i ANR", # shell=True, # text=True, # stderr=subprocess.STDOUT # ) # # 判断是否存在ANR # if "ANR" in result: # self.scrennphoto_problem(problem_type="anr") # self.log_error() # return True # return True # except Exception as e: # return False def check_app_anr(self): """判断当前页面是否存在ANR问题""" anr_keywords = ["无响应", "关闭应用", "是否将其关闭", "等待"] try: has_anr_screen = False for keyword in anr_keywords: elements = self.driver.find_elements( by=AppiumBy.XPATH, value=f"//*[contains(@text, &#39;{keyword}&#39;)]" ) if elements: has_anr_screen = True break if has_anr_screen: print(f"检测到ANR:日志存在ANR记录且屏幕显示无响应提示") self.scrennphoto_problem(problem_type="anr") self.log_error() return True else: return False except Exception as e: print(f"ANR检测异常:{str(e)}") return False def is_target_activity(self, package_name, activity): """判断当前 Activity 是否属于目标应用""" return activity and activity.startswith(package_name) def get_current_activity(self): """获取当前Android设备的Activity名称""" try: # 执行ADB命令获取窗口信息 command = "adb shell dumpsys window | findstr mCurrentFocus" result = subprocess.check_output(command, shell=True, text=True, timeout=5) except subprocess.CalledProcessError: return "错误:ADB命令执行失败,请检查设备连接" except subprocess.TimeoutExpired: return "错误:命令执行超时,请确认ADB服务正常" except Exception as e: return f"错误:{str(e)}" if not result.strip(): return "提示:未获取到窗口信息,请先打开一个应用" # 用正则表达式匹配/后面的内容 match = re.search(r&#39;/([^ }]+)&#39;, result) if match: return match.group(1) else: return "提示:未找到Activity名称,输出格式可能不一致" def get_current_package(self): """验证应用是否下载成功""" current_package = self.driver.current_package # 获取当前包名 return current_package def pull_download_apk(self): """ 从Android设备提取指定包名的APK文件 """ # 固定参数设置 adb_path = &#39;adb&#39; output_dir = &#39;D:\\apk&#39; # 获取当前应用包名 try: package_name = 
self.get_current_package() if not package_name: raise ValueError("无法获取当前应用包名") except Exception as e: raise RuntimeError(f"获取包名失败: {str(e)}") # 确保输出目录存在 # os.makedirs(output_dir, exist_ok=True) if not os.path.exists(output_dir): os.makedirs(output_dir) # 获取包安装路径 cmd_get_path = f"{adb_path} shell pm path {package_name}" try: result = subprocess.run(cmd_get_path, capture_output=True, text=True, shell=True, timeout=30) if result.returncode != 0: raise RuntimeError(f"获取包路径失败: {result.stderr.strip()}") # 解析输出结果 output = result.stdout.strip() if not output: raise ValueError(f"未找到包名为 &#39;{package_name}&#39; 的应用") # 提取APK路径 (取第一个路径) apk_paths = re.findall(r&#39;package:(.+)&#39;, output) if not apk_paths: raise ValueError(f"无法解析包路径: {output}") device_path = apk_paths[0].strip() print(f"设备路径: {device_path}") except subprocess.TimeoutExpired: raise RuntimeError("获取包路径超时,请检查设备连接") # 创建本地文件名和路径 local_filename = f"{package_name}.apk" local_path = os.path.join(output_dir, local_filename) # 执行pull命令 cmd_pull = f"{adb_path} pull {device_path} \"{local_path}\"" try: result = subprocess.run(cmd_pull, capture_output=True, text=True, shell=True, timeout=60) if result.returncode != 0: raise RuntimeError(f"提取APK失败: {result.stderr.strip()}") except subprocess.TimeoutExpired: raise RuntimeError("提取APK超时,文件可能过大") # 验证文件是否成功提取 if not os.path.exists(local_path): raise FileNotFoundError(f"文件提取失败: {local_path}") print(f"成功提取APK到: {local_path}") return local_path def check_downapp_verify(self, package_name, app_name): """检测应用是否下载正确(返回字典)""" try: current_package = self.get_current_package().lower() expected_package = package_name.lower() # 构造返回结果(包含状态、应用名、包名、原因) result = { "status": None, "app_name": app_name, "package_name": package_name, "reason": "" } if current_package == expected_package: return True else: result["status"] = False result["reason"] = f"下载应用包名不符" return result except Exception as e: return { "status": None, "app_name": app_name, "package_name": package_name, "reason": 
f"检测应用时出错: {e}" } def downapp_report(self, app_name, package_name, download_result, pullapk_result, remark=None): if not os.path.exists(self.report_files): wb = openpyxl.Workbook() sheet = wb.active sheet.append(["应用名称", "包名", "下载结果", "上传结果", "统计", "备注"]) row_number = 1 else: wb = openpyxl.load_workbook(self.report_files) sheet = wb.active row_number = len(sheet[&#39;A&#39;]) download_result_str = "成功" if download_result else "失败" pullapk_result_str = "成功" if pullapk_result else "失败" has_failure = (not download_result) or (not pullapk_result) status = "fail" if has_failure else "pass" sheet.append([ app_name, package_name, download_result_str, pullapk_result_str, status, remark ]) wb.save(self.report_files) def click_element_with_swipe(self,driver, target_id, target_text, timeout=10, max_swipe=0): """ 定位并点击同时满足ID和文本条件的元素(未找到时下滑重试),返回操作结果 """ uiautomator_selector = f&#39;new UiSelector().resourceId("{target_id}").textContains("{target_text}")&#39; located = False for attempt in range(max_swipe + 1): try: element = WebDriverWait(driver, timeout).until( EC.element_to_be_clickable(("-android uiautomator", uiautomator_selector)) ) element.click() print(f"成功点击元素(ID={target_id}, 文本={target_text})") located = True break except TimeoutException: if attempt < max_swipe: print(f"第{attempt + 1}次定位超时,尝试下滑...") self.swipe_down(driver) else: print(f"已尝试{max_swipe + 1}次,未找到符合条件的元素") except Exception as e: print(f"操作失败,原因:{str(e)}") located = False break return located def swipe_down(self, driver, duration=500, swipe_times=1): """ 动态计算坐标实现下滑(页面向下滚动) """ # 获取屏幕尺寸 window_size = driver.get_window_size() x = window_size["width"] y = window_size["height"] # x, y = ADBHelper().get_phone_size() x1 = x * 0.5 y1 = y * 0.9 x2 = x * 0.5 y2 = y * 0.2 for i in range(swipe_times): driver.swipe(x1, y1, x2, y2, duration) print(f"第{i + 1}/{swipe_times}次下滑操作完成,等待页面加载...") driver.implicitly_wait(3) print(f"全部{swipe_times}次下滑操作执行完毕") def swipe_up(self, driver, duration=500, swipe_times=1): """ 
动态计算坐标实现上滑(页面向上滚动) """ # 获取屏幕尺寸 # window_size = driver.get_window_size() # x = window_size["width"] # y = window_size["height"] x, y = ADBHelper().get_phone_size() x1 = x * 0.5 y1 = y * 0.2 x2 = x * 0.5 y2 = y * 0.9 for i in range(swipe_times): driver.swipe(x1, y1, x2, y2, duration) print(f"第{i + 1}/{swipe_times}次上滑操作完成,等待页面加载...") driver.implicitly_wait(3) print(f"全部{swipe_times}次上滑操作执行完毕") def go_back(self, driver, times=1, interval=3): """ :param times: 要点击返回键的次数(需≥0,默认1次) :param interval: 每次点击的间隔时间(秒,默认3秒) """ # 执行返回键点击 try: for i in range(times): driver.press_keycode(4) print(f"已点击返回键(第{i + 1}/{times}次)") time.sleep(interval) return True except WebDriverException as e: print(f"返回键操作失败,原因:{str(e)}") pass return False def wait_element_click(self,driver, locator, timeout=600): """ 等待元素可点击后尝试点击,返回点击是否成功 """ try: element = WebDriverWait(driver, timeout).until( EC.element_to_be_clickable(locator) ) except Exception as e: return False try: element.click() return True except (ElementNotInteractableException, StaleElementReferenceException, WebDriverException): return False def download_AG_app(self, driver, app_name, package_name): market_package = "com.huawei.appmarket" try: for i in range(2): driver.press_keycode(187) try: driver.find_element(By.ID, &#39;com.huawei.android.launcher:id/clear_all_recents_image_button&#39;).click() except: pass driver.press_keycode(3) except: pass time.sleep(10) driver.activate_app(market_package) time.sleep(10) self.click_element_with_swipe(driver, target_id=&#39;com.huawei.appmarket:id/enter_button&#39;, target_text=&#39;暂不安装&#39;) self.click_element_with_swipe(driver, target_id=&#39;android:id/button2&#39;, target_text=&#39;以后再说&#39;) self.swipe_up(driver, swipe_times=3) driver.find_element(By.ID, &#39;com.huawei.appmarket:id/fixed_search_view&#39;).click() time.sleep(3) src_text = driver.find_element(By.ID, "com.huawei.appmarket:id/search_src_text") src_text.set_text(app_name) time.sleep(3) driver.find_element(By.ID, 
&#39;com.huawei.appmarket:id/hwsearchview_search_text_button&#39;).click() time.sleep(3) result1 = self.click_element_with_swipe(driver, target_id=&#39;com.huawei.appmarket:id/ItemTitle&#39;,target_text=f&#39;{app_name}&#39;, max_swipe=3) # 可以在应用市场搜索到该应用 if result1 == True: time.sleep(5) # 场景1:应用未安装 result2= self.click_element_with_swipe(driver, target_id=&#39;com.huawei.appmarket:id/hwprogressbutton_percentage_text_view&#39;,target_text=&#39;安装&#39;) if result2 == True: open_text = (AppiumBy.ANDROID_UIAUTOMATOR, &#39;new UiSelector().text("打开")&#39;) result3=self.wait_element_click(driver, open_text) # 应用规定时间内安装完成 if result3 == True: time.sleep(5) self.click_element_with_swipe(driver,target_id=&#39;com.android.permissioncontroller:id/permission_allow_button&#39;,target_text=&#39;允许&#39;) else: # 下载超时&开发者原因暂不支持下载 self.click_element_with_swipe(driver,target_id=&#39;com.huawei.appmarket:id/hwprogressbutton_percentage_text_view&#39;,target_text=&#39;%&#39;) self.go_back(driver, times=3) return { "status": "notime", "app_name": app_name, "package_name": package_name, "reason": f"{app_name}下载超时&开发者原因暂不支持下载 " } # 场景2:应用已存在 else: time.sleep(30) result4 = self.click_element_with_swipe(driver,target_id=&#39;com.huawei.appmarket:id/hwprogressbutton_percentage_text_view&#39;, target_text=&#39;打开&#39;) if result4 == True: time.sleep(5) self.click_element_with_swipe(driver, target_id=&#39;com.android.permissioncontroller:id/permission_allow_button&#39;,target_text=&#39;允许&#39;) else: pass else: failure_info = { "status": "fail1", "app_name": app_name, "package_name": package_name, "reason": f"应用市场未找到应用:{app_name}" } self.go_back(driver, times=3) return failure_info def check_apk_architecture(self,apk_path: str, app_name: str, package_name: str) -> dict: """ 检测APK架构并返回指定格式的结果字典 :param apk_path: APK文件路径 :param app_name: 应用名称(需外部传入) :param package_name: 应用包名(需外部传入) :return: 包含检测状态的字典(check_info) """ # 初始化默认结果(检测失败状态) check_info = { "status": "fail", "app_name": app_name, 
"package_name": package_name, "remark": "检测失败" } x64_archs = {&#39;arm64-v8a&#39;, &#39;x86_64&#39;, &#39;mips64&#39;} x32_archs = {&#39;armeabi&#39;, &#39;armeabi-v7a&#39;, &#39;x86&#39;, &#39;mips&#39;} detected_64 = set() detected_32 = set() try: if not os.path.isfile(apk_path): check_info["remark"] = "检测失败:APK文件不存在" return check_info with zipfile.ZipFile(apk_path, &#39;r&#39;) as zip_ref: all_members = zip_ref.namelist() for member in all_members: member = member.replace(&#39;\\&#39;, &#39;/&#39;) if member.startswith("lib/"): lib_subpath = member.split("lib/")[1].split(&#39;/&#39;) if len(lib_subpath) < 1: continue arch_dir = lib_subpath[0].lower() if not arch_dir: continue if arch_dir in x32_archs: detected_32.add(arch_dir) elif arch_dir in x64_archs: detected_64.add(arch_dir) # 判断检测结果 has_64bit = len(detected_64) > 0 if has_64bit: # 64位检测成功 check_info.update({ "status": "success", "remark": "应用64位,已保留" }) else: # 32位检测成功 os.remove(apk_path) check_info.update({ "status": "success", "remark": "应用32位,已删除" }) except Exception as e: check_info["remark"] = f"检测失败:{e}" return check_info def checkapp_report(self, app_name, package_name, check_result): if not os.path.exists(self.report_checkfile): wb = openpyxl.Workbook() sheet = wb.active sheet.append(["应用名称", "包名", "检测结果"]) row_number = 1 else: wb = openpyxl.load_workbook(self.report_checkfile) sheet = wb.active row_number = len(sheet[&#39;A&#39;]) sheet.append([ app_name, package_name, check_result ]) wb.save(self.report_checkfile)
最新发布
07-26
import socket import subprocess import websocket import time import os import threading import json import pyaudio import requests import hashlib import base64 from audioplayer import AudioPlayer import numpy as np from runner import set_global_var, get_global_var device_status = {} def listen_devices(): try: # 检测设备连接状态 result = subprocess.check_output("adb devices", shell=True).decode() current_devices = set(line.split(&#39;\t&#39;)[0] for line in result.splitlines()[1:] if line) # 检测新连接设备 for dev in current_devices - set(device_status.keys()): print(f"[设备已连接] {dev}") device_status[dev] = "connected" # 检测断开设备 for dev in set(device_status.keys()) - current_devices: print(f"[设备已断开连接] {dev}") del device_status[dev] time.sleep(1) except Exception as e: print(f"设备监控错误: {e}") def pcm_to_utf8(pcm_data: bytearray) -> str: """将16位PCM音频数据转为UTF-8字符串""" def validate_pcm(data: bytearray) -> bool: """验证PCM数据有效性""" return len(data) % 2 == 0 # 16位PCM需为偶数长度 if not validate_pcm(pcm_data): raise ValueError("无效的PCM数据长度,16位PCM需为偶数长度") try: # 转为16位有符号整数数组(小端序) samples = np.frombuffer(pcm_data, dtype=&#39;<i2&#39;) # 标准化到0-255范围 normalized = ((samples - samples.min()) * (255 / (samples.max() - samples.min()))).astype(np.uint8) # 转换为UTF-8字符串 return bytes(normalized).decode(&#39;utf-8&#39;, errors=&#39;replace&#39;) except Exception as e: raise RuntimeError(f"转换失败: {str(e)}") # 打印前32字节的十六进制表示 def parse_packets(buffer): """解析接收到的数据包""" # 解析数据包 end_marker = b&#39;\n\n&#39; while buffer.find(end_marker) != -1: packet_bytes = buffer[:buffer.find(end_marker) + len(end_marker)] buffer = buffer[buffer.find(end_marker) + len(end_marker):] try: json_bytes = packet_bytes[:-len(end_marker)] json_str = json_bytes.decode(&#39;utf-8&#39;) packet = json.loads(json_str) # 处理数据包 packet_type = packet.get("type") if packet_type == "recording": audio_data = base64.b64decode(packet.get("data", "")) print(&#39;audio_data &#39;, audio_data) return audio_data elif packet_type in ["startRecorder", "stopRecord"]: 
pass # command_callback(packet_type) else: print(f"未知数据包类型: {packet_type}") except json.JSONDecodeError as e: print(f"JSON解析错误: {e}") except Exception as e: print(f"数据包处理错误: {e}") def start_server(port=35000): adb_path = "adb.exe" os.system(f"adb forward tcp:{port} tcp:30000") with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect((&#39;localhost&#39;, port)) #s.bind((&#39;0.0.0.0&#39;, port)) #s.listen(5) print(f"服务器已启动,正在监听端口 {port}...") while True: threading.Thread(target=listen_devices).start() #client_socket, addr = s.accept() #print(f"接收到来自 {addr} 的连接") buffer = bytearray() try: while True: data = s.recv(4096) if data == b&#39;&#39;: pass else: print(&#39;data&#39;, data) #buffer.extend(data) if not data: print("连接断开") break buffer.extend(data) hex_preview = parse_packets(buffer) handle_audio_chunk(hex_preview) print(&#39;hex_preview&#39;,hex_preview) &#39;&#39;&#39;if data==b&#39;&#39;: pass else: if len(data) > 0: hex_preview = &#39; &#39;.join(f&#39;{b:02x}&#39; for b in data[:32]) print(f"前32字节: {hex_preview}...") #handle_audio_chunk(hex_preview) # 调试用:将PCM转为UTF-8 if len(data) < 1024: try: utf8_data = pcm_to_utf8(data) print(f"UTF-8预览: {utf8_data[:30]}...") #handle_audio_chunk(utf8_data) except: pass&#39;&#39;&#39; except Exception as e: print(f"接收音频数据异常: {e}") # 全局配置信息 # 播放时是否停止收音 stop_recording_when_playing = True # 打断播放的语音指令 stop_playing_words = ["别说了", "停止", "停下"] # 说话人id voice_speaker_id = 5199 # 语音活动检测-静音时间长度,超过这个时间视为停止说话 vad_silent_time = 1.5 # 语音合成参数 tts_params = {"lan": "zh", "cuid": "test-1234", "ctp": 1, "pdt":993, "spd":5, "pit": 5,"aue": 3} # 语音识别开始指令参数 asr_params = { "type": "START", "data": { "dev_pid": 1912, "dev_key": "com.baidu.open", "format": "pcm", "sample": 16000, "cuid": "my_test_dev", "type": 1, "asr_type": 1, "need_mid": False, "need_session_finish": True } } # 全局状态变量 ws_running = False ws_object = None recorder_running = False sound_play_list = [] current_audio_player = None chat_running = False current_query = 
&#39;&#39; last_asr_time = 0 def ws_send_start_command(ws): message = json.dumps(asr_params) ws.send_text(message) def ws_send_stop_command(ws): # 发送数据 msg_data = { "type": "FINISH", } message = json.dumps(msg_data) ws.send_text(message) def on_ws_message(ws, message): global current_query, last_asr_time data = json.loads(message) cmd_type = data.get("type") if cmd_type == &#39;MID_TEXT&#39;: mid_text = data.get("result") set_global_var("voicebot.asr.mid_text", mid_text) last_asr_time = time.time() # print("voicebot.asr.mid_text:", mid_text) elif cmd_type == "FIN_TEXT": query = data.get("result") # print("asr result:", query) set_global_var("voicebot.asr.result", query) last_asr_time = time.time() if query and len(query) > 0: current_query += query set_global_var("voicebot.chat.query", current_query) if ws_running == False: ws.close() def on_ws_close(ws, close_status_code, close_msg): print("websocket closed:", close_status_code, close_msg) def on_ws_error(ws, error): print(f"websocket Error: {error}") ws.close() def on_ws_open(ws): print("websocket connection opened:", ws) ws_send_start_command(ws) def check_chat(query:str): # for word in stop_playing_words: # if word in query: # stop_sound_player() # return False # if query in stop_playing_words: # stop_sound_player() # return False if is_playing_or_chatting(): return False return True def stop_sound_player(): global chat_running if current_audio_player: current_audio_player.stop() if len(sound_play_list) > 0: sound_play_list.clear() chat_running = False def run_chat(query:str): global chat_running chat_running = True set_global_var("voicebot.chat.query", query) params = {"query": query} params[&#39;username&#39;] = get_global_var("voicebot.username") params[&#39;password&#39;] = get_global_var("voicebot.password") response = requests.post("http://127.0.0.1:8010/chat", json=params, stream=True) total_reply = &#39;&#39; buffer = &#39;&#39; for line in response.iter_lines(): if line and chat_running: text = 
line.decode(&#39;utf-8&#39;) data = json.loads(text[5:]) content = data.get("content") buffer += content buffer = extract_play_text(buffer) total_reply += content set_global_var("voicebot.chat.reply", total_reply) # print(content, end=&#39;&#39;, flush=True) chat_running = False buffer = buffer.strip() if len(buffer) > 0: add_play_text(buffer) time.sleep(1) set_global_var("voicebot.chat.query", None) set_global_var("voicebot.chat.reply", None) #提取播放文本 def extract_play_text(total_text:str): separators = ",;。!?:,.!?\n" last_start_pos = 0 min_sentence_length = 4 for i in range(0, len(total_text)): if total_text[i] in separators and i - last_start_pos >= min_sentence_length: text = total_text[last_start_pos: i + 1] last_start_pos = i + 1 add_play_text(text.strip()) return total_text[last_start_pos:] #添加播放文本 def add_play_text(text:str): # print("add play text:", text) if len(text) > 1: sound_play_list.append({"text": text, "mp3_file": None}) # 语音合成 下载声音文件 def download_sound_file(text:str, speaker:int=None): if speaker is None: speaker = voice_speaker_id # print("tts create:", text) mp3_path = "sounds/" + str(speaker) if not os.path.exists(mp3_path): os.mkdir(mp3_path) mp3_file = mp3_path + "/" + hashlib.md5(text.encode(&#39;utf-8&#39;)).hexdigest() + ".mp3" if os.path.exists(mp3_file): return mp3_file params = tts_params params[&#39;per&#39;] = speaker params[&#39;text&#39;] = text url = "http://25.83.75.1:8088/Others/tts/text2audio/json" response = requests.post(url, json=params) data = response.json() if data[&#39;success&#39;] == False: binary_array = json.loads(data[&#39;message&#39;][&#39;message&#39;]) binary_data = bytes(binary_array) string_data = binary_data.decode(&#39;utf-8&#39;, errors=&#39;replace&#39;) data = json.loads(string_data) return "sounds/tts-failed.mp3" else: b64_string = data[&#39;result&#39;].get(&#39;data&#39;) mp3_data = base64.b64decode(b64_string) with open(mp3_file, &#39;wb&#39;) as file: file.write(mp3_data) return mp3_file #开始聊天 def 
is_playing_or_chatting(): return len(sound_play_list) > 0 #播放下一个声音 def play_next_sound(): global sound_play_list, current_audio_player item = sound_play_list[0] mp3_file = item.get("mp3_file") if mp3_file: player = AudioPlayer(mp3_file) current_audio_player = player try: player.play(block=True) except Exception as e: print("player exception:" + e) current_audio_player = None # print("remained sound:", len(sound_play_list)) if len(sound_play_list) > 0: sound_play_list.pop(0) #运行websocket def run_websocket(): global ws_running, ws_object ws_running = True uri = "ws://25.83.75.1:8088/Others/asr/realtime_asr?sn=voicebot" ws = websocket.WebSocketApp(uri, on_message=on_ws_message, on_close=on_ws_close, on_error=on_ws_error) ws_object = ws ws.on_open = on_ws_open ws.run_forever() ws_running = False # print("websocket end") #开始记录 def start_recorder(chuck_size:int=2560): audio = pyaudio.PyAudio() try: stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=chuck_size) return audio, stream except: print("打开麦克风失败") return None, None #获得不发音的时间 def get_silent_chunk(duration:float=0.16): sample_rate = 16000 # 采样率 num_samples = int(sample_rate * duration) # 计算样本数 silent_data = np.zeros(num_samples, dtype=np.int16) silent_bytes = silent_data.tobytes() return silent_bytes #处理音频块 def handle_audio_chunk(chunk_data:bytes): # 接受外部是否收音的要求 recording = get_global_var("voicebot.recording") if ws_object and ws_object.sock and ws_object.sock.connected: if recording == False or (stop_recording_when_playing and is_playing_or_chatting()): # print("ignor audio chunk:", sound_play_list, chat_running) ws_object.send_bytes(get_silent_chunk()) else: ws_object.send_bytes(chunk_data) #运行录音机 def run_recorder(audio=None, stream=None, chuck_size=2560): global recorder_running recorder_running = True set_global_var("voicebot.recording", True) while recorder_running: chunk_data = stream.read(chuck_size) print(&#39;chunk_data)&#39;,chunk_data) 
handle_audio_chunk(chunk_data) stream.stop_stream() stream.close() audio.terminate() # print("recorder end") #运行检查 def run_check(): global ws_running, recorder_running, current_query set_global_var("voicebot.running", True) while ws_running and recorder_running: time.sleep(1) if get_global_var("voicebot.running") == False: break if len(current_query) > 0 and last_asr_time > 0 and time.time() - last_asr_time > vad_silent_time: t = threading.Thread(target=run_chat, args=(current_query,)) t.start() current_query = &#39;&#39; ws_running = recorder_running = False set_global_var("voicebot.running", False) # print("语音助手已经停止") #运行播放机 def run_player(): while ws_running and recorder_running: time.sleep(0.1) if len(sound_play_list) > 0: play_next_sound() def run_tts(): while ws_running and recorder_running: time.sleep(0.1) for item in sound_play_list: if item.get("mp3_file") is None: item[&#39;mp3_file&#39;] = download_sound_file(item[&#39;text&#39;]) def run(): active_threads = threading.enumerate() # 打印每个活跃线程的信息 for t in active_threads: if t.name == &#39;voicebot-runner&#39;: return "语音助手已经在运行中了" audio, stream = start_recorder() if audio is None or stream is None: return {"error": "语音助手开启失败,无法访问麦克风"} t = threading.Thread(target=run_websocket) t.daemon = True t.start() t=threading.Thread(target=start_server()) t.daemon = True t.start() t = threading.Thread(target=run_check, name=&#39;voicebot-runner&#39;) t.daemon = True t.start() t = threading.Thread(target=run_tts) t.daemon = True t.start() t = threading.Thread(target=run_player) t.daemon = True t.start() return "执行成功" if __name__ == "__main__": #run() start_server() 把这个TTS的功能融入到第一个脚本里面生成新脚本,并且修改安卓的代码package com.example.demoapplication; import android.Manifest; import android.content.pm.PackageManager; import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.Build; import android.os.Bundle; import android.os.Handler; import android.os.Looper; import 
android.os.Message; import android.speech.tts.TextToSpeech; import android.util.Base64; import android.util.Log; import android.widget.Button; import android.widget.Toast; import androidx.annotation.NonNull; import androidx.appcompat.app.AppCompatActivity; import androidx.core.app.ActivityCompat; import androidx.core.content.ContextCompat; import org.json.JSONException; import org.json.JSONObject; import java.io.BufferedWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.net.ServerSocket; import java.net.Socket; import java.util.Locale; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; public class MainActivity extends AppCompatActivity implements TextToSpeech.OnInitListener { private static final String TAG = "AudioRecorder"; private Button startRecordButton; private Button stopRecordButton; private Button uploadButton; // 音频录制相关 private AudioRecord audioRecord; private static final int SAMPLE_RATE = 44100; // 音频采样率 private static final int BUFFER_SIZE; // 静态代码块用于初始化缓冲区大小 static { int minBufferSize = 0; if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.CUPCAKE) { minBufferSize = AudioRecord.getMinBufferSize( SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT ); } // 确保缓冲区大小有效 BUFFER_SIZE = Math.max(minBufferSize, 4096); } // 多线程任务调度器 private ScheduledExecutorService scheduler; private AtomicBoolean isRecording = new AtomicBoolean(false); // 录音状态标志 private static final int PERMISSION_REQUEST_CODE = 1; // 权限请求码 // 线程池服务 private final ExecutorService executorService = Executors.newCachedThreadPool(); // 网络服务器相关 private ServerSocket serverSocket; private volatile boolean isServerRunning = true; // 服务器运行状态 private volatile Socket clientSocket; // 客户端Socket连接 private volatile BufferedWriter socketWriter; // Socket写入流 // 文本转语音(TTS)相关变量 private 
TextToSpeech ttsEngine; private boolean isTtsInitialized = false; // 主线程消息处理器,用于UI更新 private final Handler handler = new Handler(Looper.getMainLooper()) { @Override public void handleMessage(@NonNull Message msg) { switch (msg.what) { case 0x11: // 客户端连接成功 Toast.makeText(MainActivity.this, "客户端已连接", Toast.LENGTH_SHORT).show(); break; case 0x12: // 开始录音 Toast.makeText(MainActivity.this, "开始录音", Toast.LENGTH_SHORT).show(); break; case 0x13: // 数据发送成功 // 减少Toast频率,避免刷屏 if (Math.random() < 0.1) { // 10%概率显示 Toast.makeText(MainActivity.this, "录音数据已发送", Toast.LENGTH_SHORT).show(); } break; case 0x14: // 停止录音 Toast.makeText(MainActivity.this, "停止录音", Toast.LENGTH_SHORT).show(); break; case 0x15: // 控制指令 Toast.makeText(MainActivity.this, "收到控制指令:" + msg.obj.toString(), Toast.LENGTH_SHORT).show(); break; case 0x16: // 错误消息 Toast.makeText(MainActivity.this, "错误: " + msg.obj.toString(), Toast.LENGTH_LONG).show(); break; case 0x17: // 网络状态 Toast.makeText(MainActivity.this, "网络: " + msg.obj.toString(), Toast.LENGTH_SHORT).show(); break; } } }; /** * Activity创建时调用,进行初始化操作。 * @param savedInstanceState 保存的状态数据 */ @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); // 初始化TTS引擎 ttsEngine = new TextToSpeech(this, this); initViews(); // 初始化视图组件 setupClickListeners(); // 设置点击事件监听器 checkPermissions(); // 检查权限 startServer(30000); // 启动服务器,端口30000 } /** * 初始化UI视图组件 */ private void initViews() { startRecordButton = findViewById(R.id.startRecordButton); stopRecordButton = findViewById(R.id.stopRecordButton); uploadButton = findViewById(R.id.uploadButton); stopRecordButton.setEnabled(false); uploadButton.setEnabled(false); } /** * 设置按钮点击事件监听器 */ private void setupClickListeners() { startRecordButton.setOnClickListener(v -> startRecording()); stopRecordButton.setOnClickListener(v -> stopRecording()); uploadButton.setOnClickListener(v -> uploadRecording()); } /** * 检查录音权限并请求必要权限 */ private void 
checkPermissions() { if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSION_REQUEST_CODE); } } /** * 开始录音操作 */ private void startRecording() { // 检查权限 if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { sendErrorMessage("没有录音权限"); return; } // 检查是否正在录音 if (isRecording.get() || audioRecord != null) { sendErrorMessage("录音已在进行中"); return; } // 检查网络连接 if (clientSocket == null || clientSocket.isClosed() || socketWriter == null) { sendErrorMessage("客户端未连接,无法录音"); return; } try { // 初始化 AudioRecord audioRecord = new AudioRecord( MediaRecorder.AudioSource.MIC, SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, BUFFER_SIZE ); // 检查初始化状态 if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) { throw new IllegalStateException("AudioRecord 初始化失败"); } // 开始录音 audioRecord.startRecording(); isRecording.set(true); // 更新按钮状态 startRecordButton.setEnabled(false); stopRecordButton.setEnabled(true); uploadButton.setEnabled(false); // 创建定时任务发送音频数据 scheduler = Executors.newSingleThreadScheduledExecutor(); scheduler.scheduleAtFixedRate(this::uploadAudioData, 0, 100, TimeUnit.MILLISECONDS); // 提高发送频率 handler.sendEmptyMessage(0x12); // 发送开始录音的消息 // 发送开始录音控制指令 sendControlPacket("startRecorder"); // 播放TTS提示音 playTts("开始录音"); } catch (Exception e) { Log.e(TAG, "录音启动失败", e); sendErrorMessage("录音启动失败: " + e.getMessage()); releaseAudioResources(); } } /** * 停止录音操作 */ private void stopRecording() { if (!isRecording.get()) return; isRecording.set(false); releaseAudioResources(); // 更新按钮状态 stopRecordButton.setEnabled(false); uploadButton.setEnabled(true); handler.sendEmptyMessage(0x14); // 发送停止录音的消息 // 发送停止录音控制指令 sendControlPacket("stopRecor"); // 播放TTS提示音 playTts("停止录音"); } /** * 使用TTS播放指定文本 * @param text 要播放的文本内容 */ private void playTts(String 
text) { if (isTtsInitialized) { // 使用系统TTS播放 ttsEngine.speak(text, TextToSpeech.QUEUE_FLUSH, null); Log.i(TAG, "播放TTS: " + text); } else { Log.w(TAG, "TTS未初始化,无法播放: " + text); } } /** * 释放音频资源 */ private void releaseAudioResources() { if (audioRecord != null) { try { if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) { audioRecord.stop(); } } catch (IllegalStateException e) { Log.e(TAG, "停止录音失败", e); } audioRecord.release(); audioRecord = null; } if (scheduler != null) { scheduler.shutdownNow(); scheduler = null; } } /** * 上传音频数据到服务器 */ private void uploadAudioData() { if (!isRecording.get() || clientSocket == null || clientSocket.isClosed() || socketWriter == null) { Log.w(TAG, "无法发送音频数据: 录音未进行或客户端未连接"); return; } byte[] buffer = new byte[BUFFER_SIZE]; try { int bytesRead = audioRecord.read(buffer, 0, BUFFER_SIZE); if (bytesRead > 0) { // 创建JSON数据包 JSONObject json = new JSONObject(); json.put("type", "recording"); json.put("data", Base64.encodeToString(buffer, 0, bytesRead, Base64.NO_WRAP)); // 使用NO_WRAP避免换行符 // 发送数据 synchronized (this) { if (socketWriter != null) { socketWriter.write(json.toString()); socketWriter.write("\n\n"); // 添加双换行作为结束标识 socketWriter.flush(); } } handler.sendEmptyMessage(0x13); // 发送录音数据的消息 } } catch (Exception e) { Log.e(TAG, "发送音频数据失败", e); sendErrorMessage("发送音频数据失败: " + e.getMessage()); } } /** * TTS初始化回调方法 * @param status 初始化状态 */ @Override public void onInit(int status) { if (status == TextToSpeech.SUCCESS) { // 设置默认语言为中文 int result = ttsEngine.setLanguage(Locale.CHINESE); if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) { Log.e(TAG, "TTS语言不支持中文"); } else { isTtsInitialized = true; Log.i(TAG, "TTS初始化成功,语言设置为中文"); } } else { Log.e(TAG, "TTS初始化失败"); } } /** * 发送控制指令包 * @param type 控制指令类型 */ private void sendControlPacket(String type) { if (clientSocket == null || clientSocket.isClosed() || socketWriter == null) { sendErrorMessage("无法发送控制指令: 客户端未连接"); return; } try { 
JSONObject packet = new JSONObject(); packet.put("type", type); packet.put("data", JSONObject.NULL); synchronized (this) { if (socketWriter != null) { socketWriter.write(packet.toString()); socketWriter.write("\n\n"); // 双换行作为结束标识 socketWriter.flush(); } } Log.i(TAG, "控制指令发送成功: " + type); } catch (Exception e) { Log.e(TAG, "发送控制指令失败", e); sendErrorMessage("发送控制指令失败: " + e.getMessage()); } } /** * 发送错误消息 * @param message 错误信息 */ private void sendErrorMessage(String message) { Message msg = handler.obtainMessage(0x16, message); handler.sendMessage(msg); } /** * 发送网络状态消息 * @param message 网络状态信息 */ private void sendNetworkMessage(String message) { Message msg = handler.obtainMessage(0x17, message); handler.sendMessage(msg); } /** * 上传录音文件(当前模式下无实际作用) */ private void uploadRecording() { Toast.makeText(this, "该模式下无需上传文件,已实时发送", Toast.LENGTH_SHORT).show(); } /** * 启动服务器监听 * @param port 监听端口号 */ private void startServer(int port) { executorService.execute(() -> { try { serverSocket = new ServerSocket(port); Log.i(TAG, "服务器启动,监听端口: " + port); sendNetworkMessage("服务器启动"); while (isServerRunning) { try { Socket socket = serverSocket.accept(); clientSocket = socket; // 创建输出流 synchronized (this) { socketWriter = new BufferedWriter( new OutputStreamWriter(socket.getOutputStream(), "UTF-8")); } handler.sendEmptyMessage(0x11); // 发送客户端连接成功的消息 Log.i(TAG, "客户端已连接: " + socket.getInetAddress()); sendNetworkMessage("客户端已连接"); // 启动双向通信处理 executorService.execute(() -> startCommunication(socket)); } catch (IOException e) { if (isServerRunning) { Log.e(TAG, "接受连接失败", e); sendErrorMessage("接受连接失败: " + e.getMessage()); } } } } catch (IOException e) { Log.e(TAG, "服务器启动失败", e); runOnUiThread(() -> Toast.makeText(MainActivity.this, "服务器启动失败: " + e.getMessage(), Toast.LENGTH_LONG).show()); } finally { closeServerSocket(); } }); } /** * 开始与客户端的通信 * @param socket 客户端Socket连接 */ private void startCommunication(Socket socket) { try (java.io.BufferedReader reader = new java.io.BufferedReader( new 
java.io.InputStreamReader(socket.getInputStream(), "UTF-8"))) { StringBuilder packetBuilder = new StringBuilder(); int c; while ((c = reader.read()) != -1 && isServerRunning) { char ch = (char) c; packetBuilder.append(ch); // 检测到连续两个换行符,表示一个完整的数据包结束 if (packetBuilder.length() >= 2 && packetBuilder.charAt(packetBuilder.length() - 2) == &#39;\n&#39; && packetBuilder.charAt(packetBuilder.length() - 1) == &#39;\n&#39;) { String packet = packetBuilder.toString().trim(); packetBuilder.setLength(0); // 清空构建器 if (!packet.isEmpty()) { try { JSONObject jsonObject = new JSONObject(packet); handleReceivedPacket(jsonObject); } catch (JSONException e) { Log.w(TAG, "JSON解析失败: " + packet, e); } } } } } catch (IOException e) { if (isServerRunning) { Log.e(TAG, "通信中断", e); runOnUiThread(() -> Toast.makeText(MainActivity.this, "通信中断: " + e.getMessage(), Toast.LENGTH_SHORT).show()); } } finally { closeSocket(socket); } } /** * 处理接收到的数据包 * @param jsonObject 接收到的JSON数据包 */ private void handleReceivedPacket(JSONObject jsonObject) { try { String type = jsonObject.getString("type"); Object data = jsonObject.opt("data"); // 发送消息到主线程进行显示 Message msg = handler.obtainMessage(0x15, type + ": " + data); handler.sendMessage(msg); Log.i(TAG, "收到控制指令: " + type); // 根据不同类型执行不同操作 switch (type) { case "start_recording": runOnUiThread(this::startRecording); break; case "stop_recording": runOnUiThread(this::stopRecording); break; case "ping": sendResponse("pong"); break; } } catch (JSONException e) { Log.e(TAG, "处理数据包失败", e); } } /** * 发送响应给客户端 * @param responseType 响应类型 */ private void sendResponse(String responseType) { if (clientSocket == null || clientSocket.isClosed() || socketWriter == null) return; try { JSONObject response = new JSONObject(); response.put("type", responseType); response.put("data", ""); synchronized (this) { if (socketWriter != null) { socketWriter.write(response.toString()); socketWriter.write("\n\n"); socketWriter.flush(); } } Log.i(TAG, "发送响应: " + responseType); } catch 
(Exception e) { Log.e(TAG, "发送响应失败", e); } } /** * 关闭指定的Socket连接 * @param socket 要关闭的Socket */ private void closeSocket(Socket socket) { try { if (socket != null && !socket.isClosed()) { socket.close(); } } catch (IOException e) { Log.w(TAG, "关闭Socket失败", e); } // 如果是当前客户端Socket,重置引用 if (socket == clientSocket) { clientSocket = null; synchronized (this) { socketWriter = null; } sendNetworkMessage("客户端断开连接"); } } /** * 关闭服务器Socket */ private void closeServerSocket() { try { if (serverSocket != null && !serverSocket.isClosed()) { serverSocket.close(); } } catch (IOException e) { Log.w(TAG, "关闭ServerSocket失败", e); } } /** * Activity销毁时调用,释放所有资源 */ @Override protected void onDestroy() { super.onDestroy(); isServerRunning = false; // 关闭TTS引擎 if (ttsEngine != null) { ttsEngine.stop(); ttsEngine.shutdown(); } // 关闭所有资源 closeServerSocket(); closeSocket(clientSocket); executorService.shutdownNow(); releaseAudioResources(); Log.i(TAG, "应用已销毁"); sendNetworkMessage("服务已停止"); } /** * 权限请求结果回调 * @param requestCode 请求码 * @param permissions 请求的权限数组 * @param grantResults 权限授予结果 */ @Override public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) { super.onRequestPermissionsResult(requestCode, permissions, grantResults); if (requestCode == PERMISSION_REQUEST_CODE) { if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) { Toast.makeText(this, "录音权限已授予", Toast.LENGTH_SHORT).show(); } else { Toast.makeText(this, "录音权限被拒绝", Toast.LENGTH_SHORT).show(); } } } }
07-01
# User request that accompanied this script: please analyse the code and
# suggest feasible optimisations.
"""Load network-resource CSV exports from a dated folder into MySQL table zzwy.

Pipeline: walk ``base_path`` -> read every CSV of the known sub-folders ->
join site / room / equipment tables -> normalise columns -> delete rows that
already exist for the folder's date -> insert the result in parallel chunks.
"""
import os
import pandas as pd
import numpy as np
import re
import chardet
import csv
import time
import gc
import psutil
from sqlalchemy import create_engine, text
from datetime import datetime
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from tenacity import retry, stop_after_attempt, wait_fixed

base_path = r'D:\OneDrive\ERIC\维护\综资数据\20250601'
database_url = 'mysql+pymysql://root:root@127.0.0.1:3306/test'

# Source folder name -> value stored in the 类型 column.
TYPE_MAPPING = {
    "机房": "机房",
    "站点": "基站",
    "铁塔": "铁塔",
    "直放站": "室分_直放站",
    "分布系统": "室分",
    "BTS": "BBU_2G",
    "BBU": "BBU_4G",
    "DU": "BBU_5G",
    "CELL": "CELL_2G",
    "E-UTRANCELL": "CELL_4G",
    "NR-CELL": "CELL_5G",
    "E-NODEB": "SITE_4G",
    "GNODEB": "SITE_5G",
}

# Equipment folder name -> numeric suffix used by that export's column names
# (e.g. folder "BTS" has columns NAME$03001, ID$03001, ...).
FOLDER_CODES = {
    "NR-CELL": "03008",
    "GNODEB": "03007",
    "E-UTRANCELL": "03006",
    "E-NODEB": "03005",
    "CELL": "03002",
    "BTS": "03001",
    "BBU": "03010",
    "DU": "03011",
    "铁塔": "03020",
    "直放站": "03019",
    "分布系统": "03018",
}
EQUIPMENT_BASE_COLUMNS = ('NAME', 'ID', 'ROOM_NAME', 'ROOM_ID')

STATION_COLUMN_MAP = {
    'MNT_LEVEL$01002': 'STATION_LEVEL',
    'NAME$01002': 'STATION_NAME',
    'ID$01002': 'STATION_ID',
    'LATITUDE$01002': 'LATITUDE',
    'LONGITUDE$01002': 'LONGITUDE',
    "MNT_TYPE$01002": "维护类型",
    "LIFE_STATE$01002": "生命周期状态",
    "EXT_CODE$01002": "铁塔编码",
    "IS_MATCHING$01002": "是否含配套",
    "ORGANIZATION_NAME$01002": "维护小组",
    "BELONG_SPECIALITY$01002": "关联专业",
    "MNT_DIFFICULTY$01002": "维护难度",
    "CITY_NAME$01002": "地市",
}
ROOM_COLUMN_MAP = {
    'NAME$01003': 'ROOM_NAME',
    'ID$01003': 'ROOM_ID',
    'SITE_NAME$01003': 'STATION_NAME',
    'SITE_ID$01003': 'STATION_ID',
}

# Column order of the combined frame before ID is renamed / DATE is appended.
FINAL_COLUMNS = [
    '地市', 'NAME', 'ID', 'ROOM_NAME', 'ROOM_ID', 'STATION_NAME', 'STATION_ID',
    '生命周期状态', '铁塔编码', '是否含配套', 'STATION_LEVEL', 'LATITUDE', 'LONGITUDE',
    '维护小组', '关联专业', '维护难度', '类型', '维护类型',
]


def custom_field_handler(field):
    """Normalise one header/field value.

    Strings are stripped; an empty string or the literal ``\\N`` becomes
    ``None``; double quotes, CR and LF are removed. Non-strings pass through.
    """
    if isinstance(field, str):
        field = field.strip()
        if field in ('', '\\N'):
            return None
        return re.sub(r'["\n\r]', '', field)
    return field


def detect_encoding_and_separator(filepath):
    """Guess the text encoding and column separator of a CSV file.

    The chardet guess (from the first 10000 bytes) is tried first, then a
    fixed list of fallback encodings. The separator is the candidate that
    splits the first line into the most fields.

    Returns:
        (encoding, separator)

    Raises:
        ValueError: the file cannot be opened or no separator was found.
    """
    fallback_encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'iso-8859-1']
    separators = [',', '\t', ';', '|', '^']

    try:
        with open(filepath, 'rb') as f:
            raw_data = f.read(10000)
    except OSError as ex:
        raise ValueError(f"无法检测到分隔符,文件可能为空或格式不正确: {filepath}") from ex

    guessed = chardet.detect(raw_data)['encoding']
    # The original looped over the fallback list but never used it; here the
    # fallbacks are really tried when chardet has no answer or names a codec
    # Python does not know.
    candidates = ([guessed] if guessed else []) + fallback_encodings

    for encoding in candidates:
        try:
            with open(filepath, 'r', encoding=encoding, errors='ignore') as f:
                first_line = f.readline()
        except (LookupError, OSError, ValueError) as ex:
            print(f"尝试使用编码 {encoding} 时遇到错误: {ex}")
            continue

        best_sep = None
        max_columns = 0
        for sep in separators:
            columns = len(first_line.split(sep))
            if columns > max_columns:
                max_columns = columns
                best_sep = sep
        if best_sep is not None:
            return encoding, best_sep

    raise ValueError(f"无法检测到分隔符,文件可能为空或格式不正确: {filepath}")


def read_csv_with_encodings(filepath):
    """Read a CSV with pandas using the detected encoding and separator.

    Quoting is disabled (``csv.QUOTE_NONE``); malformed lines only warn.

    Raises:
        ValueError: detection or parsing failed.
    """
    encoding, sep = detect_encoding_and_separator(filepath)
    try:
        df = pd.read_csv(
            filepath,
            encoding=encoding,
            sep=sep,
            quoting=csv.QUOTE_NONE,
            low_memory=False,
            on_bad_lines='warn',
        )
    except Exception as ex:
        print(f"尝试读取文件时遇到错误: {ex}")
        raise ValueError(f"无法读取文件: {filepath},请检查文件格式或编码。") from ex
    print(f"成功读取文件: {filepath}")
    return df


def read_csv_with_encodings_station(filepath):
    """Read a CSV through ``csv.DictReader`` (quote char ``@``) into a DataFrame.

    All values stay strings; undecodable bytes are replaced.

    Raises:
        ValueError: detection or reading failed.
    """
    encoding, sep = detect_encoding_and_separator(filepath)
    try:
        with open(filepath, 'r', newline='', encoding=encoding, errors='replace') as csvfile:
            reader = csv.DictReader(csvfile, delimiter=sep, quotechar='@')
            df = pd.DataFrame(list(reader))
    except Exception as ex:
        print(f"尝试读取文件时遇到错误: {ex}")
        raise ValueError(f"无法读取文件: {filepath},请检查文件格式或编码。") from ex
    print(f"成功读取文件: {filepath}")
    return df


def _load_mapped_frame(reader, filepath, column_map, custom_handler, folder_type):
    """Read *filepath* with *reader*, keep/rename mapped columns, add 类型.

    Returns None (after printing the reason) when the file cannot be read or
    lacks one of the mapped columns, so one bad file does not stop the run.
    """
    try:
        df_temp = reader(filepath)
        df_temp.columns = [custom_handler(col) for col in df_temp.columns]
        temp_df = df_temp[list(column_map)].rename(columns=column_map)
        temp_df['类型'] = TYPE_MAPPING.get(folder_type, "未知类型")
    except (pd.errors.ParserError, KeyError, ValueError) as ex:
        # ValueError is what the readers raise for any read failure; the
        # original only caught ParserError/KeyError and therefore crashed.
        print(f"在处理文件 {os.path.basename(filepath)} 时遇到错误: {ex}")
        return None
    print(f"{os.path.basename(filepath)}已成功加载,本次包含行数:{len(temp_df)}")
    return temp_df


def process_csv_file(filepath, column_map, custom_handler, folder_type):
    """Load one equipment CSV (pandas reader); returns a DataFrame or None."""
    return _load_mapped_frame(
        read_csv_with_encodings, filepath, column_map, custom_handler, folder_type
    )


def process_csv_files_station(filepath, column_map, custom_handler, folder_type):
    """Load one site/room CSV (DictReader reader); returns a DataFrame or None."""
    return _load_mapped_frame(
        read_csv_with_encodings_station, filepath, column_map, custom_handler, folder_type
    )


def validate_dataframe(df, required_columns):
    """Return True when *df* has every required column; print the first missing one."""
    for col in required_columns:
        if col not in df.columns:
            print(f"缺少必要的列: {col}")
            return False
    return True


def _csv_paths(root, files):
    """Full paths of the ``.csv`` files (case-insensitive) among *files*."""
    return [os.path.join(root, name) for name in files if name.lower().endswith('.csv')]


def read_and_process_folder(folder_path):
    """Walk *folder_path* and build the 站点 / 机房 / 合并表 DataFrames.

    Sub-folders are recognised by name. Room rows are enriched with site
    attributes (join on STATION_ID), equipment rows with room attributes
    (join on ROOM_ID).

    Returns:
        dict with any of the keys '站点', '机房', '合并表' that had data.
    """
    station_frames = []
    room_frames = []
    equipment_frames = []

    for root, dirs, files in os.walk(folder_path):
        folder = os.path.basename(root)
        if folder == "站点":
            for path in _csv_paths(root, files):
                df_station = process_csv_files_station(
                    path, STATION_COLUMN_MAP, custom_field_handler, folder)
                if df_station is not None and validate_dataframe(
                        df_station, ['STATION_NAME', 'STATION_ID']):
                    station_frames.append(df_station)
        elif folder == "机房":
            for path in _csv_paths(root, files):
                df_room = process_csv_files_station(
                    path, ROOM_COLUMN_MAP, custom_field_handler, folder)
                if df_room is not None and validate_dataframe(
                        df_room, ['ROOM_NAME', 'ROOM_ID']):
                    room_frames.append(df_room)
        elif folder in FOLDER_CODES:
            code = FOLDER_CODES[folder]
            column_map = {f'{base}${code}': base for base in EQUIPMENT_BASE_COLUMNS}
            for path in _csv_paths(root, files):
                data = process_csv_file(path, column_map, custom_field_handler, folder)
                if data is not None:
                    equipment_frames.append(data)

    # Concatenate once after the walk instead of re-concatenating per folder.
    combined = {}
    if station_frames:
        combined['站点'] = pd.concat(station_frames, ignore_index=True)
    if room_frames:
        combined['机房'] = pd.concat(room_frames, ignore_index=True)
    if equipment_frames:
        combined['合并表'] = pd.concat(equipment_frames, ignore_index=True)

    # Rooms inherit attributes from their site.
    if '机房' in combined and '站点' in combined:
        station_columns = ['STATION_ID', 'STATION_LEVEL', 'LONGITUDE', 'LATITUDE', '维护类型']
        combined['机房'] = combined['机房'].merge(
            combined['站点'][station_columns], on='STATION_ID', how='left')

    # Equipment inherits attributes from its room.
    if '合并表' in combined and '机房' in combined:
        room_columns = ['ROOM_ID', 'STATION_NAME', 'STATION_ID', 'STATION_LEVEL',
                        'LONGITUDE', 'LATITUDE', '维护类型']
        # Site attributes are absent from the room table when no site data was
        # loaded; join on whatever is there instead of raising KeyError.
        available = [c for c in room_columns if c in combined['机房'].columns]
        combined['合并表'] = combined['合并表'].merge(
            combined['机房'][available], on='ROOM_ID', how='left')

    return combined


@retry(stop=stop_after_attempt(3), wait=wait_fixed(5))
def safe_insert_chunk(chunk, table_name, engine_url, chunk_id, total_chunks):
    """Insert one DataFrame chunk in its own engine/connection/transaction.

    Retried by tenacity up to 3 attempts, 5 s apart.

    Returns:
        (rows_inserted, seconds_elapsed)
    """
    chunk_start = time.time()
    start_mem = psutil.virtual_memory().used
    thread_engine = create_engine(engine_url, pool_pre_ping=True, pool_recycle=3600)
    inner_connection = None
    try:
        inner_connection = thread_engine.connect()
        transaction = inner_connection.begin()
        try:
            chunk.to_sql(
                name=table_name,
                con=inner_connection,
                if_exists='append',
                index=False,
                method='multi',
                chunksize=1000,  # rows per INSERT statement inside this chunk
            )
            transaction.commit()
        except Exception as insert_error:
            transaction.rollback()
            print(
                f"块 {chunk_id}/{total_chunks} 导入失败 (尝试 {safe_insert_chunk.retry.statistics.get('attempt_number', 1)}/3): {str(insert_error)}")
            raise
        chunk_elapsed = time.time() - chunk_start
        print(f"块 {chunk_id}/{total_chunks} 导入完成,行数: {len(chunk)}, 耗时: {chunk_elapsed:.2f}秒")
        return len(chunk), chunk_elapsed
    finally:
        # Bug fix: the original called connection.close() here, i.e. an
        # unrelated module-level name, so this worker's connection was never
        # closed.
        if inner_connection is not None:
            inner_connection.close()
        thread_engine.dispose()
        gc.collect()
        end_mem = psutil.virtual_memory().used
        # System-wide figure, so it also reflects the other worker threads.
        print(f"内存使用: {(end_mem - start_mem) / 1024 / 1024:.2f}MB")


def parallel_insert_to_sql(df, table_name, engine_url, chunk_size=10000, max_workers=10):
    """Insert *df* into *table_name* in chunks using a thread pool.

    Returns:
        Number of rows in successfully inserted chunks (0 for an empty frame).
    """
    if df.empty:
        print("没有数据需要导入")
        return 0

    total_chunks = (len(df) + chunk_size - 1) // chunk_size
    print(f"数据将分为 {total_chunks} 块进行并行导入,每块 {chunk_size} 行,使用 {max_workers} 个工作线程")

    successful_chunks = 0
    failed_chunks = 0
    total_rows_inserted = 0
    insert_start_time = time.time()

    with tqdm(total=total_chunks, desc="并行导入") as pbar:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {}
            for offset in range(0, len(df), chunk_size):
                chunk_id = offset // chunk_size + 1
                future = executor.submit(
                    safe_insert_chunk,
                    chunk=df.iloc[offset:offset + chunk_size].copy(),
                    table_name=table_name,
                    engine_url=engine_url,
                    chunk_id=chunk_id,
                    total_chunks=total_chunks,
                )
                futures[future] = chunk_id

            for future in as_completed(futures):
                chunk_id = futures[future]
                try:
                    rows_inserted, _ = future.result()
                except Exception as ex:
                    failed_chunks += 1
                    print(f"块 {chunk_id}/{total_chunks} 处理失败: {str(ex)}")
                    pbar.update(1)
                    continue

                successful_chunks += 1
                total_rows_inserted += rows_inserted
                pbar.update(1)
                # Refresh the throughput display every 10 successful chunks.
                if successful_chunks % 10 == 0:
                    current_elapsed = time.time() - insert_start_time
                    rows_per_second = total_rows_inserted / current_elapsed if current_elapsed > 0 else 0
                    pbar.set_postfix({
                        "已插入": f"{total_rows_inserted}行",
                        "速度": f"{rows_per_second:.0f}行/秒"
                    })

    total_time = time.time() - insert_start_time
    if successful_chunks > 0:
        avg_rows_per_second = total_rows_inserted / total_time if total_time > 0 else 0
        print(f"导入完成! 总块数: {total_chunks}, 成功块数: {successful_chunks}, 失败块数: {failed_chunks}")
        print(f"总行数: {total_rows_inserted}, 总耗时: {total_time:.2f}秒, 平均速度: {avg_rows_per_second:.0f}行/秒")
    else:
        print("所有块导入均失败")

    return total_rows_inserted


def _strip_strings(series):
    """Strip whitespace from string cells of an object column, leaving other values intact."""
    if series.dtype != "object":
        return series
    # Series.str.strip() would turn every non-string cell (e.g. an integer ID
    # parsed by pandas) into NaN; map() keeps those values.
    return series.map(lambda value: value.strip() if isinstance(value, str) else value)


# ---------------------------------------------------------------- main program
start_time = time.time()
print(f"程序开始执行,时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Read and process the source files.
combined_dataframes = read_and_process_folder(base_path)

# Stack the three tables; pd.concat drops None entries.
final_combined = pd.concat([
    combined_dataframes.get('站点'),
    combined_dataframes.get('机房'),
    combined_dataframes.get('合并表')
], ignore_index=True)

# reindex (instead of [[...]]) yields NaN columns rather than KeyError when
# one of the source tables is missing.
final_combined = final_combined.reindex(columns=FINAL_COLUMNS)

# Site rows and room rows have no NAME/ID of their own: fall back to the room
# value when the row has a room, otherwise to the site value.
final_combined['NAME'] = np.where(
    final_combined['ROOM_NAME'].isna(),
    final_combined['NAME'].fillna(final_combined['STATION_NAME']),
    final_combined['NAME'].fillna(final_combined['ROOM_NAME'])
)
final_combined['ID'] = np.where(
    final_combined['ROOM_ID'].isna(),
    final_combined['ID'].fillna(final_combined['STATION_ID']),
    final_combined['ID'].fillna(final_combined['ROOM_ID'])
)

# The folder name (YYYYMMDD) is the data date; parsed once and reused below.
folder_name = os.path.basename(base_path)
try:
    current_date = datetime.strptime(folder_name, "%Y%m%d").strftime("%Y-%m-%d")
except ValueError:
    current_date = None
    print(f"无效的日期格式: {folder_name}")
final_combined['DATE'] = current_date

# Clean up and put the columns in table order.
final_combined = final_combined.apply(_strip_strings)
final_combined = final_combined.rename(columns={'ID': 'SOURCE_ID'})
final_combined = final_combined[
    ['SOURCE_ID' if col == 'ID' else col for col in FINAL_COLUMNS] + ['DATE']
]

print(f"数据处理完成,总行数: {len(final_combined)}")

# ------------------------------------------------------------ database section
print("开始数据库操作...")

# One engine for all main-thread statements (workers create their own).
main_engine = create_engine(database_url)

# Remove rows already loaded for this date so the import can be re-run.
if current_date:
    try:
        with main_engine.connect() as connection:
            connection.execute(text("SET foreign_key_checks = 0"))
            connection.execute(text("SET unique_checks = 0"))

            delete_query = text("DELETE FROM zzwy WHERE DATE = :date")
            result = connection.execute(delete_query, {'date': current_date})
            connection.commit()
            print(f"已删除日期为 {current_date} 的 {result.rowcount} 条记录")

            connection.execute(text("SET foreign_key_checks = 1"))
            connection.execute(text("SET unique_checks = 1"))
    except Exception as e:
        print(f"删除当天数据时出错: {str(e)}")
        # Carry on and try to insert the new data anyway.
else:
    print("警告: 当前日期为空,跳过删除步骤")

# NOTE(review): these SET statements are session-scoped. They affect only the
# connection they run on, not the separate connections opened by the insert
# worker threads, so they do not speed up the parallel import.
with main_engine.connect() as connection:
    connection.execute(text("SET foreign_key_checks = 0"))
    connection.execute(text("SET unique_checks = 0"))

try:
    # Informational only: the import below uses a fixed chunk_size.
    available_memory = psutil.virtual_memory().available
    estimated_row_size = 1024  # assume about 1 KB of memory per row
    chunksize = min(int(available_memory * 0.3 / estimated_row_size), 500000)
    print(f"自动计算的chunksize: {chunksize}")

    # Create the table if it does not exist yet.
    with main_engine.connect() as connection:
        connection.execute(text("""
            CREATE TABLE IF NOT EXISTS zzwy (
                地市 VARCHAR(255),
                NAME VARCHAR(255),
                SOURCE_ID VARCHAR(255),
                ROOM_NAME VARCHAR(255),
                ROOM_ID VARCHAR(255),
                STATION_NAME VARCHAR(255),
                STATION_ID VARCHAR(255),
                生命周期状态 VARCHAR(255),
                铁塔编码 VARCHAR(255),
                是否含配套 VARCHAR(255),
                STATION_LEVEL VARCHAR(255),
                LATITUDE VARCHAR(255),
                LONGITUDE VARCHAR(255),
                维护小组 VARCHAR(255),
                关联专业 VARCHAR(255),
                维护难度 VARCHAR(255),
                类型 VARCHAR(255),
                维护类型 VARCHAR(255),
                DATE VARCHAR(255)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
        """))

    # Parallel import.
    actual_rows_inserted = parallel_insert_to_sql(
        df=final_combined,
        table_name='zzwy',
        engine_url=database_url,
        chunk_size=20000,
        max_workers=10
    )

    # Compare the row count in the database with the source frame.
    if current_date:
        try:
            with main_engine.connect() as connection:
                result = connection.execute(
                    text("SELECT COUNT(*) FROM zzwy WHERE DATE = :date"),
                    {'date': current_date}
                )
                db_count = result.scalar()
            print(
                f"源数据行数: {len(final_combined)}, 实际插入行数: {actual_rows_inserted}, 数据库记录数(当天): {db_count}")
            if db_count == len(final_combined):
                print("数据导入完整! 数据库记录数与源数据一致")
            else:
                print(f"警告: 数据可能不完整! 差异: {len(final_combined) - db_count}")
        except Exception as e:
            print(f"验证数据完整性时出错: {str(e)}")
    else:
        print("无法验证数据完整性: 当前日期为空")

except Exception as main_error:
    print(f"导入数据到数据库时发生错误: {str(main_error)}")
    raise
finally:
    # Restore the session settings (see the NOTE above) and release the pool.
    with main_engine.connect() as connection:
        connection.execute(text("SET foreign_key_checks = 1"))
        connection.execute(text("SET unique_checks = 1"))
    main_engine.dispose()

# Report total run time.
end_time = time.time()
elapsed_time = end_time - start_time
print(f"程序执行完成,时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"总耗时: {elapsed_time:.2f}秒 ({elapsed_time / 3600:.2f}小时)")
06-13
"""Fetch IMDb title data through RapidAPI and write it into the mac_vod table."""
import logging
import requests
import pymysql
from typing import Dict, List, Optional
from concurrent.futures import ThreadPoolExecutor
from datetime import date
import time
from dbutils.pooled_db import PooledDB

# Configuration constants
MAX_WORKERS = 10  # maximum number of worker threads

# Database connection pool.
# NOTE(review): the database password (and the API key in query_movie) are
# hard-coded; they should be rotated and loaded from configuration instead.
POOL = PooledDB(
    creator=pymysql,
    maxconnections=MAX_WORKERS * 2,
    mincached=5,
    blocking=True,
    host='172.96.161.250',
    user='root',
    password='66a811ee546b02a1',
    database='fusion',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor
)


def is_valid_value(value) -> bool:
    """Return False for None, numeric zero, and blank strings; True otherwise."""
    if value is None:
        return False
    if isinstance(value, (int, float)) and value == 0:
        return False
    if isinstance(value, str) and value.strip() == "":
        return False
    return True


def extract_value(data: dict, path: str) -> Optional[object]:
    """Follow a dotted *path* through nested dicts.

    Returns the value found (of whatever type the payload holds), or None as
    soon as a key is missing, a level is not a dict, or a value is None.
    """
    current = data
    for key in path.split('.'):
        if not isinstance(current, dict) or key not in current:
            return None
        current = current[key]
        if current is None:
            return None
    return current


def query_movie(vod_id: int, imdb_id: str) -> bool:
    """Query the ratings endpoint for *imdb_id* and update row *vod_id*.

    Only fields with usable values are written: vod_sub (title text),
    vod_year (full release date as YYYY-MM-DD) and vod_douban_score (rating
    in (0, 10]).

    Returns:
        True when the row was updated; False when nothing usable was
        returned or any request/database error occurred (errors are logged).
    """
    url = "https://imdb8.p.rapidapi.com/title/v2/get-ratings"
    headers = {
        "x-rapidapi-key": "40eb93fa97mshba6ed80cd703eadp105b67jsn0fb3c77e09ea",
        "x-rapidapi-host": "imdb8.p.rapidapi.com"
    }
    querystring = {"tconst": imdb_id}

    try:
        logging.debug(f"查询ID: {vod_id}, IMDB: {imdb_id}")
        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        response.raise_for_status()
        data = response.json()

        update_fields = {}

        # vod_sub: title text
        vod_sub = extract_value(data, "titleText.text")
        if is_valid_value(vod_sub):
            update_fields["vod_sub"] = str(vod_sub).strip()

        # vod_year: written only when year, month and day are all present
        year = extract_value(data, "releaseDate.year")
        month = extract_value(data, "releaseDate.month")
        day = extract_value(data, "releaseDate.day")
        if all(is_valid_value(x) for x in [year, month, day]):
            try:
                year_int = int(year)
                month_int = int(month)
                day_int = int(day)
                if 1900 <= year_int <= 2100 and 1 <= month_int <= 12 and 1 <= day_int <= 31:
                    # date() rejects impossible combinations such as Feb 31,
                    # which the plain range check lets through.
                    date(year_int, month_int, day_int)
                    update_fields["vod_year"] = f"{year_int}-{month_int:02d}-{day_int:02d}"
                else:
                    logging.warning(f"日期超出范围: {year}-{month}-{day}")
            except (TypeError, ValueError):
                logging.warning(f"无效日期: {year}-{month}-{day}")

        # vod_douban_score: keep only non-zero ratings inside (0, 10]
        vod_douban_score = extract_value(data, "ratings.aggregateRating")
        if vod_douban_score is not None and vod_douban_score != 0:
            try:
                score_float = float(vod_douban_score)
                if 0 < score_float <= 10:
                    update_fields["vod_douban_score"] = score_float
                else:
                    logging.warning(f"评分超出范围: {score_float}")
            except (TypeError, ValueError):
                logging.warning(f"无效评分值: {vod_douban_score}")

        if not update_fields:
            logging.warning(f"没有有效字段需要更新: IMDB {imdb_id}")
            return False

        with POOL.connection() as connection:
            with connection.cursor() as cursor:
                # Column names come from the fixed keys above; values are bound.
                set_clause = ", ".join(f"{field} = %s" for field in update_fields)
                values = list(update_fields.values()) + [vod_id]
                sql = f"UPDATE mac_vod SET {set_clause} WHERE vod_id = %s"
                cursor.execute(sql, values)
                connection.commit()

        updated_fields = "、".join(update_fields.keys())
        logging.info(f"成功更新ID: {vod_id}, 更新字段: {updated_fields}")
        return True

    except requests.RequestException as request_error:
        logging.error(f"API请求失败: {vod_id}-{imdb_id} - {str(request_error)}")
    except Exception as e:
        logging.exception(f"处理错误: {vod_id}-{imdb_id} - {str(e)}")

    return False


def query_last_vod_id() -> Optional[int]:
    """Return the largest vod_id whose imdb_id is not NULL, or None."""
    try:
        with POOL.connection() as connection:
            with connection.cursor() as cursor:
                sql = "SELECT vod_id FROM mac_vod WHERE imdb_id IS NOT NULL ORDER BY vod_id DESC LIMIT 1"
                cursor.execute(sql)
                result = cursor.fetchone()
                return result['vod_id'] if result else None
    except Exception as e:
        logging.error(f"查询最后ID失败: {str(e)}")
        return None


def query_batch(last_id: int, batch_size: int = 50) -> List[dict]:
    """Return up to *batch_size* rows with vod_id > *last_id* and an imdb_id, ascending."""
    try:
        with POOL.connection() as connection:
            with connection.cursor() as cursor:
                sql = "SELECT vod_id, vod_name, imdb_id FROM mac_vod WHERE vod_id > %s AND imdb_id IS NOT NULL ORDER BY vod_id ASC LIMIT %s"
                cursor.execute(sql, (last_id, batch_size))
                return cursor.fetchall()
    except Exception as e:
        logging.error(f"查询批次失败: {str(e)}")
        return []


def process_item(item: dict) -> None:
    """Process one row from query_batch; failures are logged, never raised."""
    vod_id = item['vod_id']
    imdb_id = item['imdb_id']

    try:
        logging.debug(f"处理ID: {vod_id}, IMDB: {imdb_id}")
        success = query_movie(vod_id, imdb_id)
        if not success:
            logging.warning(f"处理失败: {vod_id}-{imdb_id}")
    except Exception as e:
        logging.exception(f"处理项异常: {vod_id}-{imdb_id} - {str(e)}")


def main_loop():
    """Process rows batch by batch until query_batch returns nothing."""
    batch_size = 50
    logging.info("开始处理任务")

    start_time = time.time()
    # NOTE(review): query_last_vod_id() returns the HIGHEST vod_id that has an
    # imdb_id, and query_batch() then asks for rows above it with the same
    # filter, so the first batch is always empty. That matches the observed
    # output quoted at the end of this file. Confirm what marks a row as
    # already processed before changing the starting point.
    last_processed_id = query_last_vod_id() or 0
    page = 1
    total_processed = 0

    # One pool for the whole run instead of a new one per batch.
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        while True:
            items = query_batch(last_processed_id, batch_size)
            if not items:
                logging.info("没有更多需要处理的数据")
                break

            logging.info(f"处理第 {page} 批, 数量: {len(items)} 条")
            list(executor.map(process_item, items))

            last_processed_id = items[-1]['vod_id']
            total_processed += len(items)
            page += 1

    elapsed = time.time() - start_time
    logging.info(f"任务完成! 共处理 {total_processed} 条记录, 耗时: {elapsed:.2f}秒")


if __name__ == '__main__':
    # Logging to console and file
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler('imdb_sync.log')
        ]
    )

    try:
        main_loop()
    except KeyboardInterrupt:
        logging.info("用户中断执行")
    except Exception as e:
        logging.exception("程序发生未处理异常")

# Observed output reported with this script: 没有更多需要处理的数据
06-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值