[Index] Volume 2. Data Structures

The second volume of *AOAPC I: Beginning Algorithm Contests* delves into the fundamentals of data structures, covering core concepts such as lists, binary trees, and graphs, and aims to give beginners a comprehensive guide to the subject.
```python
import sys
import io
import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
import matplotlib as mpl
import warnings
import os
import argparse
import multiprocessing
from functools import partial
import time
import logging
from collections import defaultdict

# Force UTF-8 output to support special characters
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("rdf_analysis.log", encoding='utf-8'),
        logging.StreamHandler()
    ]
)

# Suppress expected warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Publication-quality plot settings
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman', 'DejaVu Serif'],
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'figure.dpi': 600,
    'savefig.dpi': 600,
    'figure.figsize': (8, 6),
    'lines.linewidth': 2.0,
    'legend.fontsize': 10,
    'legend.framealpha': 0.8,
    'mathtext.default': 'regular',
    'axes.linewidth': 1.5,
    'xtick.major.width': 1.5,
    'ytick.major.width': 1.5,
    'xtick.major.size': 5,
    'ytick.major.size': 5,
})


def identify_atom_types(struct):
    """
    Identify phosphate oxygens (Op) and all hydrogen atoms (H).
    :param struct: pymatgen Structure
    :return: dict with Op and H indices
    """
    phosphate_oxygens = []
    all_hydrogens = []
    # Collect all hydrogen atoms
    for i, site in enumerate(struct):
        if site.species_string == "H":
            all_hydrogens.append(i)
    # Phosphate oxygens: O atoms within 1.6 A of a P atom
    for i, site in enumerate(struct):
        if site.species_string == "P":
            neighbors = struct.get_neighbors(site, r=1.6)
            for neighbor in neighbors:
                if neighbor[0].species_string == "O":
                    phosphate_oxygens.append(neighbor[0].index)
    # Remove duplicates
    phosphate_oxygens = list(set(phosphate_oxygens))
    logging.info(f"Identified {len(phosphate_oxygens)} phosphate oxygens "
                 f"and {len(all_hydrogens)} hydrogen atoms")
    return {
        "phosphate_oxygens": phosphate_oxygens,
        "all_hydrogens": all_hydrogens
    }


def process_frame(struct, center_sel, target_sel, r_max, exclude_bonds, bond_threshold):
    """Compute pair distances for one frame of the trajectory."""
    try:
        atom_types = identify_atom_types(struct)
        centers = center_sel(atom_types)
        targets = target_sel(atom_types)
        # Handle empty selections
        if len(centers) == 0 or len(targets) == 0:
            logging.warning("No centers or targets found in frame")
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": 0, "n_targets": 0, "volume": struct.volume}
        center_coords = np.array([struct[i].coords for i in centers])
        target_coords = np.array([struct[i].coords for i in targets])
        lattice = struct.lattice
        kdtree = cKDTree(target_coords, boxsize=lattice.abc)
        # Cap the neighbour count at the number of targets
        k_val = min(50, len(targets))
        if k_val == 0:
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": len(centers), "n_targets": len(targets),
                    "volume": struct.volume}
        try:
            distances, indices = kdtree.query(center_coords, k=k_val,
                                              distance_upper_bound=r_max)
        except Exception as e:
            logging.error(f"KDTree query error: {str(e)}")
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": len(centers), "n_targets": len(targets),
                    "volume": struct.volume}
        if distances.ndim == 1:  # k_val == 1 returns 1-D arrays
            distances = distances.reshape(-1, 1)
            indices = indices.reshape(-1, 1)
        # Filter the query results
        valid_distances = []
        for i in range(distances.shape[0]):
            center_idx = centers[i]
            for j in range(distances.shape[1]):
                dist = distances[i, j]
                if dist > r_max or np.isinf(dist):
                    continue
                target_idx = targets[indices[i, j]]
                # Exclude covalent bonds
                if exclude_bonds:
                    actual_dist = struct.get_distance(center_idx, target_idx)
                    if actual_dist < bond_threshold:
                        continue
                valid_distances.append(dist)
        return {"distances": np.array(valid_distances, dtype=np.float64),
                "n_centers": len(centers), "n_targets": len(targets),
                "volume": struct.volume}
    except Exception as e:
        logging.error(f"Error processing frame: {str(e)}")
        return {"distances": np.array([], dtype=np.float64),
                "n_centers": 0, "n_targets": 0, "volume": struct.volume}


def calculate_rdf(structures, center_sel, target_sel, r_max=8.0, bin_width=0.05,
                  exclude_bonds=True, bond_threshold=1.3, workers=1):
    """
    Compute the radial distribution function.
    :param workers: number of parallel worker processes
    """
    bins = np.arange(0, r_max, bin_width)
    hist = np.zeros(len(bins) - 1)
    total_centers = 0
    total_targets = 0
    total_volume = 0
    func = partial(process_frame, center_sel=center_sel, target_sel=target_sel,
                   r_max=r_max, exclude_bonds=exclude_bonds,
                   bond_threshold=bond_threshold)
    # Process frames in a multiprocessing pool
    with multiprocessing.Pool(processes=workers) as pool:
        results = []
        try:
            for res in pool.imap_unordered(func, structures):
                results.append(res)
        except Exception as e:
            logging.error(f"Error in parallel processing: {str(e)}")
    # Accumulate the per-frame results
    n_frames = 0
    for res in results:
        if res is None:
            continue
        n_frames += 1
        valid_distances = res["distances"]
        n_centers = res["n_centers"]
        n_targets = res["n_targets"]
        volume = res["volume"]
        if len(valid_distances) > 0:
            hist += np.histogram(valid_distances, bins=bins)[0]
        total_centers += n_centers
        total_targets += n_targets
        total_volume += volume
    if n_frames == 0:
        r = bins[:-1] + bin_width / 2
        return r, np.zeros_like(r)
    avg_density = total_targets / total_volume if total_volume > 0 else 0
    r = bins[:-1] + bin_width / 2
    rdf = np.zeros_like(r)
    for i in range(len(hist)):
        r_lower = bins[i]
        r_upper = bins[i + 1]
        shell_vol = 4 / 3 * np.pi * (r_upper ** 3 - r_lower ** 3)
        expected_count = shell_vol * avg_density * total_centers
        if expected_count > 1e-10:
            rdf[i] = hist[i] / expected_count
        else:
            rdf[i] = 0
    return r, rdf


# Selector functions
def selector_phosphate_oxygens(atom_types):
    return atom_types["phosphate_oxygens"]


def selector_all_hydrogens(atom_types):
    return atom_types["all_hydrogens"]


def save_rdf_data(r, rdf, output_dir, system_name):
    """Save the RDF data to a text file."""
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f"{system_name}_Op_H_RDF.txt")
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            # Use "A" instead of "Å" to avoid encoding problems
            f.write("# Distance (A)\tg(r)\n")
            for i in range(len(r)):
                f.write(f"{r[i]:.4f}\t{rdf[i]:.6f}\n")
        logging.info(f"Saved RDF data to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error saving RDF data: {str(e)}")
        return False


def plot_single_rdf(r, rdf, output_dir, system_name):
    """Plot and save a single RDF curve."""
    try:
        fig, ax = plt.subplots(figsize=(8, 6))
        # Plot the RDF
        ax.plot(r, rdf, 'b-', linewidth=2.0, label=f'{system_name} Op-H RDF')
        # Mark the hydrogen-bond region
        ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
        ax.text(1.3, np.max(rdf) * 0.8, 'H-bond Region', fontsize=12)
        # Axes (use "A" instead of "Å")
        ax.set_xlim(0, 6.0)
        ax.set_ylim(0, np.max(rdf) * 1.2)
        ax.set_xlabel('Radial Distance (A)', fontweight='bold')
        ax.set_ylabel('g(r)', fontweight='bold')
        ax.set_title(f'{system_name}: Phosphate Oxygen - Hydrogen RDF',
                     fontsize=16, pad=15)
        # Grid and legend
        ax.grid(True, linestyle='--', alpha=0.5)
        ax.legend(loc='best', framealpha=0.8)
        # Save the figure
        plt.tight_layout()
        filename = os.path.join(output_dir, f"{system_name}_Op_H_RDF.tiff")
        plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
        plt.close()
        logging.info(f"Saved RDF plot to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error plotting RDF for {system_name}: {str(e)}")
        return False


def plot_combined_rdf(all_rdf_data, output_dir):
    """Plot and save the combined RDF figure."""
    try:
        fig, ax = plt.subplots(figsize=(10, 8))
        # Colour and line-style cycles
        colors = ['b', 'r', 'g', 'm', 'c', 'y', 'k']
        line_styles = ['-', '--', '-.', ':']
        # Plot every system's RDF curve
        for i, (system_name, (r, rdf)) in enumerate(all_rdf_data.items()):
            color = colors[i % len(colors)]
            line_style = line_styles[(i // len(colors)) % len(line_styles)]
            ax.plot(r, rdf, color=color, linestyle=line_style, linewidth=2.0,
                    label=system_name)
        # Mark the hydrogen-bond region
        ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
        ax.text(1.3, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()]) * 0.8,
                'H-bond Region', fontsize=12)
        # Axes
        ax.set_xlim(0, 6.0)
        ax.set_ylim(0, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()]) * 1.2)
        ax.set_xlabel('Radial Distance (A)', fontweight='bold')
        ax.set_ylabel('g(r)', fontweight='bold')
        ax.set_title('Phosphate Oxygen - Hydrogen RDF Comparison', fontsize=16, pad=15)
        # Grid and legend
        ax.grid(True, linestyle='--', alpha=0.5)
        ax.legend(loc='best', framealpha=0.8)
        # Save the figure
        plt.tight_layout()
        filename = os.path.join(output_dir, "combined_Op_H_RDF.tiff")
        plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
        plt.close()
        logging.info(f"Saved combined RDF plot to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error plotting combined RDF: {str(e)}")
        return False


def load_vasprun_safe(filename):
    """Load a vasprun.xml robustly, handling parsing problems."""
    try:
        # Try pymatgen's standard loader first
        return Vasprun(filename, ionic_step_skip=5)
    except Exception as e:
        logging.warning(f"Standard loading failed for {filename}: {str(e)}. "
                        f"Trying alternative method...")
        try:
            # Retry without DOS/eigenvalue parsing
            return Vasprun(filename, ionic_step_skip=5, parse_dos=False,
                           parse_eigen=False)
        except Exception as e2:
            logging.error(f"Alternative loading failed for {filename}: {str(e2)}")
            return None


def process_single_file(input_file, workers, output_dir):
    """Process a single VASP output file."""
    system_name = os.path.splitext(os.path.basename(input_file))[0]
    logging.info(f"Processing {input_file} with {workers} workers...")
    try:
        # Load the VASP results safely
        vr = load_vasprun_safe(input_file)
        if vr is None:
            logging.error(f"Failed to load VASP results from {input_file}")
            return None, None
        structures = vr.structures
        logging.info(f"Loaded {len(structures)} frames for {system_name}")
        # Compute the Op-H RDF
        r, rdf = calculate_rdf(
            structures,
            selector_phosphate_oxygens,
            selector_all_hydrogens,
            r_max=8.0,
            bin_width=0.05,
            exclude_bonds=True,
            bond_threshold=1.3,
            workers=workers
        )
        # Save the data
        save_success = save_rdf_data(r, rdf, output_dir, system_name)
        # Plot the per-system figure
        plot_success = plot_single_rdf(r, rdf, output_dir, system_name)
        if save_success and plot_success:
            logging.info(f"Completed processing for {system_name}")
        else:
            logging.warning(f"Processing for {system_name} completed with errors")
        return system_name, (r, rdf)
    except Exception as e:
        logging.error(f"Error processing {input_file}: {str(e)}")
        return None, None


def main(input_files, workers=1):
    """Main entry point: process multiple files."""
    start_time = time.time()
    # Create the output directory
    output_dir = "RDF_Results"
    os.makedirs(output_dir, exist_ok=True)
    # Collect RDF data for all systems
    all_rdf_data = {}
    # Process each input file
    for input_file in input_files:
        system_name, rdf_data = process_single_file(input_file, workers, output_dir)
        if rdf_data:
            all_rdf_data[system_name] = rdf_data
    # Plot the combined RDF figure
    if len(all_rdf_data) > 1:
        plot_combined_rdf(all_rdf_data, output_dir)
    elapsed = time.time() - start_time
    logging.info(f"Completed all processing in {elapsed:.2f} seconds")


if __name__ == "__main__":
    # Command-line arguments
    parser = argparse.ArgumentParser(
        description='Calculate Op-H RDF for multiple VASP simulations')
    parser.add_argument('input_files', type=str, nargs='+',
                        help='Input vasprun.xml files (e.g., vasprun1.xml vasprun2.xml ...)')
    parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
                        help=f'Number of parallel workers per file '
                             f'(default: {multiprocessing.cpu_count()})')
    args = parser.parse_args()
    logging.info(f"Starting Op-H RDF analysis for {len(args.input_files)} files "
                 f"with {args.workers} workers per file...")
    main(args.input_files, workers=args.workers)
```

When this code computes the Op-H RDF, does it automatically exclude distances below 1.3 Å?
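To the closing question: with the defaults passed in `process_single_file` (`exclude_bonds=True`, `bond_threshold=1.3`), `process_frame` skips every center-target pair whose `struct.get_distance` value falls below 1.3 Å before the distances reach the histogram, so covalently bonded O-H pairs should indeed be absent from the Op-H RDF. A minimal sketch of that filtering rule in isolation (the sample distances are hypothetical, not from the script):

```python
import numpy as np

def filter_bonded(distances, exclude_bonds=True, bond_threshold=1.3):
    """Mirror of the bond-exclusion step in process_frame: drop pair
    distances below bond_threshold when exclude_bonds is enabled."""
    d = np.asarray(distances, dtype=np.float64)
    return d[d >= bond_threshold] if exclude_bonds else d

# Covalent O-H distances (~1.0 A) are removed; H-bond distances survive.
print(filter_bonded([0.98, 1.25, 1.62, 2.40]))  # -> [1.62 2.4]
```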
```python
import os
import ctypes
import tkinter as tk
from tkinter import ttk, messagebox
import sys
import requests
import json
import re
import faiss
import numpy as np
import sounddevice as sd
import queue
import threading
import time
from vosk import Model, KaldiRecognizer

os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from sentence_transformers import SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    PyPDFLoader, Docx2txtLoader, TextLoader, UnstructuredExcelLoader
)

# Set the address of the Server.
server_url = 'http://127.0.0.1:8080/rkllm_chat'

# Create a session object.
session = requests.Session()
session.keep_alive = False  # Close the connection pool to maintain a long connection.
adapter = requests.adapters.HTTPAdapter(max_retries=5)
session.mount('https://', adapter)
session.mount('http://', adapter)

# Load the XTTS dynamic library
xtts_lib = ctypes.CDLL('./aikit_xtts.so')

# Declare the library function's signature
xtts_lib.ai_xtts.restype = ctypes.c_int        # return type: int
xtts_lib.ai_xtts.argtypes = [ctypes.c_char_p]  # argument type: C-style string

# RMS level below which an int16 audio block counts as silence
# (the original never defines this constant; the value is an assumption,
# tune it for your microphone)
SILENCE_THRESHOLD = 500


class FileBasedVectorDB:
    def __init__(self, embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
        # Initialize the text-embedding model
        self.embedder = SentenceTransformer(embedding_model)
        self.dimension = self.embedder.get_sentence_embedding_dimension()
        # The FAISS index is created lazily
        self.index = None
        self.documents = []  # raw text chunks
        self.metadata = []   # per-chunk metadata

    def import_from_folder(self, folder_path: str, file_extensions=None):
        """Import every file of the given types from a folder tree."""
        if file_extensions is None:
            file_extensions = ['.pdf', '.docx', '.doc', '.txt', '.xlsx', '.xls']
        print(f"Scanning folder: {folder_path}")
        file_paths = []
        for root, _, files in os.walk(folder_path):
            for file in files:
                if os.path.splitext(file)[1].lower() in file_extensions:
                    file_paths.append(os.path.join(root, file))
        print(f"Found {len(file_paths)} matching files")
        return self.process_files(file_paths)

    def import_from_filelist(self, file_list_path: str):
        """Import the documents listed in a text file."""
        if not os.path.exists(file_list_path):
            raise FileNotFoundError(f"File list does not exist: {file_list_path}")
        with open(file_list_path, 'r') as f:
            file_paths = [line.strip() for line in f if line.strip()]
        print(f"Importing {len(file_paths)} files from the file list")
        return self.process_files(file_paths)

    def process_files(self, file_paths: list):
        """Load each file and split it into text chunks."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        for path in file_paths:
            if not os.path.exists(path):
                print(f"⚠️ File does not exist: {path}")
                continue
            file_ext = os.path.splitext(path)[1].lower()
            print(f"Processing file: {path} ({file_ext})")
            try:
                # Pick a loader based on the file type
                if file_ext == '.pdf':
                    loader = PyPDFLoader(path)
                elif file_ext in ['.doc', '.docx']:
                    loader = Docx2txtLoader(path)
                elif file_ext == '.txt':
                    loader = TextLoader(path)
                elif file_ext in ['.xlsx', '.xls']:
                    loader = UnstructuredExcelLoader(path)
                else:
                    print(f"⚠️ Unsupported file format: {file_ext}")
                    continue
                # Load and split the document
                raw_docs = loader.load()
                chunks = text_splitter.split_documents(raw_docs)
                # Store the chunks and their metadata
                for idx, chunk in enumerate(chunks):
                    self.documents.append(chunk.page_content)
                    self.metadata.append({
                        "source": os.path.basename(path),
                        "path": path,
                        "chunk_id": idx,
                        "type": file_ext[1:]
                    })
                print(f" ├─ extracted {len(chunks)} text chunks")
            except Exception as e:
                print(f" ⚠️ Processing error: {str(e)}")
        print(f"Extracted {len(self.documents)} text chunks in total")
        return len(self.documents)

    def build_index(self, index_type="flat"):
        """Build the vector index."""
        if not self.documents:
            print("❌ No documents to process")
            return None
        # Embed the document chunks
        print("Generating text embeddings...")
        embeddings = self.embedder.encode(self.documents, show_progress_bar=True)
        # Create the index
        print(f"Creating {index_type.upper()} index...")
        if index_type == "flat":
            self.index = faiss.IndexFlatL2(self.dimension)
        elif index_type == "ivfflat":
            quantizer = faiss.IndexFlatL2(self.dimension)
            self.index = faiss.IndexIVFFlat(quantizer, self.dimension, 127)
            self.index.train(embeddings)  # IVF indexes must be trained
        elif index_type == "hnsw":
            self.index = faiss.IndexHNSWFlat(self.dimension, 32)
        # Add the vectors to the index
        self.index.add(np.array(embeddings).astype('float32'))
        print(f"✅ Index built with {len(self.documents)} vectors")
        return self.index

    def save_index(self, save_dir: str):
        """Save the index and its metadata."""
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # Save the FAISS index
        faiss.write_index(self.index, os.path.join(save_dir, "vector_index.index"))
        # Save the metadata as CSV for easy inspection
        import csv
        with open(os.path.join(save_dir, "metadata.csv"), 'w', newline='',
                  encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=["source", "path", "chunk_id", "type"])
            writer.writeheader()
            writer.writerows(self.metadata)
        # Save the text chunks (optional)
        with open(os.path.join(save_dir, "documents.txt"), 'w', encoding='utf-8') as f:
            for doc in self.documents:
                f.write(doc.replace('\n', ' ') + '\n')
        print(f"💾 Index saved to: {save_dir}")

    def load_index(self, save_dir: str):
        """Load a previously saved index."""
        # Load the FAISS index
        self.index = faiss.read_index(os.path.join(save_dir, "vector_index.index"))
        # Load the metadata
        import csv
        self.metadata = []
        with open(os.path.join(save_dir, "metadata.csv"), 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                self.metadata.append(row)
        # Load the text chunks
        self.documents = []
        if os.path.exists(os.path.join(save_dir, "documents.txt")):
            with open(os.path.join(save_dir, "documents.txt"), 'r', encoding='utf-8') as f:
                self.documents = [line.strip() for line in f]
        print(f"🔍 Index loaded with {len(self.metadata)} document chunks")

    def search(self, query: str, top_k: int = 5):
        """Vector similarity search."""
        if self.index is None:
            raise RuntimeError("Index not initialized; load or build it first")
        query_embed = self.embedder.encode([query])
        distances, indices = self.index.search(query_embed, top_k)
        results = []
        for idx, dist in zip(indices[0], distances[0]):
            results.append({
                "text": self.documents[idx],
                "metadata": self.metadata[idx],
                "distance": float(dist)
            })
        return results


class VoiceAssistantApp:
    def __init__(self, root, vector_db, result_queue):
        self.root = root
        self.vector_db = vector_db
        self.root.title("大牧人AI助手")
        self.root.geometry("1024x768")
        self.root.resizable(True, True)
        # Build the main UI
        self.create_main_frame()
        # Settings store
        self.settings = {
            "voice_engine": "Google",
            "language": "中文",
            "hotword": "你好,牧牧",
            "volume": 70,
            "response_speed": 3
        }
        # Load the speech model (path to a locally downloaded Vosk model)
        self.model = Model("vosk-model-small-cn-0.22")
        self.recognizer = KaldiRecognizer(self.model, 16000)
        self.recognizer.SetWords(True)  # word-level detail; SetWords takes a bool
        # Audio queue and stream
        self.audio_queue = queue.Queue()
        self.audio_stream = None
        self.result_queue = result_queue
        # Thread control
        self.listening = False
        self.processing_thread = None
        self.stop_event = threading.Event()
        self.last_sound_time = 0

    def intelligent_postprocess(self, text):
        # Map Latin letters to Chinese phonetic readings
        letter_map = {'A': '诶', 'B': '必', 'C': '西', 'D': '迪', 'E': '伊', 'F': '艾弗',
                      'G': '吉', 'H': '艾尺', 'I': '艾', 'J': '杰', 'K': '开', 'L': '艾勒',
                      'M': '艾姆', 'N': '艾娜', 'O': '哦', 'P': '屁', 'Q': '克由', 'R': '艾儿',
                      'S': '艾丝', 'T': '提', 'U': '伊吾', 'V': '维', 'W': '达布溜', 'X': '艾克斯',
                      'Y': '歪', 'Z': '贼德'}
        # Handle runs of uppercase letters (e.g. "AI系统" -> "诶艾系统")
        processed = ""
        buffer = ""
        for char in text:
            if char.isupper():
                buffer += char
            else:
                if buffer:
                    # Spell out common abbreviations letter by letter
                    if buffer in ["AI", "CPU", "GPU"]:
                        processed += "".join(letter_map[c] for c in buffer)
                    else:
                        processed += letter_map.get(buffer, buffer)
                    buffer = ""
                processed += char
        if buffer:
            # Flush a trailing run of uppercase letters
            processed += "".join(letter_map.get(c, c) for c in buffer)
        # Context-based corrections
        corrections = {
            "西提艾": "CPU",
            "记屁优": "GPU",
            "诶艾": "AI"
        }
        for wrong, correct in corrections.items():
            processed = processed.replace(wrong, correct)
        return processed

    def audio_callback(self, indata, frames, time_info, status):
        """Audio callback: push raw blocks into the queue."""
        if status:
            print(f"Audio error: {status}")
        try:
            # RawInputStream delivers a raw byte buffer; view it as int16 samples
            samples = np.frombuffer(indata, dtype=np.int16)
            # RMS level (volume) of the current block
            rms = np.sqrt(np.mean(samples.astype(np.float64) ** 2))
            self.audio_queue.put(bytes(indata))
            # Silence detection
            is_silent = rms < SILENCE_THRESHOLD
            # Remember when sound was last heard
            if not is_silent:
                self.last_sound_time = time.time()
        except Exception as e:
            error_msg = f"Audio queue error: {str(e)}"
            print(error_msg)

    def create_main_frame(self):
        # Main frame
        self.main_frame = ttk.Frame(self.root, padding=20)
        self.main_frame.pack(fill=tk.BOTH, expand=True)
        # Title label
        title_label = ttk.Label(
            self.main_frame,
            text="大牧人AI助手",
            font=("微软雅黑", 24, "bold"),
            foreground="#2c3e50"
        )
        title_label.pack(pady=20)
        # Status display area
        status_frame = ttk.LabelFrame(self.main_frame, text="System Status")
        status_frame.pack(fill=tk.X, padx=10, pady=10)
        self.status_label = ttk.Label(
            status_frame,
            text="Ready",
            foreground="green"
        )
        self.status_label.pack(pady=5)
        # Interaction log
        log_frame = ttk.LabelFrame(self.main_frame, text="Interaction Log")
        log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        self.log_text = tk.Text(
            log_frame,
            height=10,
            wrap=tk.WORD,
            state=tk.NORMAL
        )
        self.log_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.log_text.insert(tk.END, "System starting...\n")
        self.log_text.config(state=tk.DISABLED)
        scrollbar = ttk.Scrollbar(log_frame, command=self.log_text.yview)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.log_text.config(yscrollcommand=scrollbar.set)
        # Input area
        self.create_input_fields()
        # Control buttons
        button_frame = ttk.Frame(self.main_frame)
        button_frame.pack(fill=tk.X, padx=10, pady=20)
        self.start_btn = ttk.Button(
            button_frame,
            text="Start Listening",
            command=self.toggle_listening,
            width=15
        )
        self.start_btn.pack(side=tk.LEFT, padx=10)
        settings_btn = ttk.Button(
            button_frame,
            text="Settings",
            command=self.open_settings,
            width=15
        )
        settings_btn.pack(side=tk.LEFT, padx=10)
        clear_btn = ttk.Button(
            button_frame,
            text="Clear Log",
            command=self.clear_log,
            width=15
        )
        clear_btn.pack(side=tk.LEFT, padx=10)
        exit_btn = ttk.Button(
            button_frame,
            text="Exit",
            command=self.root.quit,
            width=15
        )
        exit_btn.pack(side=tk.RIGHT, padx=10)
        # Listening flag
        self.listening = False

    def create_input_fields(self):
        self.input_entry = ttk.Entry(self.main_frame, width=25)
        self.input_entry.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

    def rkllm_chat(self, user_message):
        is_streaming = True
        try:
            if user_message == "exit":
                print("============================")
                print("The RKLLM Server is stopping......")
                print("============================")
            else:
                # Set the request headers; in this case, the headers have no
                # actual effect and only simulate the OpenAI interface design.
                headers = {
                    'Content-Type': 'application/json',
                    'Authorization': 'not_required'
                }
                # Prepare the data to be sent:
                # model: the model name defined when setting up RKLLM-Server
                #        (has no effect here)
                # messages: the user's input question, which RKLLM-Server uses
                #           as input; multiple questions can be added
                # stream: whether to enable streaming, as in the OpenAI interface
                data = {
                    "model": 'your_model_deploy_with_RKLLM_Server',
                    "messages": [{"role": "user", "content": user_message}],
                    "stream": is_streaming,
                    "enable_thinking": False,
                    "tools": None
                }
                # Send a POST request
                responses = session.post(server_url, json=data, headers=headers,
                                         stream=is_streaming, verify=False)
                if not is_streaming:
                    # Parse the one-shot response
                    if responses.status_code == 200:
                        print("Q:", data["messages"][-1]["content"])
                        print("A:", json.loads(responses.text)["choices"][-1]["message"]["content"])
                    else:
                        print("Error:", responses.text)
                else:
                    if responses.status_code == 200:
                        print("Q:", data["messages"][-1]["content"])
                        print("A:", end="")
                        answer = "牧牧:"
                        for line in responses.iter_lines():
                            if line:
                                line = json.loads(line.decode('utf-8'))
                                if line["choices"][-1]["finish_reason"] != "stop":
                                    print(line["choices"][-1]["delta"]["content"], end="")
                                    answer += line["choices"][-1]["delta"]["content"]
                                    sys.stdout.flush()
                        self.add_log(answer)
                        byte = answer.encode('utf-8')
                        result_xtts = xtts_lib.ai_xtts(byte)  # the C library expects bytes
                    else:
                        print('Error:', responses.text)
        except KeyboardInterrupt:
            # Capture Ctrl-C to close the session
            session.close()
            print("\n")
            print("============================")
            print("The RKLLM Server is stopping......")
            print("============================")

    def toggle_listening(self):
        """Toggle voice listening on or off."""
        self.listening = not self.listening
        if self.listening:
            self.start_btn.config(text="Stop Listening")
            self.status_label.config(text="Listening...", foreground="red")
            self.add_log("Voice listening started...")
            # Recreate the recognizer to clear any previous state
            self.recognizer = KaldiRecognizer(self.model, 16000)
            self.recognizer.SetWords(True)  # word-level detail; SetWords takes a bool
            # Start the audio stream
            self.audio_stream = sd.RawInputStream(
                samplerate=16000,
                blocksize=4000,
                dtype='int16',
                channels=1,
                callback=self.audio_callback
            )
            self.audio_stream.start()
            self.add_log("Audio stream started")
            # Start the processing thread
            self.stop_event.clear()
            self.processing_thread = threading.Thread(target=self.process_audio)
            self.processing_thread.daemon = True
            self.processing_thread.start()
            self.add_log("Audio processing thread started")
        else:
            # Stop listening
            self.listening = False
            self.start_btn.config(text="Start Listening")
            self.status_label.config(text="Ready", foreground="green")
            self.add_log("Voice listening stopped")
            # Stop the audio stream
            if self.audio_stream:
                self.audio_stream.stop()
                self.audio_stream.close()
                self.audio_stream = None
                self.add_log("Audio stream closed")
            # Tell the processing thread to stop
            self.stop_event.set()
            if self.processing_thread and self.processing_thread.is_alive():
                self.processing_thread.join(timeout=1.0)

    def process_audio(self):
        """Worker thread: drain the audio queue and run recognition."""
        self.add_log("Audio processing thread running")
        print("Audio processing thread running")
        # Timeout bookkeeping
        no_data_counter = 0
        MAX_NO_DATA = 20  # silence timeout in seconds
        audio_data = []
        while not self.stop_event.is_set():
            try:
                # Non-blocking fetch of audio data
                if not self.audio_queue.empty():
                    data = self.audio_queue.get()
                    audio_data.append(data)
                    print(f"\nAudio block received; last sound at {self.last_sound_time:.1f}")
                current_time = time.time()
                silence_duration = current_time - self.last_sound_time
                if silence_duration > MAX_NO_DATA:
                    print(f"\nSilence for {silence_duration:.1f} s; stopping recording")
                    # If we have audio, feed it to the recognizer
                    if len(audio_data) > 0:
                        # AcceptWaveform expects bytes, so join the queued blocks
                        if self.recognizer.AcceptWaveform(b"".join(audio_data)):
                            # Final recognition result
                            result = json.loads(self.recognizer.Result())
                            text = result.get("text", "")
                            if text:
                                print(f"\nRecognition result: {text}")
                                self.result_queue.put(("full", text))
                                self.root.after(0, self.process_command, text)
                                self.stop_event.set()
                                self.root.after(0, self.toggle_listening)  # auto-stop
                        else:
                            # Partial result, useful for debugging
                            partial_result = json.loads(self.recognizer.PartialResult())
                            partial_text = partial_result.get("partial", "")
                            if partial_text:
                                print(f"\rLive recognition: {partial_text}", end="", flush=True)
                                self.result_queue.put(("partial", partial_text))
                                self.root.after(0, self.process_command, partial_text)
                                self.stop_event.set()
                                self.root.after(0, self.toggle_listening)  # auto-stop
                time.sleep(0.1)  # brief sleep to reduce CPU load
            except queue.Empty:
                # Count empty-queue polls
                no_data_counter += 1
                if no_data_counter >= MAX_NO_DATA:
                    self.add_log("Audio data timed out; stopping listening")
                    self.root.after(0, self.toggle_listening)  # auto-stop
                continue
            except Exception as e:
                error_msg = f"Audio processing error: {str(e)}"
                self.add_log(error_msg)
                print(error_msg)
                import traceback
                traceback.print_exc()
        self.add_log("Audio processing thread exiting")
        print("Audio processing thread exiting")
        # Drain the queue
        while not self.audio_queue.empty():
            try:
                self.audio_queue.get_nowait()
            except queue.Empty:
                break

    def process_command(self, command):
        """Handle a recognized voice command."""
        self.add_log(f"User: {command}")
        # Hotword check (disabled)
        # if self.settings["hotword"] in command:
        #     command = command.replace(self.settings["hotword"], "").strip()
        #     self.add_log(f"Hotword detected: {self.settings['hotword']}")
        if not self.result_queue.empty():
            # Vector search (strip all spaces from the query first)
            no_space_command = command.translate(str.maketrans('', '', ' '))
            results = self.vector_db.search(no_space_command)
            context = ""
            for i, res in enumerate(results[:3]):  # keep the top three hits
                print(f"\n🔍 Result #{i+1} (distance: {res['distance']:.3f})")
                distance = res['distance']
                if distance < 1.5:
                    context += res['text'][:150] + "\n"
            prompt = (f"Context: {context}\nQuestion: {no_space_command}"
                      if context else no_space_command)
            self.result_queue.get()  # consume the queued recognition result
            self.result_queue.task_done()
            self.add_log(f"Me: {no_space_command}...")
            # Call the LLM
            self.rkllm_chat(prompt)

    def add_log(self, message):
        """Append a message to the interaction log."""
        self.log_text.config(state=tk.NORMAL)
        self.log_text.insert(tk.END, f"> {message}\n")
        self.log_text.see(tk.END)  # scroll to the bottom
        self.log_text.config(state=tk.DISABLED)

    def clear_log(self):
        """Clear the interaction log."""
        self.log_text.config(state=tk.NORMAL)
        self.log_text.delete(1.0, tk.END)
        self.log_text.insert(tk.END, "Log cleared\n")
        self.log_text.config(state=tk.DISABLED)

    def open_settings(self):
        """Open the settings window."""
        settings_window = tk.Toplevel(self.root)
        settings_window.title("Settings")
        settings_window.geometry("500x400")
        settings_window.transient(self.root)  # attach to the main window
        settings_window.grab_set()            # modal window
        # Settings frame
        settings_frame = ttk.LabelFrame(settings_window, text="Voice Assistant Settings")
        settings_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)
        # Voice engine
        ttk.Label(settings_frame, text="Voice engine:").grid(
            row=0, column=0, padx=10, pady=10, sticky=tk.W)
        engine_var = tk.StringVar(value=self.settings["voice_engine"])
        engine_combo = ttk.Combobox(
            settings_frame,
            textvariable=engine_var,
            values=["Google", "Baidu", "Microsoft", "Amazon"],
            state="readonly",
            width=15
        )
        engine_combo.grid(row=0, column=1, padx=10, pady=10, sticky=tk.W)
        # Recognition language
        ttk.Label(settings_frame, text="Language:").grid(
            row=1, column=0, padx=10, pady=10, sticky=tk.W)
        lang_var = tk.StringVar(value=self.settings["language"])
        lang_combo = ttk.Combobox(
            settings_frame,
            textvariable=lang_var,
            values=["中文", "English", "日本語", "Español"],
            state="readonly",
            width=15
        )
        lang_combo.grid(row=1, column=1, padx=10, pady=10, sticky=tk.W)
        # Hotword
        ttk.Label(settings_frame, text="Hotword:").grid(
            row=2, column=0, padx=10, pady=10, sticky=tk.W)
        hotword_entry = ttk.Entry(settings_frame, width=20)
        hotword_entry.insert(0, self.settings["hotword"])
        hotword_entry.grid(row=2, column=1, padx=10, pady=10, sticky=tk.W)
        # Volume
        ttk.Label(settings_frame, text="Volume:").grid(
            row=3, column=0, padx=10, pady=10, sticky=tk.W)
        volume_var = tk.IntVar(value=self.settings["volume"])
        volume_scale = ttk.Scale(
            settings_frame,
            from_=0, to=100,
            variable=volume_var,
            length=200,
            orient=tk.HORIZONTAL
        )
        volume_scale.grid(row=3, column=1, padx=10, pady=10, sticky=tk.W)
        volume_label = ttk.Label(settings_frame, text=f"{self.settings['volume']}%")
        volume_label.grid(row=3, column=2, padx=5, sticky=tk.W)
        # Response speed
        ttk.Label(settings_frame, text="Response speed:").grid(
            row=4, column=0, padx=10, pady=10, sticky=tk.W)
        speed_var = tk.IntVar(value=self.settings["response_speed"])
        speed_scale = ttk.Scale(
            settings_frame,
            from_=1, to=5,
            variable=speed_var,
            length=200,
            orient=tk.HORIZONTAL
        )
        speed_scale.grid(row=4, column=1, padx=10, pady=10, sticky=tk.W)
        speed_label = ttk.Label(settings_frame,
                                text=f"Level {self.settings['response_speed']}")
        speed_label.grid(row=4, column=2, padx=5, sticky=tk.W)

        # Label-update callbacks
        def update_volume_label(val):
            volume_label.config(text=f"{int(float(val))}%")

        def update_speed_label(val):
            speed_label.config(text=f"Level {int(float(val))}")

        volume_scale.config(command=update_volume_label)
        speed_scale.config(command=update_speed_label)

        # Save button
        def save_settings():
            self.settings = {
                "voice_engine": engine_var.get(),
                "language": lang_var.get(),
                "hotword": hotword_entry.get(),
                "volume": int(volume_var.get()),
                "response_speed": int(speed_var.get())
            }
            messagebox.showinfo("Settings", "Settings saved successfully!")
            settings_window.destroy()
            self.add_log("System settings updated")

        save_btn = ttk.Button(
            settings_frame,
            text="Save",
            command=save_settings,
            width=15
        )
        save_btn.grid(row=5, column=1, pady=20)


if __name__ == "__main__":
    result_queue = queue.Queue()  # recognition-result queue
    root = tk.Tk()
    vector_db = FileBasedVectorDB()            # initialize the local vector store
    vector_db.load_index("vector_db_storage")  # load the prebuilt local index
    app = VoiceAssistantApp(root, vector_db, result_queue)
    root.mainloop()
```
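The `__main__` block above assumes an index already exists in `vector_db_storage/`. A plausible one-time build step using the class's own methods (the `./knowledge_base` folder is a hypothetical stand-in for your document directory):

```python
# Assumed offline step: run once before launching the GUI.
# FileBasedVectorDB is the class defined above (import it if this
# snippet lives in a separate script).

if __name__ == "__main__":
    db = FileBasedVectorDB()
    # Ingest every supported file under the (hypothetical) document folder.
    n_chunks = db.import_from_folder("./knowledge_base")
    if n_chunks > 0:
        # "flat" gives exact L2 search; "ivfflat"/"hnsw" are the approximate options.
        db.build_index(index_type="flat")
        # Save under the directory name the GUI's load_index() expects.
        db.save_index("vector_db_storage")
```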
```python
import sys
import io
import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
import matplotlib as mpl
import warnings
import os
import argparse
import multiprocessing
from functools import partial
import time
import logging
from collections import defaultdict

# Force UTF-8 output to support special characters
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("rdf_analysis.log", encoding='utf-8'),
        logging.StreamHandler()
    ]
)

# Suppress expected warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Publication-quality plot settings
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman', 'DejaVu Serif'],
    'font.size': 12,
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'figure.dpi': 600,
    'savefig.dpi': 600,
    'figure.figsize': (8, 6),
    'lines.linewidth': 2.0,
    'legend.fontsize': 10,
    'legend.framealpha': 0.8,
    'mathtext.default': 'regular',
    'axes.linewidth': 1.5,
    'xtick.major.width': 1.5,
    'ytick.major.width': 1.5,
    'xtick.major.size': 5,
    'ytick.major.size': 5,
})


def identify_atom_types(struct):
    """
    Identify phosphate oxygens (Op), non-phosphate oxygens (O_non-Op),
    and all hydrogen atoms (H).
    :param struct: pymatgen Structure
    :return: dict with Op, O_non-Op, and H indices
    """
    phosphate_oxygens = []
    all_oxygens = []
    all_hydrogens = []
    # Collect all oxygen and hydrogen atoms
    for i, site in enumerate(struct):
        if site.species_string == "O":
            all_oxygens.append(i)
        elif site.species_string == "H":
            all_hydrogens.append(i)
    # Phosphate oxygens: O atoms within 1.6 A of a P atom
    for i, site in enumerate(struct):
        if site.species_string == "P":
            neighbors = struct.get_neighbors(site, r=1.6)
            for neighbor in neighbors:
                if neighbor[0].species_string == "O":
                    phosphate_oxygens.append(neighbor[0].index)
    # Remove duplicates
    phosphate_oxygens = list(set(phosphate_oxygens))
    # Non-phosphate oxygens = all oxygens minus phosphate oxygens
    non_phosphate_oxygens = [idx for idx in all_oxygens if idx not in phosphate_oxygens]
    logging.info(f"Phosphate oxygens (Op): {len(phosphate_oxygens)}, "
                 f"non-phosphate oxygens (O_non-Op): {len(non_phosphate_oxygens)}, "
                 f"hydrogen atoms (H): {len(all_hydrogens)}")
    return {
        "phosphate_oxygens": phosphate_oxygens,
        "non_phosphate_oxygens": non_phosphate_oxygens,
        "all_hydrogens": all_hydrogens
    }


def process_frame(struct, center_sel, target_sel, r_max, exclude_bonds, bond_threshold):
    """Compute pair distances for one frame of the trajectory."""
    try:
        atom_types = identify_atom_types(struct)
        centers = center_sel(atom_types)
        targets = target_sel(atom_types)
        # Handle empty selections
        if len(centers) == 0 or len(targets) == 0:
            logging.warning("No centers or targets found in frame")
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": 0, "n_targets": 0, "volume": struct.volume}
        center_coords = np.array([struct[i].coords for i in centers])
        target_coords = np.array([struct[i].coords for i in targets])
        lattice = struct.lattice
        kdtree = cKDTree(target_coords, boxsize=lattice.abc)
        # Cap the neighbour count at the number of targets
        k_val = min(50, len(targets))
        if k_val == 0:
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": len(centers), "n_targets": len(targets),
                    "volume": struct.volume}
        try:
            distances, indices = kdtree.query(center_coords, k=k_val,
                                              distance_upper_bound=r_max)
        except Exception as e:
            logging.error(f"KDTree query error: {str(e)}")
            return {"distances": np.array([], dtype=np.float64),
                    "n_centers": len(centers), "n_targets": len(targets),
                    "volume": struct.volume}
        if distances.ndim == 1:  # k_val == 1 returns 1-D arrays
            distances = distances.reshape(-1, 1)
            indices = indices.reshape(-1, 1)
        # Filter the query results
        valid_distances = []
        for i in range(distances.shape[0]):
            center_idx = centers[i]
            for j in range(distances.shape[1]):
                dist = distances[i, j]
                if dist > r_max or np.isinf(dist):
                    continue
                target_idx = targets[indices[i, j]]
                # Exclude covalent bonds
                if exclude_bonds:
                    actual_dist = struct.get_distance(center_idx, target_idx)
                    if actual_dist < bond_threshold:
                        continue
                valid_distances.append(dist)
        return {"distances": np.array(valid_distances, dtype=np.float64),
                "n_centers": len(centers), "n_targets": len(targets),
                "volume": struct.volume}
    except Exception as e:
        logging.error(f"Error processing frame: {str(e)}")
        return {"distances": np.array([], dtype=np.float64),
                "n_centers": 0, "n_targets": 0, "volume": struct.volume}


def calculate_rdf(structures, center_sel, target_sel, r_max=8.0, bin_width=0.05,
                  exclude_bonds=True, bond_threshold=1.3, workers=1):
    """
    Compute the radial distribution function.
    :param workers: number of parallel worker processes
    """
    bins = np.arange(0, r_max, bin_width)
    hist = np.zeros(len(bins) - 1)
    total_centers = 0
    total_targets = 0
    total_volume = 0
    func = partial(process_frame, center_sel=center_sel, target_sel=target_sel,
                   r_max=r_max, exclude_bonds=exclude_bonds,
                   bond_threshold=bond_threshold)
    # Process frames in a multiprocessing pool
    with multiprocessing.Pool(processes=workers) as pool:
        results = []
        try:
            for res in pool.imap_unordered(func, structures):
                results.append(res)
        except Exception as e:
            logging.error(f"Error in parallel processing: {str(e)}")
    # Accumulate the per-frame results
    n_frames = 0
    for res in results:
        if res is None:
            continue
        n_frames += 1
        valid_distances = res["distances"]
        n_centers = res["n_centers"]
        n_targets = res["n_targets"]
        volume = res["volume"]
        if len(valid_distances) > 0:
            hist += np.histogram(valid_distances, bins=bins)[0]
        total_centers += n_centers
        total_targets += n_targets
        total_volume += volume
    if n_frames == 0:
        r = bins[:-1] + bin_width / 2
        return r, np.zeros_like(r)
    avg_density = total_targets / total_volume if total_volume > 0 else 0
    r = bins[:-1] + bin_width / 2
    rdf = np.zeros_like(r)
    for i in range(len(hist)):
        r_lower = bins[i]
        r_upper = bins[i + 1]
        shell_vol = 4 / 3 * np.pi * (r_upper ** 3 - r_lower ** 3)
        expected_count = shell_vol * avg_density * total_centers
        if expected_count > 1e-10:
            rdf[i] = hist[i] / expected_count
        else:
            rdf[i] = 0
    return r, rdf


# Selector functions
def selector_non_phosphate_oxygens(atom_types):
    return atom_types["non_phosphate_oxygens"]


def selector_all_hydrogens(atom_types):
    return atom_types["all_hydrogens"]


def save_rdf_data(r, rdf, output_dir, system_name):
    """Save the RDF data to a text file."""
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f"{system_name}_O_non-Op_H_RDF.txt")
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            # Use "A" instead of "Å" to avoid encoding problems
            f.write("# Distance (A)\tg(r)\n")
            for i in range(len(r)):
                f.write(f"{r[i]:.4f}\t{rdf[i]:.6f}\n")
        logging.info(f"Saved RDF data to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error saving RDF data: {str(e)}")
        return False


def plot_single_rdf(r, rdf, output_dir, system_name):
    """Plot and save a single RDF curve."""
    try:
        fig, ax = plt.subplots(figsize=(8, 6))
        # Plot the RDF
        ax.plot(r, rdf, 'b-', linewidth=2.0, label=f'{system_name} O_non-Op-H RDF')
        # Mark the hydrogen-bond region
        ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
        ax.text(1.3, np.max(rdf) * 0.8, 'H-bond Region', fontsize=12)
        # Axes (use "A" instead of "Å")
        ax.set_xlim(0, 6.0)
        ax.set_ylim(0, np.max(rdf) * 1.2)
        ax.set_xlabel('Radial Distance (A)', fontweight='bold')
        ax.set_ylabel('g(r)', fontweight='bold')
        ax.set_title(f'{system_name}: Non-Phosphate Oxygen - Hydrogen RDF',
                     fontsize=16, pad=15)
        # Grid and legend
        ax.grid(True, linestyle='--', alpha=0.5)
        ax.legend(loc='best', framealpha=0.8)
        # Save the figure
        plt.tight_layout()
        filename = os.path.join(output_dir, f"{system_name}_O_non-Op_H_RDF.tiff")
        plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
        plt.close()
        logging.info(f"Saved RDF plot to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error plotting RDF for {system_name}: {str(e)}")
        return False


def plot_combined_rdf(all_rdf_data, output_dir):
    """Plot and save the combined RDF figure."""
    try:
        fig, ax = plt.subplots(figsize=(10, 8))
        # Colour and line-style cycles
        colors = ['b', 'r', 'g', 'm', 'c', 'y', 'k']
        line_styles = ['-', '--', '-.', ':']
        # Plot every system's RDF curve
        for i, (system_name, (r, rdf)) in enumerate(all_rdf_data.items()):
            color = colors[i % len(colors)]
            line_style = line_styles[(i // len(colors)) % len(line_styles)]
            ax.plot(r, rdf, color=color, linestyle=line_style, linewidth=2.0,
                    label=system_name)
        # Mark the hydrogen-bond region
        ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
        ax.text(1.3, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()]) * 0.8,
                'H-bond Region', fontsize=12)
        # Axes
        ax.set_xlim(0, 6.0)
        ax.set_ylim(0, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()]) * 1.2)
        ax.set_xlabel('Radial Distance (A)', fontweight='bold')
        ax.set_ylabel('g(r)', fontweight='bold')
        ax.set_title('Non-Phosphate Oxygen - Hydrogen RDF Comparison',
                     fontsize=16, pad=15)
        # Grid and legend
        ax.grid(True, linestyle='--', alpha=0.5)
        ax.legend(loc='best', framealpha=0.8)
        # Save the figure
        plt.tight_layout()
        filename = os.path.join(output_dir, "combined_O_non-Op_H_RDF.tiff")
        plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
        plt.close()
        logging.info(f"Saved combined RDF plot to: {filename}")
        return True
    except Exception as e:
        logging.error(f"Error plotting combined RDF: {str(e)}")
        return False


def load_vasprun_safe(filename):
    """Load a vasprun.xml robustly, handling parsing problems."""
    try:
        # Try pymatgen's standard loader first
        return Vasprun(filename, ionic_step_skip=5)
    except Exception as e:
        logging.warning(f"Standard loading failed for {filename}: {str(e)}. "
                        f"Trying alternative method...")
        try:
            # Retry without DOS/eigenvalue parsing
            return Vasprun(filename, ionic_step_skip=5, parse_dos=False,
                           parse_eigen=False)
        except Exception as e2:
            logging.error(f"Alternative loading failed for {filename}: {str(e2)}")
            return None


def process_single_file(input_file, workers, output_dir):
    """Process a single VASP output file."""
    system_name = os.path.splitext(os.path.basename(input_file))[0]
    logging.info(f"Processing {input_file} with {workers} workers...")
    try:
        # Load the VASP results safely
        vr = load_vasprun_safe(input_file)
        if vr is None:
            logging.error(f"Failed to load VASP results from {input_file}")
            return None, None
        structures = vr.structures
        logging.info(f"Loaded {len(structures)} frames for {system_name}")
        # Compute the O_non-Op-H RDF
        r, rdf = calculate_rdf(
            structures,
            selector_non_phosphate_oxygens,  # centers: non-phosphate oxygens
            selector_all_hydrogens,          # targets: all hydrogen atoms
            r_max=8.0,
            bin_width=0.05,
            exclude_bonds=True,
            bond_threshold=0,
            workers=workers
        )
        # Save the data
        save_success = save_rdf_data(r, rdf, output_dir, system_name)
        # Plot the per-system figure
        plot_success = plot_single_rdf(r, rdf, output_dir, system_name)
        if save_success and plot_success:
            logging.info(f"Completed processing for {system_name}")
        else:
            logging.warning(f"Processing for {system_name} completed with errors")
        return system_name, (r, rdf)
    except Exception as e:
        logging.error(f"Error processing {input_file}: {str(e)}")
        return None, None


def main(input_files, workers=1):
    """Main entry point: process multiple files."""
    start_time = time.time()
    # Create the output directory
    output_dir = "RDF_Results"
    os.makedirs(output_dir, exist_ok=True)
    # Collect RDF data for all systems
    all_rdf_data = {}
    # Process each input file
    for input_file in input_files:
        system_name, rdf_data = process_single_file(input_file, workers, output_dir)
        if rdf_data:
            all_rdf_data[system_name] = rdf_data
    # Plot the combined RDF figure
    if len(all_rdf_data) > 1:
        plot_combined_rdf(all_rdf_data, output_dir)
    elapsed = time.time() - start_time
    logging.info(f"Completed all processing in {elapsed:.2f} seconds")


if __name__ == "__main__":
    # Command-line arguments
    parser = argparse.ArgumentParser(
        description='Calculate O_non-Op-H RDF for multiple VASP simulations')
    parser.add_argument('input_files', type=str, nargs='+',
                        help='Input vasprun.xml files (e.g., vasprun1.xml vasprun2.xml ...)')
    parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
                        help=f'Number of parallel workers per file '
                             f'(default: {multiprocessing.cpu_count()})')
    args = parser.parse_args()
    logging.info(f"Starting O_non-Op-H RDF analysis for {len(args.input_files)} files "
                 f"with {args.workers} workers per file...")
    main(args.input_files, workers=args.workers)
```

Does this code output the RDF between the non-phosphate oxygens and all of the H atoms? And how are the output files named?
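To the closing questions: yes, `process_single_file` passes `selector_non_phosphate_oxygens` as the centers and `selector_all_hydrogens` as the targets, so the curves are O_non-Op-H RDFs; note also that `bond_threshold=0` means the bond-exclusion branch filters nothing here, unlike the Op-H variant. Output names are derived from each input file's base name. A small sketch of the pattern (the input path is hypothetical):

```python
import os

# Mirrors the naming logic in process_single_file, save_rdf_data,
# plot_single_rdf, and plot_combined_rdf above.
input_file = "runs/vasprun1.xml"                                 # hypothetical path
system_name = os.path.splitext(os.path.basename(input_file))[0]  # -> "vasprun1"

print(os.path.join("RDF_Results", f"{system_name}_O_non-Op_H_RDF.txt"))   # RDF data
print(os.path.join("RDF_Results", f"{system_name}_O_non-Op_H_RDF.tiff"))  # per-system plot
print(os.path.join("RDF_Results", "combined_O_non-Op_H_RDF.tiff"))        # comparison plot
```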