for i, (name, code) in enumerate(colors):解释

for i, (name, code) in enumerate(colors):

是 Python 中的一个 for 循环语句,它用于遍历一个可迭代对象 colors,并获取每个元素的索引以及元素本身。

逐步解析这行代码:

1. for 循环:

for 是用来遍历可迭代对象(如列表、元组、字典、字符串等)的关键字。在这里,它用于遍历 colors

2. enumerate(colors)

  • enumerate 是一个内置的 Python 函数,它将一个可迭代对象(例如列表)与索引配对,并返回一个迭代器。enumerate 会返回一个由索引和元素组成的元组。
  • 如果 colors 是一个列表或其他类型的可迭代对象,enumerate(colors) 会产生一系列 (索引, 元素) 对。

例如,假设 colors = [("red", "#FF0000"), ("green", "#00FF00"), ("blue", "#0000FF")],那么 enumerate(colors) 会生成以下内容:

import numpy as np import matplotlib.pyplot as plt from pymatgen.io.vasp import Vasprun from pymatgen.core import Structure from scipy.optimize import curve_fit from tqdm import tqdm import matplotlib as mpl from collections import defaultdict from multiprocessing import Pool, cpu_count import time import argparse import os from scipy.spatial import cKDTree from scipy.signal import savgol_filter # ================ 配置区域 ================ # 在这里设置您的 vasprun.xml 文件默认路径 DEFAULT_VASPRUN_PATH = "C:/Users/zheng/vasprun4.xml" # 更改为您的实际路径 # ========================================= # 1. 设置期刊绘图格式 (The Journal of Chemical Physics) mpl.rcParams['font.family'] = 'sans-serif' mpl.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans', 'Liberation Sans'] mpl.rcParams['font.size'] = 10 mpl.rcParams['axes.linewidth'] = 1.2 mpl.rcParams['xtick.major.width'] = 1.2 mpl.rcParams['ytick.major.width'] = 1.2 mpl.rcParams['lines.linewidth'] = 2 mpl.rcParams['figure.dpi'] = 300 mpl.rcParams['figure.figsize'] = (3.5, 2.8) # 紧凑尺寸适合期刊 mpl.rcParams['savefig.dpi'] = 600 mpl.rcParams['savefig.bbox'] = 'tight' mpl.rcParams['savefig.pad_inches'] = 0.05 mpl.rcParams['pdf.fonttype'] = 42 mpl.rcParams['mathtext.fontset'] = 'dejavusans' # 2. 增强的原子类型识别函数 - 明确区分所有原子类型 def identify_atom_types(struct): """识别所有关键原子类型并排除自身化学键""" # 磷酸氧分类 p_oxygens = {"P=O": [], "P-O": [], "P-OH": []} phosphate_hydrogens = [] # 仅P-OH基团中的H原子 # 水合氢离子识别 hydronium_oxygens = [] hydronium_hydrogens = [] # H₃O⁺中的H原子 # 普通水分子 water_oxygens = [] water_hydrogens = [] # 普通水中的H原子 # 氟离子 fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"] # 铝离子 aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"] # 创建快速邻居查找表 neighbor_cache = defaultdict(list) for i, site in enumerate(struct): if site.species_string == "O": neighbors = struct.get_neighbors(site, r=1.3) h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"] neighbor_cache[i] = [h.index for h in h_neighbors] # 识别水合氢离子 (H₃O⁺) if len(h_neighbors) == 3: hydronium_oxygens.append(i) hydronium_hydrogens.extend([h.index for h in h_neighbors]) # 识别磷酸基团 for site in struct: if site.species_string == "P": neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径 # 筛选氧原子邻居 o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"] if len(o_neighbors) < 4: # 如果找不到4个氧原子,使用旧方法 for neighbor in o_neighbors: nn_site = neighbor[0] if neighbor[1] < 1.55: p_oxygens["P=O"].append(nn_site.index) else: if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)): p_oxygens["P-OH"].append(nn_site.index) else: p_oxygens["P-O"].append(nn_site.index) continue # 按距离排序 o_neighbors.sort(key=lambda x: x[1]) # 最近的氧原子为P=O p_double_o = o_neighbors[0][0] p_oxygens["P=O"].append(p_double_o.index) # 其他三个氧原子 for i in range(1, 4): o_site = o_neighbors[i][0] # 检查氧原子上是否有氢 if neighbor_cache.get(o_site.index, []): p_oxygens["P-OH"].append(o_site.index) else: p_oxygens["P-O"].append(o_site.index) # 识别P-OH基团中的H原子 (磷酸中的H) for o_idx in p_oxygens["P-OH"]: # 获取与P-OH氧相连的H原子 h_neighbors = neighbor_cache.get(o_idx, []) phosphate_hydrogens.extend(h_neighbors) # 识别普通水分子 (排除磷酸氧和水合氢离子) for i, site in enumerate(struct): if site.species_string == "O" and i not in hydronium_oxygens: is_phosphate_oxygen = False for cat in p_oxygens.values(): if i in cat: is_phosphate_oxygen = True break if not is_phosphate_oxygen: water_oxygens.append(i) # 识别普通水分子中的H原子 (水中的H) for o_idx in water_oxygens: h_neighbors = neighbor_cache.get(o_idx, []) water_hydrogens.extend(h_neighbors) return { "phosphate_oxygens": p_oxygens, "phosphate_hydrogens": phosphate_hydrogens, "water_oxygens": water_oxygens, "water_hydrogens": water_hydrogens, "hydronium_oxygens": hydronium_oxygens, "hydronium_hydrogens": hydronium_hydrogens, "fluoride_atoms": fluoride_atoms, "aluminum_atoms": aluminum_atoms } # 3. 预计算结构特征 def precompute_structural_features(structure): """预计算结构特征减少重复计算""" atom_types = identify_atom_types(structure) features = { 'atom_types': atom_types, 'volume': structure.volume, 'structure': structure } # 预构建KDTree用于快速距离查询 all_coords = np.array([site.coords for site in structure]) features['kdtree'] = cKDTree(all_coords) return features # 4. 氢键识别函数 (基于新分类) def identify_h_bonds_optimized(features, bond_type, r_cut=3.5, angle_cut=130): """使用预计算特征识别氢键""" struct = features['structure'] atom_types = features['atom_types'] kdtree = features['kdtree'] h_bonds = [] # 从原子类型字典中获取各类原子 p_oxy = atom_types["phosphate_oxygens"] po4_o = p_oxy["P=O"] + p_oxy["P-O"] + p_oxy["P-OH"] po4_h = atom_types["phosphate_hydrogens"] water_o = atom_types["water_oxygens"] water_h = atom_types["water_hydrogens"] hydronium_o = atom_types["hydronium_oxygens"] hydronium_h = atom_types["hydronium_hydrogens"] fluoride = atom_types["fluoride_atoms"] # 1. F与水中的H (F...H-O_water) if bond_type == 'F_H2O': for f_idx in fluoride: # 查找水中的H原子 for h_idx in water_h: # 排除化学键 (F-H) if struct.get_distance(f_idx, h_idx) < 1.3: continue # 检查距离 d_fh = struct.get_distance(f_idx, h_idx) if d_fh > r_cut: continue # 获取H所属的O原子 o_idx = next((o for o in water_o if h_idx in atom_types["water_hydrogens"]), None) if o_idx is None: continue # 计算角度 angle = struct.get_angle(o_idx, h_idx, f_idx) if angle > angle_cut: h_bonds.append((o_idx, f_idx)) # 2. F与水合氢离子的H (F...H-H3O) elif bond_type == 'F_H3O': for f_idx in fluoride: for h_idx in hydronium_h: # 排除化学键 if struct.get_distance(f_idx, h_idx) < 1.3: continue d_fh = struct.get_distance(f_idx, h_idx) if d_fh > r_cut: continue # 获取H所属的O原子 (水合氢离子) o_idx = next((o for o in hydronium_o if h_idx in atom_types["hydronium_hydrogens"]), None) if o_idx is None: continue angle = struct.get_angle(o_idx, h_idx, f_idx) if angle > angle_cut: h_bonds.append((o_idx, f_idx)) # 3. F与磷酸上的H (F...H-PO4) elif bond_type == 'F_PO4': for f_idx in fluoride: for h_idx in po4_h: # 排除化学键 if struct.get_distance(f_idx, h_idx) < 1.3: continue d_fh = struct.get_distance(f_idx, h_idx) if d_fh > r_cut: continue # 获取H所属的O原子 (磷酸) o_idx = next((o for o in p_oxy["P-OH"] if h_idx in atom_types["phosphate_hydrogens"]), None) if o_idx is None: continue angle = struct.get_angle(o_idx, h_idx, f_idx) if angle > angle_cut: h_bonds.append((o_idx, f_idx)) # 4. 磷酸与磷酸之间的氢键 elif bond_type == 'PO4_PO4': # 受体: 所有磷酸氧 acceptors = po4_o # 供体: P-OH基团 donors = [(o_idx, h_idx) for o_idx in p_oxy["P-OH"] for h_idx in atom_types["phosphate_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: # 查找附近的H原子 h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in po4_h: continue # 检查是否属于供体 donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue # 排除自身化学键 if struct.get_distance(acc_o, donor_o) < 1.8: continue # 计算角度 angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 5. 水与水之间的氢键 elif bond_type == 'H2O_H2O': # 受体: 水中的O acceptors = water_o # 供体: 水中的H donors = [(o_idx, h_idx) for o_idx in water_o for h_idx in atom_types["water_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in water_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None or donor_o == acc_o: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 6. 水合氢离子与水合氢离子之间的氢键 elif bond_type == 'H3O_H3O': acceptors = hydronium_o donors = [(o_idx, h_idx) for o_idx in hydronium_o for h_idx in atom_types["hydronium_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in hydronium_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None or donor_o == acc_o: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 7. 磷酸与水之间的氢键 (磷酸作为受体) elif bond_type == 'PO4_H2O_a': acceptors = po4_o donors = [(o_idx, h_idx) for o_idx in water_o for h_idx in atom_types["water_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in water_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 8. 磷酸与水之间的氢键 (磷酸作为供体) elif bond_type == 'PO4_H2O_d': acceptors = water_o donors = [(o_idx, h_idx) for o_idx in p_oxy["P-OH"] for h_idx in atom_types["phosphate_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in po4_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 9. 水与水合氢离子 (水作为受体) elif bond_type == 'H3O_H2O_a': acceptors = water_o donors = [(o_idx, h_idx) for o_idx in hydronium_o for h_idx in atom_types["hydronium_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in hydronium_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 10. 水与水合氢离子 (水合氢离子作为受体) elif bond_type == 'H3O_H2O_d': acceptors = hydronium_o donors = [(o_idx, h_idx) for o_idx in water_o for h_idx in atom_types["water_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in water_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 11. 磷酸与水合氢离子 (磷酸作为受体) elif bond_type == 'PO4_H3O_a': acceptors = po4_o donors = [(o_idx, h_idx) for o_idx in hydronium_o for h_idx in atom_types["hydronium_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in hydronium_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) # 12. 磷酸与水合氢离子 (磷酸作为供体) elif bond_type == 'PO4_H3O_d': acceptors = hydronium_o donors = [(o_idx, h_idx) for o_idx in p_oxy["P-OH"] for h_idx in atom_types["phosphate_hydrogens"] if struct.get_distance(o_idx, h_idx) < 1.3] for acc_o in acceptors: h_indices = kdtree.query_ball_point(struct[acc_o].coords, r=r_cut) for h_idx in h_indices: if h_idx not in po4_h: continue donor_o = next((o for o, h in donors if h == h_idx), None) if donor_o is None: continue if struct.get_distance(acc_o, donor_o) < 1.8: continue angle = struct.get_angle(donor_o, h_idx, acc_o) if angle > angle_cut: h_bonds.append((donor_o, acc_o)) return h_bonds # 5. 并行处理框架 (保持原样) def process_frame(args): """处理单个帧的并行函数""" i, structure, bond_types = args features = precompute_structural_features(structure) frame_results = {} for bt in bond_types: bonds = identify_h_bonds_optimized(features, bt) density = len(bonds) / features['volume'] * 1e24 / 6.022e23 frame_results[bt] = (bonds, density) return i, frame_results def calculate_tcf_parallel(h_bond_list, max_dt, n_workers): """并行计算时间关联函数""" total_bonds = len(h_bond_list[0]) if len(h_bond_list) > 0 else 0 tcf = np.zeros(max_dt) counts = np.zeros(max_dt, dtype=int) total_frames = len(h_bond_list) if total_frames - max_dt <= 0: print(f"Warning: Not enough frames for TCF calculation (total_frames={total_frames}, max_dt={max_dt})") return np.zeros(max_dt) tasks = [(t0, h_bond_list, max_dt) for t0 in range(total_frames - max_dt)] with Pool(n_workers) as pool: results = list(tqdm(pool.imap(process_tcf_task, tasks), total=len(tasks), desc="Parallel TCF Calculation")) for tcf_part, counts_part in results: tcf += tcf_part counts += counts_part valid_counts = np.where(counts > 0, counts, 1) tcf = tcf / valid_counts return tcf / total_bonds if total_bonds > 0 else np.zeros(max_dt) def process_tcf_task(args): """处理单个TCF任务的并行函数""" t0, h_bond_list, max_dt = args ref_bonds = set(h_bond_list[t0]) local_tcf = np.zeros(max_dt) local_counts = np.zeros(max_dt, dtype=int) for dt in range(max_dt): t = t0 + dt if t >= len(h_bond_list): break current_bonds = set(h_bond_list[t]) persistent_bonds = ref_bonds & current_bonds local_tcf[dt] = len(persistent_bonds) local_counts[dt] = 1 return local_tcf, local_counts # 6. 主函数 (优化版) def main_optimized(workers, vasprun_path): print(f"Using {workers} workers for parallel processing") print(f"Using vasprun file: {vasprun_path}") # 时间参数定义 time_per_step = 0.2e-3 skip_steps = 5 frame_interval = time_per_step * skip_steps print(f"Time per frame: {frame_interval:.6f} ps") # 读取VASP轨迹 print(f"Reading vasprun.xml...") try: vr = Vasprun(vasprun_path, ionic_step_skip=skip_steps) structures = vr.structures total_frames = len(structures) print(f"Loaded {total_frames} frames") except Exception as e: print(f"Error loading vasprun file: {str(e)}") return # 计算总模拟时间 total_sim_time = total_frames * frame_interval print(f"Total simulation time: {total_sim_time:.3f} ps") # 设置时间窗口 max_time_window = min(2.0, total_sim_time * 0.5) max_dt_frames = int(max_time_window / frame_interval) print(f"Setting TCF time window: {max_time_window:.3f} ps ({max_dt_frames} frames)") # 定义分析的氢键类型 (12种) bond_types = [ 'F_H2O', # F...H-O_water 'F_H3O', # F...H-H3O 'F_PO4', # F...H-PO4 'PO4_PO4', # PO4...PO4 (磷酸间) 'H2O_H2O', # O_water-H...O_water 'H3O_H3O', # H3O...H3O 'PO4_H2O_a', # PO4...H-O_water (磷酸氧作为受体) 'PO4_H2O_d', # PO4-H...O_water (磷酸H作为供体) 'H3O_H2O_a', # H3O...H-O_water (水合氢离子作为受体) 'H3O_H2O_d', # H3O-H...O_water (水合氢离子作为供体) 'PO4_H3O_a', # PO4...H-H3O (磷酸氧作为受体) 'PO4_H3O_d' # PO4-H...H3O (磷酸H作为供体) ] # 友好的标签名称 bond_labels = { 'F_H2O': 'F···H-O$_{water}$', 'F_H3O': 'F···H-H$_3$O$^+$', 'F_PO4': 'F···H-PO$_4$', 'PO4_PO4': 'PO$_4$···PO$_4$', 'H2O_H2O': 'O$_{water}$-H···O$_{water}$', 'H3O_H3O': 'H$_3$O$^+$···H$_3$O$^+$', 'PO4_H2O_a': 'PO$_4$···H-O$_{water}$', 'PO4_H2O_d': 'PO$_4$-H···O$_{water}$', 'H3O_H2O_a': 'H$_3$O$^+$···H-O$_{water}$', 'H3O_H2O_d': 'H$_3$O$^+$-H···O$_{water}$', 'PO4_H3O_a': 'PO$_4$···H-H$_3$O$^+$', 'PO4_H3O_d': 'PO$_4$-H···H$_3$O$^+$' } # 并行计算氢键密度 print(f"Parallel hydrogen bond calculation with {workers} workers...") start_time = time.time() # 创建任务列表 tasks = [(i, structure, bond_types) for i, structure in enumerate(structures)] # 使用进程池并行处理 densities = {bt: np.zeros(total_frames) for bt in bond_types} h_bond_lists = {bt: [None] * total_frames for bt in bond_types} try: with Pool(workers) as pool: results = list(tqdm(pool.imap(process_frame, tasks), total=total_frames, desc="Processing Frames")) # 整理结果 for i, frame_results in results: for bt in bond_types: bonds, density = frame_results[bt] densities[bt][i] = density h_bond_lists[bt][i] = bonds except Exception as e: print(f"Error during parallel processing: {str(e)}") return # 计算平均密度 avg_densities = {bt: np.mean(densities[bt]) for bt in bond_types} hb_time = time.time() - start_time print(f"H-bond calculation completed in {hb_time:.2f} seconds") # 并行计算TCF print("Parallel TCF calculation...") tcf_results = {} lifetimes = {} reliability = {} for bt in bond_types: print(f"\nProcessing bond type: {bt}") h_bond_list = h_bond_lists[bt] start_time = time.time() try: tcf = calculate_tcf_parallel(h_bond_list, max_dt_frames, workers) tcf_results[bt] = tcf except Exception as e: print(f"Error calculating TCF for {bt}: {str(e)}") tcf_results[bt] = np.zeros(max_dt_frames) continue # 创建时间轴 (转换为ps) t_frames = np.arange(len(tcf)) t_ps = t_frames * frame_interval # 指数拟合 mask = (tcf > 0.01) & (t_ps < max_time_window * 0.8) if np.sum(mask) > 3: try: popt, _ = curve_fit(exp_decay, t_ps[mask], tcf[mask], p0=[1.0, 1.0], maxfev=5000) lifetimes[bt] = popt[0] if lifetimes[bt] > max_time_window * 0.8: reliability[bt] = "unreliable (τ > 80% time window)" else: reliability[bt] = "reliable" print(f"Fit successful: τ = {lifetimes[bt]:.3f} ps ({reliability[bt]})") except Exception as e: print(f"Fit failed for {bt}: {str(e)}") lifetimes[bt] = np.nan reliability[bt] = "fit failed" else: print(f"Not enough data points for fitting {bt}") lifetimes[bt] = np.nan reliability[bt] = "insufficient data" tcf_time = time.time() - start_time print(f"TCF calculation for {bt} completed in {tcf_time:.2f} seconds") # 绘图 - 优化期刊格式 # 使用颜色循环,避免太多颜色 colors = plt.cm.tab20(np.linspace(0, 1, len(bond_types))) # 1. 氢键密度图 fig1, ax1 = plt.subplots(figsize=(3.5, 2.8)) time_axis = np.arange(len(structures)) * frame_interval # 选择最重要的6种氢键类型显示 selected_types = ['F_H2O', 'F_H3O', 'F_PO4', 'PO4_PO4', 'H2O_H2O', 'PO4_H2O_a'] for i, bt in enumerate(bond_types): if bt in selected_types: ax1.plot(time_axis, densities[bt], color=colors[i], label=bond_labels[bt], alpha=0.8, linewidth=1.5) ax1.set_xlabel('Time (ps)', fontsize=10) ax1.set_ylabel('Concentration (mol/L)', fontsize=10) ax1.legend(frameon=False, fontsize=8, loc='upper center', bbox_to_anchor=(0.5, -0.25), ncol=2) fig1.tight_layout(pad=0.5) fig1.subplots_adjust(bottom=0.35) # 为图例留出空间 fig1.savefig('hbond_densities.pdf') print("Saved hbond_densities.pdf") # 2. 时间关联函数图 (对数坐标) fig2, ax2 = plt.subplots(figsize=(3.5, 2.8)) # 选择最重要的6种氢键类型显示 for i, bt in enumerate(bond_types): if bt in selected_types: t_ps = np.arange(len(tcf_results[bt])) * frame_interval # 创建标签 label = bond_labels[bt] if bt in lifetimes and not np.isnan(lifetimes[bt]): label += f' (τ={lifetimes[bt]:.2f} ps)' if reliability.get(bt, "").startswith("unreliable"): label += '*' ax2.semilogy(t_ps, tcf_results[bt], color=colors[i], label=label, linewidth=1.5) # 标记时间窗口上限 ax2.axvline(x=max_time_window, color='gray', linestyle='--', alpha=0.7, linewidth=1) ax2.text(max_time_window*1.02, 0.5, 'Time window', rotation=90, va='center', ha='left', fontsize=7) ax2.set_xlabel('Time (ps)', fontsize=10) ax2.set_ylabel('Hydrogen Bond TCF', fontsize=10) ax2.set_xlim(0, max_time_window * 1.1) ax2.set_ylim(1e-3, 1.1) ax2.legend(frameon=False, fontsize=7, loc='upper right', bbox_to_anchor=(1.0, 1.0)) # 添加可靠性说明 if any("unreliable" in rel for bt, rel in reliability.items() if isinstance(rel, str)): ax2.text(0.95, 0.05, "* Unreliable fit (τ > 80% time window)", transform=ax2.transAxes, ha='right', fontsize=6) fig2.tight_layout(pad=0.5) fig2.savefig('hbond_tcf.pdf') print("Saved hbond_tcf.pdf") # 3. 氢键寿命比较图 fig3, ax3 = plt.subplots(figsize=(3.5, 2.8)) # 收集可靠寿命数据 valid_lifetimes = [] valid_labels = [] for bt in selected_types: if bt in lifetimes and not np.isnan(lifetimes[bt]) and "reliable" in reliability.get(bt, ""): valid_lifetimes.append(lifetimes[bt]) valid_labels.append(bond_labels[bt]) if valid_lifetimes: y_pos = np.arange(len(valid_lifetimes)) ax3.barh(y_pos, valid_lifetimes, color=colors[:len(valid_lifetimes)], alpha=0.8) ax3.set_yticks(y_pos) ax3.set_yticklabels(valid_labels, fontsize=8) ax3.set_xlabel('Lifetime (ps)', fontsize=10) ax3.set_title('Hydrogen Bond Lifetimes', fontsize=10) fig3.tight_layout(pad=0.5) fig3.savefig('hbond_lifetimes.pdf') print("Saved hbond_lifetimes.pdf") # 打印结果 print("\nResults Summary:") print("="*85) print("{:<20} {:<15} {:<15} {:<20}".format( "Bond Type", "Avg Density", "Lifetime (ps)", "Reliability")) print("-"*85) for bt in bond_types: lifetime = lifetimes.get(bt, np.nan) rel = reliability.get(bt, "N/A") print("{:<20} {:<15.4f} {:<15.3f} {:<20}".format( bond_labels[bt], avg_densities[bt], lifetime, rel )) print("="*85) def exp_decay(t, tau, A): return A * np.exp(-t / tau) if __name__ == "__main__": # 命令行参数解析 parser = argparse.ArgumentParser(description='Hydrogen Bond Lifetime Analysis') parser.add_argument('--workers', type=int, default=max(1, cpu_count() - 2), help='Number of parallel workers (default: CPU cores - 2)') parser.add_argument('--vasprun', type=str, default=DEFAULT_VASPRUN_PATH, help='Path to vasprun.xml file') args = parser.parse_args() print(f"Starting analysis with {args.workers} workers") print(f"Using vasprun file: {args.vasprun}") main_optimized(workers=args.workers, vasprun_path=args.vasprun)以上为VASP计算氢键寿命,但是现在我需要计算LAMMPS,在计算LAMMPS的时候可以参考一下代码import MDAnalysis as mda import numpy as np import numba as nb from MDAnalysis.lib.distances import capped_distance, calc_angles, distance_array import multiprocessing as mp from tqdm.auto import tqdm import os import argparse import matplotlib.pyplot as plt import pandas as pd import zarr import time import gc import sys from scipy.spatial import cKDTree # ====================== 优化参数设置 ====================== DEFAULT_MAX_FRAMES = 1000 # 只处理前100ps (1000帧) DEFAULT_MEM_LIMIT = 100 # GB (126GB内存中保留6GB给系统) DEFAULT_N_WORKERS = 58 # 默认使用64个核心 BOX_SIZE = 80.0 CALC_REGION = np.array([0, 80, 0, 80, 0, 80], dtype=np.float32) HB_DIST_CUTOFF = 3.5 HB_ANGLE_CUTOFF = 130 FRAME_DT = 0.1 # ps TAU_MAX = 20.0 # ps DT_ORIGIN = 2.0 # ps MEMORY_LIMIT = 100 # GB (126GB内存中保留6GB给系统) # 原子类型常量 - 扩展包含氟原子 O_WATER, O_HYDRONIUM, O_PHYDROXYL, O_PDOUBLE, O_PBRIDGE = 0, 1, 2, 3, 4 H_WATER, H_HYDRONIUM, H_PHOSPHORIC, P, F_FLUORIDE, H_FLUORIDE = 5, 6, 7, 8, 9, 10 UNKNOWN = 99 # 原子类型反向映射 ATOM_TYPE_NAMES = { O_WATER: 'O_water', O_HYDRONIUM: 'O_hydronium', O_PHYDROXYL: 'O_phydroxyl', O_PDOUBLE: 'O_pdouble', O_PBRIDGE: 'O_pbridge', H_WATER: 'H_water', H_HYDRONIUM: 'H_hydronium', H_PHOSPHORIC: 'H_phosphoric', P: 'P', F_FLUORIDE: 'F', H_FLUORIDE: 'H_fluoride', UNKNOWN: 'UNK' } # ====================== 核心优化函数 ====================== @nb.njit(nogil=True, fastmath=True, cache=True, parallel=True) def classify_atoms_vectorized(positions, types, box): """使用Numba加速的向量化原子分类函数,包含氟原子处理""" n_atoms = len(types) atom_labels = np.full(n_atoms, UNKNOWN, dtype=np.int8) # 提取原子索引 o_indices = np.where(types == 2)[0] # 氧原子 h_indices = np.where(types == 1)[0] # 氢原子 p_indices = np.where(types == 7)[0] # 磷原子 f_indices = np.where(types == 3)[0] # 氟原子 # 标记氟原子 for f_idx in f_indices: atom_labels[f_idx] = F_FLUORIDE # 预计算O-H距离矩阵 oh_distances = np.empty((len(o_indices), len(h_indices)), dtype=np.float32) for i in nb.prange(len(o_indices)): o_idx = o_indices[i] for j in range(len(h_indices)): h_idx = h_indices[j] d = positions[o_idx] - positions[h_idx] d -= np.round(d / box) * box # PBC校正 oh_distances[i, j] = np.sqrt(np.sum(d**2)) # 预计算P-O距离矩阵 po_distances = np.empty((len(p_indices), len(o_indices)), dtype=np.float32) for i in nb.prange(len(p_indices)): p_idx = p_indices[i] for j in range(len(o_indices)): o_idx = o_indices[j] d = positions[p_idx] - positions[o_idx] d -= np.round(d / box) * box po_distances[i, j] = np.sqrt(np.sum(d**2)) # 预计算F-H距离矩阵 fh_distances = np.empty((len(f_indices), len(h_indices)), dtype=np.float32) for i in nb.prange(len(f_indices)): f_idx = f_indices[i] for j in range(len(h_indices)): h_idx = h_indices[j] d = positions[f_idx] - positions[h_idx] d -= np.round(d / box) * box fh_distances[i, j] = np.sqrt(np.sum(d**2)) # 分类氧原子 (并行) for i in nb.prange(len(o_indices)): o_idx = o_indices[i] h_count = np.sum(oh_distances[i] < 1.2) # O-H键长阈值 p_count = 0 for j in range(len(p_indices)): if po_distances[j, i] < 1.8: # P-O键长阈值 p_count += 1 if h_count == 3: atom_labels[o_idx] = O_HYDRONIUM elif h_count == 2: atom_labels[o_idx] = O_WATER elif h_count == 1 and p_count == 1: atom_labels[o_idx] = O_PHYDROXYL elif p_count == 1: atom_labels[o_idx] = O_PDOUBLE elif p_count == 2: atom_labels[o_idx] = O_PBRIDGE # 分类氢原子 (并行) for j in nb.prange(len(h_indices)): h_idx = h_indices[j] min_dist = 10.0 min_o_idx = -1 for i in range(len(o_indices)): if oh_distances[i, j] < min_dist: min_dist = oh_distances[i, j] min_o_idx = o_indices[i] # 检查是否连接到氟原子 min_f_dist = 10.0 min_f_idx = -1 for i in range(len(f_indices)): if fh_distances[i, j] < min_f_dist: min_f_dist = fh_distances[i, j] min_f_idx = f_indices[i] # 优先判断氟键 if min_f_dist < 1.2: # F-H键长阈值 atom_labels[h_idx] = H_FLUORIDE elif min_dist < 1.2: # O-H键长阈值 o_type = atom_labels[min_o_idx] if o_type == O_HYDRONIUM: atom_labels[h_idx] = H_HYDRONIUM elif o_type == O_WATER: atom_labels[h_idx] = H_WATER elif o_type == O_PHYDROXYL: atom_labels[h_idx] = H_PHOSPHORIC # 分类磷原子 for p_idx in p_indices: atom_labels[p_idx] = P return atom_labels def find_hbonds_kdtree(positions, atom_labels, box, kdtree=None): """使用KD树加速的氢键检测,包含氟键处理""" # 识别供体和受体 donor_mask = np.isin(atom_labels, [O_WATER, O_HYDRONIUM, O_PHYDROXYL, F_FLUORIDE]) acceptor_mask = np.isin(atom_labels, [O_WATER, O_HYDRONIUM, O_PDOUBLE, O_PBRIDGE, O_PHYDROXYL, F_FLUORIDE]) donor_indices = np.where(donor_mask)[0] acceptor_indices = np.where(acceptor_mask)[0] if len(donor_indices) == 0 or len(acceptor_indices) == 0: return [], kdtree # 创建或复用KD树 if kdtree is None: kdtree = cKDTree(positions[acceptor_indices], boxsize=box[:3]) # 找到供体氢原子 h_mask = ((atom_labels == H_WATER) | (atom_labels == H_HYDRONIUM) | (atom_labels == H_PHOSPHORIC) | (atom_labels == H_FLUORIDE)) h_positions = positions[h_mask] h_indices = np.where(h_mask)[0] hbonds = [] processed_pairs = set() # 避免重复处理 # 处理每个供体 for donor_idx in donor_indices: # 找到与供体相连的氢原子 donor_pos = positions[donor_idx] d_h_dist = distance_array(donor_pos.reshape(1, -1), h_positions, box=box)[0] connected_h = np.where(d_h_dist < 1.2)[0] for h_idx in connected_h: hydrogen_idx = h_indices[h_idx] hydrogen_pos = positions[hydrogen_idx] # 使用KD树查找附近受体 neighbors = kdtree.query_ball_point(hydrogen_pos, HB_DIST_CUTOFF) for j in neighbors: acceptor_idx = acceptor_indices[j] # 跳过自相互作用 if donor_idx == acceptor_idx: continue # 检查是否已处理 pair_id = tuple(sorted((donor_idx, acceptor_idx))) if pair_id in processed_pairs: continue processed_pairs.add(pair_id) # 计算角度 angle = calc_angles(donor_pos, hydrogen_pos, positions[acceptor_idx], box=box) angle_deg = np.degrees(angle) if angle_deg > HB_ANGLE_CUTOFF: dist = np.linalg.norm(hydrogen_pos - positions[acceptor_idx]) # 确定氢键类型 donor_type = ATOM_TYPE_NAMES.get(atom_labels[donor_idx], 'UNK').split('_')[0] acceptor_type = ATOM_TYPE_NAMES.get(atom_labels[acceptor_idx], 'UNK').split('_')[0] # 简化类型命名 if donor_type == 'O' and acceptor_type == 'O': hbond_type = 'O-O' elif donor_type == 'O' and acceptor_type == 'P': hbond_type = 'O-P' elif donor_type == 'P' and acceptor_type == 'O': hbond_type = 'P-O' elif donor_type == 'P' and acceptor_type == 'P': hbond_type = 'P-P' elif donor_type == 'H3O' and acceptor_type == 'O': hbond_type = 'H3O-O' elif donor_type == 'H3O' and acceptor_type == 'P': hbond_type = 'H3O-P' elif donor_type == 'F' or acceptor_type == 'F': hbond_type = f"{donor_type}-F" if acceptor_type == 'F' else f"F-{acceptor_type}" else: hbond_type = f"{donor_type}-{acceptor_type}" hbonds.append(( donor_idx, hydrogen_idx, acceptor_idx, dist, angle_deg, hbond_type )) return hbonds, kdtree # ====================== 并行处理框架 ====================== def process_frame_chunk(args): """处理一批帧的优化函数,包含详细调试输出""" start_idx, end_idx, top_file, traj_file, debug = args try: universe = mda.Universe(top_file, traj_file, topology_format="DATA", format="LAMMPSDUMP", atom_style="id type charge x y z") # 预加载区域掩码 universe.trajectory[0] positions = universe.atoms.positions mask = ((positions[:, 0] >= CALC_REGION[0]) & (positions[:, 0] <= CALC_REGION[1]) & (positions[:, 1] >= CALC_REGION[2]) & (positions[:, 1] <= CALC_REGION[3]) & (positions[:, 2] >= CALC_REGION[4]) & (positions[:, 2] <= CALC_REGION[5])) filtered_indices = np.where(mask)[0] types_all = universe.atoms.types.astype(int) if debug: print(f"区域筛选: 原始原子数 {len(positions)} -> 筛选后 {len(filtered_indices)}") results = [] kdtree = None for frame_idx in range(start_idx, end_idx): if frame_idx >= len(universe.trajectory): break universe.trajectory[frame_idx] positions = universe.atoms.positions # 应用区域筛选 filtered_pos = positions[filtered_indices] filtered_types = types_all[filtered_indices] # 分类原子 atom_labels = classify_atoms_vectorized( filtered_pos, filtered_types, universe.dimensions ) if debug: unique, counts = np.unique(atom_labels, return_counts=True) print(f"\n帧 {frame_idx} 原子类型统计:") for u, c in zip(unique, counts): print(f" {ATOM_TYPE_NAMES.get(u, 'UNKNOWN')}: {c}") # 识别氢键 hbonds, kdtree = find_hbonds_kdtree( filtered_pos, atom_labels, universe.dimensions, kdtree ) if debug: print(f"检测到 {len(hbonds)} 个氢键") if len(hbonds) > 0: for i, hb in enumerate(hbonds[:min(5, len(hbonds))]): donor_name = ATOM_TYPE_NAMES.get(atom_labels[hb[0]], "UNK") acceptor_name = ATOM_TYPE_NAMES.get(atom_labels[hb[2]], "UNK") print(f" 氢键 {i+1}: {donor_name}-{acceptor_name} " f"距离={hb[3]:.2f}&Aring;, 角度={hb[4]:.1f}°") # 存储压缩的氢键信息 hbond_data = [] for hb in hbonds: # 简化类型映射 type_code = { 'O-O': 0, 'O-P': 1, 'P-O': 2, 'P-P': 3, 'H3O-O': 4, 'H3O-P': 5, 'O-F': 6, 'P-F': 7, 'F-O': 8, 'F-P': 9, 'F-F': 10, 'H3O-F': 11, 'F-H3O': 12 }.get(hb[5], 13) # 默认其他类型 hbond_data.append((hb[0], hb[1], hb[2], type_code)) results.append((frame_idx, np.array(hbond_data, dtype=np.int32))) return results except Exception as e: import traceback print(f"处理帧块 [{start_idx}-{end_idx}] 时出错: {str(e)}") print(traceback.format_exc()) # 返回空结果但保持结构 error_results = [] for frame_idx in range(start_idx, min(end_idx, len(universe.trajectory))): error_results.append((frame_idx, np.array([], dtype=np.int32))) return error_results # ====================== 氢键跟踪优化 ====================== @nb.njit(nogil=True, fastmath=True, parallel=True) def track_hbonds_batch(origin_frame, positions_cache, hbond_data, tau_max_frames, box): """批量跟踪氢键寿命的Numba优化函数""" n_hbonds = len(hbond_data) survival_matrix = np.zeros((n_hbonds, tau_max_frames), dtype=np.uint8) # 为所有氢键并行处理 for i in nb.prange(n_hbonds): donor, hydrogen, acceptor, _ = hbond_data[i] # 检查初始帧中的氢键是否存在 if donor >= positions_cache[0].shape[0] or \ hydrogen >= positions_cache[0].shape[0] or \ acceptor >= positions_cache[0].shape[0]: continue # 初始距离和角度 d0 = positions_cache[0][donor] h0 = positions_cache[0][hydrogen] a0 = positions_cache[0][acceptor] # 检查初始条件 dist0 = np.sqrt(np.sum((h0 - a0)**2)) angle_rad = calc_angles(d0, h0, a0, box=box) angle0 = np.degrees(angle_rad) if dist0 > HB_DIST_CUTOFF or angle0 < HB_ANGLE_CUTOFF: continue survival_matrix[i, 0] = 1 # 跟踪后续帧 for t in range(1, tau_max_frames): if t >= len(positions_cache): break pos_t = positions_cache[t] if donor >= pos_t.shape[0] or hydrogen >= pos_t.shape[0] or acceptor >= pos_t.shape[0]: break dt = pos_t[donor] ht = pos_t[hydrogen] at = pos_t[acceptor] # 应用PBC校正 d_vec = ht - dt d_vec -= np.round(d_vec / box) * box a_vec = at - ht a_vec -= np.round(a_vec / box) * box dist = np.sqrt(np.sum((ht - at)**2)) angle_rad = calc_angles(dt, ht, at, box=box) angle = np.degrees(angle_rad) if dist <= HB_DIST_CUTOFF and angle >= HB_ANGLE_CUTOFF: survival_matrix[i, t] = 1 else: break return survival_matrix # ====================== 氢键跟踪全局函数 ====================== def process_origin(origin, all_hbonds, traj_cache, n_frames, tau_max_frames, box): """处理单个时间原点的氢键寿命跟踪""" # 获取当前原点帧的氢键 origin_hbonds = all_hbonds[origin] if origin_hbonds is None or len(origin_hbonds) == 0: return np.zeros(tau_max_frames), np.zeros(tau_max_frames) # 准备帧数据缓存 frame_cache = [] for t in range(tau_max_frames): frame_idx = origin + t if frame_idx < n_frames: frame_cache.append(traj_cache.get_frame(frame_idx)) else: break # 批量跟踪氢键 survival_matrix = track_hbonds_batch( origin, frame_cache, origin_hbonds, tau_max_frames, box ) # 计算SCF scf = np.sum(survival_matrix, axis=0) count = np.array([np.sum(survival_matrix[:, :t+1].any(axis=1)) for t in range(tau_max_frames)]) return scf, count # 全局包装函数 def process_origin_wrapper(args): """包装器函数,用于传递多个参数""" return process_origin(*args) # ====================== 内存管理优化 ====================== class TrajectoryCache: """高效轨迹缓存管理器""" def __init__(self, universe, max_frames, mem_limit_gb): self.universe = universe self.n_atoms = len(universe.atoms) self.frame_size = self.n_atoms * 3 * 4 # 每帧大小 (bytes) self.max_frames = min(max_frames, len(universe.trajectory)) self.mem_limit = mem_limit_gb * 1e9 # 计算内存中可以缓存的帧数 self.cache_capacity = max(1, int(self.mem_limit * 0.7 / self.frame_size)) self.cache = {} self.access_counter = {} self.counter = 0 # 创建Zarr磁盘缓存 cache_dir = f'temp_positions_cache_{os.getpid()}' os.makedirs(cache_dir, exist_ok=True) self.store = zarr.DirectoryStore(cache_dir) self.root = zarr.group(store=self.store, overwrite=True) self.disk_cache = self.root.zeros( 'positions', shape=(self.max_frames, self.n_atoms, 3), chunks=(1, self.n_atoms, 3), dtype='float32' ) # 预填充磁盘缓存 for frame_idx in tqdm(range(self.max_frames), desc="预缓存轨迹"): self.universe.trajectory[frame_idx] self.disk_cache[frame_idx] = universe.atoms.positions.astype(np.float32) def get_frame(self, frame_idx): """获取帧数据,使用内存+磁盘混合缓存""" if frame_idx not in self.cache: # 如果缓存已满,移除最久未使用的 if len(self.cache) >= self.cache_capacity: least_used = min(self.access_counter, key=self.access_counter.get) del self.cache[least_used] del self.access_counter[least_used] # 从磁盘加载到内存 self.cache[frame_idx] = self.disk_cache[frame_idx][:] # 更新访问计数 self.counter += 1 self.access_counter[frame_idx] = self.counter return self.cache[frame_idx] def cleanup(self): """清理缓存""" self.cache.clear() self.access_counter.clear() gc.collect() # 清理磁盘缓存 if os.path.exists(self.store.path): import shutil shutil.rmtree(self.store.path) # ====================== 主程序优化 ====================== def main(top_file, traj_file, output_prefix, max_frames=DEFAULT_MAX_FRAMES, mem_limit=DEFAULT_MEM_LIMIT, workers=DEFAULT_N_WORKERS, debug=False): print(f"加载拓扑文件: {top_file}") print(f"加载轨迹文件: {traj_file}") # 创建Universe universe = mda.Universe(top_file, traj_file, topology_format="DATA", format="LAMMPSDUMP", atom_style="id type charge x y z") n_frames = min(len(universe.trajectory), max_frames) n_atoms = len(universe.atoms) print(f"系统信息: {n_atoms} 个原子, {n_frames} 帧 ({n_frames*FRAME_DT}ps)") # 打印原子类型分布 (调试) if debug: print("\n原子类型分布 (第一帧):") unique_types, counts = np.unique(universe.atoms.types.astype(int), return_counts=True) for t, c in zip(unique_types, counts): print(f" 类型 {t}: {c} 个原子") # 初始化轨迹缓存 traj_cache = TrajectoryCache(universe, n_frames, mem_limit) # ===== 第一阶段: 氢键检测 ===== print("\n===== 阶段1: 氢键检测 =====") start_time = time.time() # 计算批处理大小 frame_size = n_atoms * 3 * 4 # 位置数据大小 (bytes) batch_size = max(1, int(mem_limit * 0.3 * 1e9 / (frame_size * workers))) batches = [(i, min(i+batch_size, n_frames)) for i in range(0, n_frames, batch_size)] print(f"使用 {len(batches)} 个批次, 每批 {batch_size} 帧") print(f"启动 {workers} 个工作进程...") # 并行处理帧 all_hbonds = [None] * n_frames with mp.Pool(processes=workers) as pool: args = [(start, end, top_file, traj_file, debug) for start, end in batches] results = list(tqdm(pool.imap_unordered(process_frame_chunk, args, chunksize=1), total=len(batches), desc="氢键检测")) # 整合结果 for batch_result in results: for frame_idx, hbonds in batch_result: all_hbonds[frame_idx] = hbonds hbond_detect_time = time.time() - start_time print(f"氢键检测完成! 耗时: {hbond_detect_time:.2f} 秒") # ===== 第二阶段: 氢键寿命跟踪 ===== print("\n===== 阶段2: 氢键寿命跟踪 =====") start_time = time.time() tau_max_frames = int(TAU_MAX / FRAME_DT) dt_origin_frames = int(DT_ORIGIN / FRAME_DT) origins = list(range(0, n_frames - tau_max_frames, dt_origin_frames)) print(f"跟踪 {len(origins)} 个时间原点的氢键寿命...") print(f"时间窗口: {TAU_MAX}ps ({tau_max_frames}帧)") # 准备结果数组 scf_total = np.zeros(tau_max_frames) count_total = np.zeros(tau_max_frames) # 准备参数列表 args_list = [(origin, all_hbonds, traj_cache, n_frames, tau_max_frames, universe.dimensions) for origin in origins] # 并行处理时间原点 with mp.Pool(processes=workers) as pool: results = list(tqdm( pool.imap(process_origin_wrapper, args_list, chunksize=10), total=len(origins), desc="跟踪氢键" )) for scf, count in results: valid_len = min(len(scf), len(scf_total)) scf_total[:valid_len] += scf[:valid_len] count_total[:valid_len] += count[:valid_len] # 清理缓存 traj_cache.cleanup() # ===== 结果处理 ===== print("\n===== 阶段3: 结果分析 =====") # 计算平均SCF scf_avg = np.zeros_like(scf_total) valid_mask = count_total > 0 scf_avg[valid_mask] = scf_total[valid_mask] / count_total[valid_mask] # 计算弛豫时间 time_axis = np.arange(tau_max_frames) * FRAME_DT tau_relax = np.trapz(scf_avg, time_axis) # 氢键统计 hbond_counts = {} for frame_hbonds in all_hbonds: if frame_hbonds is None: continue for hb in frame_hbonds: hb_type = hb[3] hbond_counts[hb_type] = hbond_counts.get(hb_type, 0) + 1 total_hbonds = sum(hbond_counts.values()) avg_hbonds = total_hbonds / n_frames if n_frames > 0 else 0 # 计算氢键密度 calc_volume = ((CALC_REGION[1]-CALC_REGION[0]) * (CALC_REGION[3]-CALC_REGION[2]) * (CALC_REGION[5]-CALC_REGION[4])) * 1e-27 # m³ hbond_density = avg_hbonds / (calc_volume * 6.022e23) # mol/L # 保存结果 scf_df = pd.DataFrame({ 'Time (ps)': time_axis, 'SCF': scf_avg, 'Count': count_total }) scf_df.to_csv(f"{output_prefix}_scf.csv", index=False) stats_data = { 'Avg_HBonds_per_frame': avg_hbonds, 'HBond_Density_mol/L': hbond_density, 'Relaxation_Time_ps': tau_relax, 'Total_Frames': n_frames, 'HBond_Detection_Time_s': hbond_detect_time, 'Lifetime_Tracking_Time_s': time.time() - start_time } # 添加按类型的氢键统计 type_names = { 0: 'O-O', 1: 'O-P', 2: 'P-O', 3: 'P-P', 4: 'H3O-O', 5: 'H3O-P', 6: 'O-F', 7: 'P-F', 8: 'F-O', 9: 'F-P', 10: 'F-F', 11: 'H3O-F', 12: 'F-H3O', 13: 'Other' } for type_code, count in hbond_counts.items(): stats_data[f"HBond_{type_names.get(type_code, 'Unknown')}"] = count / n_frames stats_df = pd.DataFrame([stats_data]) stats_df.to_csv(f"{output_prefix}_stats.csv", index=False) # 绘图 plt.figure(figsize=(10, 7), dpi=150) plt.plot(time_axis, scf_avg, 'b-', linewidth=2.5, label='SCF') plt.fill_between(time_axis, scf_avg, alpha=0.2, color='blue') # 添加指数拟合 try: from scipy.optimize import curve_fit def exp_decay(t, a, tau): return a * np.exp(-t / tau) popt, _ = curve_fit(exp_decay, time_axis, scf_avg, p0=[1.0, 5.0]) plt.plot(time_axis, exp_decay(time_axis, *popt), 'r--', label=f'Exp Fit: τ={popt[1]:.2f}ps') except: pass plt.xlabel('Time (ps)', fontsize=14, fontfamily='serif') plt.ylabel('Survival Probability', fontsize=14, fontfamily='serif') plt.title(f'H-Bond Lifetime in Phosphoric Acid Solution\nτ_relax = {tau_relax:.2f} ps', fontsize=16, fontfamily='serif') plt.grid(alpha=0.3, linestyle='--') plt.xlim(0, TAU_MAX) plt.ylim(0, 1.05) plt.legend(fontsize=12) plt.tight_layout() # 添加统计信息框 stats_text = (f"Avg HBonds/frame: {avg_hbonds:.1f}\n" f"HBond density: {hbond_density:.2f} mol/L\n" f"Frames analyzed: {n_frames}") plt.figtext(0.75, 0.25, stats_text, bbox=dict(facecolor='white', alpha=0.8), fontsize=10, fontfamily='monospace') plt.savefig(f"{output_prefix}_lifetime.png", dpi=300, bbox_inches='tight') total_time = time.time() - start_time print(f"\n分析完成! 总耗时: {total_time/60:.2f} 分钟") print(f"结果保存到: {output_prefix}_*.csv/png") if __name__ == "__main__": # 简化命令行参数解析 parser = argparse.ArgumentParser( description='优化版氢键寿命分析 - 湿法磷酸体系专用', formatter_class=argparse.ArgumentDefaultsHelpFormatter ) # 必需参数 parser.add_argument('top_file', type=str, help='拓扑文件路径 (.data)') parser.add_argument('traj_file', type=str, help='轨迹文件路径 (.lammpstrj)') parser.add_argument('output_prefix', type=str, help='输出文件前缀') # 可选参数(设置默认值) parser.add_argument('--workers', type=int, default=DEFAULT_N_WORKERS, help=f'并行工作进程数 (默认: {DEFAULT_N_WORKERS})') parser.add_argument('--max_frames', type=int, default=DEFAULT_MAX_FRAMES, help=f'处理的最大帧数 (默认: {DEFAULT_MAX_FRAMES})') parser.add_argument('--mem_limit', type=int, default=DEFAULT_MEM_LIMIT, help=f'内存限制 (GB) (默认: {DEFAULT_MEM_LIMIT})') parser.add_argument('--debug', action='store_true', help='启用详细调试输出') args = parser.parse_args() # 打印配置信息 print("="*60) print(f"氢键寿命分析配置:") print(f" 拓扑文件: {args.top_file}") print(f" 轨迹文件: {args.traj_file}") print(f" 输出前缀: {args.output_prefix}") print(f" 工作进程: {args.workers}") print(f" 处理帧数: {args.max_frames} (前 {args.max_frames * FRAME_DT}ps)") print(f" 内存限制: {args.mem_limit}GB") print(f" 调试模式: {'开启' if args.debug else '关闭'}") print("="*60) # 检查文件是否存在 if not os.path.exists(args.top_file): print(f"错误: 拓扑文件不存在 - {args.top_file}") sys.exit(1) if not os.path.exists(args.traj_file): print(f"错误: 轨迹文件不存在 - {args.traj_file}") sys.exit(1) # 确保输出目录存在 output_dir = os.path.dirname(args.output_prefix) if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir) try: main( args.top_file, args.traj_file, args.output_prefix, max_frames=args.max_frames, mem_limit=args.mem_limit, workers=args.workers, debug=args.debug ) except Exception as e: print(f"程序执行出错: {str(e)}") import traceback traceback.print_exc() sys.exit(1) 后面这部分的代码为LAMMPS计算的代码,但是LAMMPS计算的代码显示结果为空而报错,可能是识别氢键为0也可能是有空表格导致,我也不清楚,但是上面VASP的部分已经能够成功运行,我希望你能够参考vasp的代码来优化LAMMPS的代码,针对LAMMPS的代码首先需要加速计算,在这里注意pickle问题,执行命令如LAMMPS的参考,出图部分需要符合The Journal of Chemical Physics期刊要求,注意int字符,主要字体情况,本电脑有64核126g内存,使用58核和100g内存。不要因为时间问题省略代码,输出的代码不要有错别字以及常规错误
最新发布
07-12
修正这段代码,import cv2 import numpy as np from sklearn.cluster import KMeans import matplotlib.pyplot as plt def identify_background_and_generate_patch(image_path, sample_size=10, expansion_factor=5, num_colors=3, tolerance=10): """ 识别图像背景并生成扩展填充样本 参数: image_path: 输入图像路径 sample_size: 背景采样区域大小(像素) expansion_factor: 背景样本扩展倍数 num_colors: 背景颜色聚类数量 tolerance: 背景颜色容差范围 """ # 1. 读取图像 img = cv2.imread(image_path) if img is None: raise FileNotFoundError(f"无法加载图像: {image_path}") img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) height, width, _ = img_rgb.shape # 2. 边缘采样获取初始背景样本 edge_samples = [] # 上边缘 edge_samples.append(img_rgb[0:sample_size, 0:width]) # 下边缘 edge_samples.append(img_rgb[height - sample_size:height, 0:width]) # 左边缘 edge_samples.append(img_rgb[0:height, 0:sample_size]) # 右边缘 edge_samples.append(img_rgb[0:height, width - sample_size:width]) background_samples = np.vstack(edge_samples) # 3. 使用K-means聚类识别主要背景色 pixels = background_samples.reshape(-1, 3).astype(np.float32) kmeans = KMeans(n_clusters=num_colors, random_state=0).fit(pixels) dominant_colors = kmeans.cluster_centers_.astype(np.uint8) # 4. 创建背景掩膜 background_mask = np.zeros((height, width), dtype=np.uint8) for color in dominant_colors: lower_bound = np.clip(color - tolerance, 0, 255) upper_bound = np.clip(color + tolerance, 0, 255) color_mask = cv2.inRange(img_rgb, lower_bound, upper_bound) background_mask = cv2.bitwise_or(background_mask, color_mask) # 5. 形态学操作优化背景掩膜 kernel = np.ones((5, 5), np.uint8) background_mask = cv2.morphologyEx(background_mask, cv2.MORPH_CLOSE, kernel, iterations=2) background_mask = cv2.morphologyEx(background_mask, cv2.MORPH_OPEN, kernel, iterations=1) # 6. 提取背景区域轮廓 contours, _ = cv2.findContours( background_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE ) # 7. 轮廓扩张生成大样本 expanded_mask = np.zeros_like(background_mask) for contour in contours: # 计算轮廓边界框 x, y, w, h = cv2.boundingRect(contour) # 扩展边界框 expand_x = int(w * (expansion_factor - 1) / 2) expand_y = int(h * (expansion_factor - 1) / 2) new_x = max(0, x - expand_x) new_y = max(0, y - expand_y) new_w = min(width - new_x, w * expansion_factor) new_h = min(height - new_y, h * expansion_factor) # 提取背景样本 sample = img_rgb[new_y:new_y + new_h, new_x:new_x + new_w] # 添加到扩展掩膜 expanded_mask[new_y:new_y + new_h, new_x:new_x + new_w] = 255 # 8. 生成最终背景样本图像 background_patch = cv2.bitwise_and( img_rgb, img_rgb, mask=expanded_mask ) # 9. 可视化结果 plt.figure(figsize=(15, 10)) plt.subplot(231) plt.imshow(img_rgb) plt.title('原始图像') plt.axis('off') plt.subplot(232) plt.imshow(background_mask, cmap='gray') plt.title('背景掩膜') plt.axis('off') plt.subplot(233) plt.imshow(expanded_mask, cmap='gray') plt.title('扩展掩膜') plt.axis('off') plt.subplot(234) # 显示聚类颜色 color_swatches = np.zeros((50, 300, 3), dtype=np.uint8) for i, color in enumerate(dominant_colors): color_swatches[:, i * 100:(i + 1) * 100] = color plt.imshow(color_swatches) plt.title(f'主要背景色 (k={num_colors})') plt.axis('off') plt.subplot(235) plt.imshow(background_samples) plt.title('边缘背景样本') plt.axis('off') plt.subplot(236) plt.imshow(background_patch) plt.title('扩展背景样本') plt.axis('off') plt.tight_layout() plt.savefig('background_analysis.jpg') return background_patch, expanded_mask, dominant_colors # 使用示例 if __name__ == "__main__": # 生成背景样本 background_patch, mask, colors = identify_background_and_generate_patch( "zdk.jpg", sample_size=20, expansion_factor=3, num_colors=3, tolerance=15 ) # 保存结果 cv2.imwrite("background_patch.jpg", cv2.cvtColor(background_patch, cv2.COLOR_RGB2BGR)) cv2.imwrite("background_mask.jpg", mask) print(f"生成背景样本尺寸: {background_patch.shape[1]}x{background_patch.shape[0]}") print(f"识别到的主要背景色: {colors.tolist()}") C:\Users\86152\爬虫学习文件\fastApiProjectone\.venv\Scripts\python.exe H:\img_ocr_server\img_util\input\背景去色扩张.py Traceback (most recent call last): File "H:\img_ocr_server\img_util\input\背景去色扩张.py", line 138, in <module> background_patch, mask, colors = identify_background_and_generate_patch( File "H:\img_ocr_server\img_util\input\背景去色扩张.py", line 38, in identify_background_and_generate_patch background_samples = np.vstack(edge_samples) File "<__array_function__ internals>", line 200, in vstack File "C:\Users\86152\爬虫学习文件\fastApiProjectone\.venv\lib\site-packages\numpy\core\shape_base.py", line 296, in vstack return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting) File "<__array_function__ internals>", line 200, in concatenate ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 2346 and the array at index 2 has size 20 Process finished with exit code 1
07-06
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

heda3

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值