RDF data

博客提供了一个链接http://km.aifb.kit.edu/projects/btc-2012/ ,但未明确该链接指向的具体信息技术内容。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 

 http://km.aifb.kit.edu/projects/btc-2012/

import numpy as np import matplotlib.pyplot as plt from pymatgen.io.vasp import Vasprun from pymatgen.core.structure import Structure from scipy.signal import savgol_filter from scipy.spatial import cKDTree from tqdm import tqdm import matplotlib as mpl import warnings from collections import defaultdict import os import argparse import csv from datetime import datetime import sys 忽略可能的警告 warnings.filterwarnings(“ignore”, category=UserWarning) 专业绘图设置 plt.style.use(‘seaborn-v0_8-whitegrid’) mpl.rcParams.update({ ‘font.family’: ‘sans-serif’, ‘font.sans-serif’: [‘Arial’, ‘DejaVu Sans’], ‘font.size’: 12, ‘axes.labelsize’: 14, ‘axes.titlesize’: 16, ‘xtick.labelsize’: 12, ‘ytick.labelsize’: 12, ‘figure.dpi’: 300, ‘savefig.dpi’: 300, ‘figure.figsize’: (10, 7), ‘lines.linewidth’: 2.5, ‘legend.fontsize’: 11, ‘legend.framealpha’: 0.9, ‘mathtext.default’: ‘regular’ }) def get_parent_p(struct, atom_index): “”“获取给定氧原子所属的磷酸基团(P原子索引)”“” site = struct[atom_index] if site.species_string != “O”: return None # 查找最近的P原子(P-O键长通常在1.5-1.6Å) p_neighbors = struct.get_neighbors(site, r=1.8) for neighbor in p_neighbors: if neighbor[0].species_string == "P": return neighbor[0].index return None def identify_atom_types(struct): “”“识别所有关键原子类型(增强版)”“” # 磷酸氧分类 p_oxygens = {“P=O”: [], “P-O”: [], “P-OH”: []} phosphate_hydrogens = [] # 仅P-OH基团中的H原子 # 水合氢离子识别 hydronium_oxygens = [] hydronium_hydrogens = [] # H₃O⁺中的H原子 # 普通水分子 water_oxygens = [] water_hydrogens = [] # 普通水中的H原子 # 氟离子 fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"] # 铝离子 aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"] # 创建快速邻居查找表 neighbor_cache = defaultdict(list) for i, site in enumerate(struct): if site.species_string == "O": neighbors = struct.get_neighbors(site, r=1.3) h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"] neighbor_cache[i] = h_neighbors # 识别水合氢离子 (H₃O⁺) if len(h_neighbors) == 3: hydronium_oxygens.append(i) for h_site in h_neighbors: hydronium_hydrogens.append(h_site.index) # 识别磷酸基团 for site in struct: if site.species_string == "P": neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径 # 筛选氧原子邻居 o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"] if len(o_neighbors) < 4: # 如果找不到4个氧原子,使用旧方法 for neighbor in o_neighbors: nn_site = neighbor[0] if neighbor[1] < 1.55: p_oxygens["P=O"].append(nn_site.index) else: if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)): p_oxygens["P-OH"].append(nn_site.index) else: p_oxygens["P-O"].append(nn_site.index) continue # 按距离排序 o_neighbors.sort(key=lambda x: x[1]) # 最近的氧原子为P=O p_double_o = o_neighbors[0][0] p_oxygens["P=O"].append(p_double_o.index) # 其他三个氧原子 for i in range(1, 4): o_site = o_neighbors[i][0] # 检查氧原子上是否有氢 if neighbor_cache.get(o_site.index, []): p_oxygens["P-OH"].append(o_site.index) else: p_oxygens["P-O"].append(o_site.index) # 识别P-OH基团中的H原子 (磷酸中的H) for o_idx in p_oxygens["P-OH"]: # 获取与P-OH氧相连的H原子 h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": phosphate_hydrogens.append(h_site.index) # 识别普通水分子 (排除磷酸氧和水合氢离子) for i, site in enumerate(struct): if site.species_string == "O" and i not in hydronium_oxygens: is_phosphate_oxygen = False for cat in p_oxygens.values(): if i in cat: is_phosphate_oxygen = True break if not is_phosphate_oxygen: water_oxygens.append(i) # 识别普通水分子中的H原子 (水中的H) for o_idx in water_oxygens: h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": water_hydrogens.append(h_site.index) # 验证氢原子总数 total_h = len([s for s in struct if s.species_string == "H"]) classified_h = ( len(phosphate_hydrogens) + len(water_hydrogens) + len(hydronium_hydrogens) ) if total_h != classified_h: print(f" Warning: Hydrogen mismatch! Total H: {total_h}, Classified H: {classified_h}") return { "phosphate_oxygens": p_oxygens, "phosphate_hydrogens": phosphate_hydrogens, "water_oxygens": water_oxygens, "water_hydrogens": water_hydrogens, "hydronium_oxygens": hydronium_oxygens, "hydronium_hydrogens": hydronium_hydrogens, "fluoride_atoms": fluoride_atoms, "aluminum_atoms": aluminum_atoms } def calculate_rdf(structures, center_selector, target_selector, r_max=8.0, bin_width=0.05, progress=True, exclude_bonds=True, bond_threshold=1.3, exclude_same_group=False): “”" 计算径向分布函数(每帧重新识别原子类型) “”" bins = np.arange(0, r_max, bin_width) hist = np.zeros(len(bins) - 1) total_centers = 0 total_targets = 0 total_volume = 0 iterator = tqdm(structures, desc="Calculating RDF") if progress else structures for struct in iterator: # 关键修改:对每一帧进行原子类型识别 atom_types = identify_atom_types(struct) # 识别中心原子 centers = center_selector(atom_types) # 识别目标原子 targets = target_selector(atom_types) # 检查是否有足够的原子 if len(centers) == 0 or len(targets) == 0: continue total_centers += len(centers) total_targets += len(targets) total_volume += struct.volume # 获取坐标 center_coords = np.array([struct[i].coords for i in centers]) target_coords = np.array([struct[i].coords for i in targets]) # 使用KDTree高效查询 lattice = struct.lattice kdtree = cKDTree(target_coords, boxsize=lattice.abc) # 计算所有中心原子的距离分布 distances, indices = kdtree.query(center_coords, k=min(50, len(targets)), distance_upper_bound=r_max) # 过滤有效距离并排除自身化学键 valid_distances = [] for i, dist_list in enumerate(distances): center_idx = centers[i] for j, dist in enumerate(dist_list): if dist > r_max: continue target_idx = targets[indices[i][j]] # 排除自身化学键 if exclude_bonds: # 计算中心原子和目标原子的实际距离 actual_dist = struct.get_distance(center_idx, target_idx) if actual_dist < bond_threshold: continue # 排除同一磷酸基团内部的相互作用 if exclude_same_group: center_p = get_parent_p(struct, center_idx) target_p = get_parent_p(struct, target_idx) # 如果属于同一个P原子,则跳过 if center_p is not None and target_p is not None and center_p == target_p: continue valid_distances.append(dist) # 统计距离分布 if len(valid_distances) > 0: hist += np.histogram(valid_distances, bins=bins)[0] # 归一化处理 n_frames = len(structures) if total_volume > 0: avg_density = total_targets / (total_volume * n_frames) else: avg_density = 0 r = bins[:-1] + bin_width/2 # 分箱中心位置 rdf = np.zeros_like(r) for i in range(len(hist)): r_lower = bins[i] r_upper = bins[i+1] shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3) expected_count = shell_vol * avg_density * total_centers / n_frames if expected_count > 0: rdf[i] = hist[i] / expected_count else: rdf[i] = 0 # 平滑处理 if len(rdf) > 5: window_length = min(11, len(rdf)//2*2+1) # 确保为奇数 rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=3) else: rdf_smoothed = rdf # 计算主要峰值位置 (1.5-3.0Å范围内) peak_info = {} mask = (r >= 1.5) & (r <= 3.0) if np.any(mask): peak_idx = np.argmax(rdf_smoothed[mask]) peak_pos = r[mask][peak_idx] peak_val = rdf_smoothed[mask][peak_idx] peak_info = {"position": peak_pos, "value": peak_val} else: peak_info = {"position": None, "value": None} return r, rdf_smoothed, peak_info def main(): # 解析命令行参数 parser = argparse.ArgumentParser(description=‘计算径向分布函数(RDF)’) parser.add_argument(‘system_name’, type=str, help=‘体系名称’) parser.add_argument(‘vasprun_file’, type=str, help=‘vasprun.xml文件路径’) parser.add_argument(‘–r_max’, type=float, default=8.0, help=‘最大距离(默认: 8.0Å)’) parser.add_argument(‘–bin_width’, type=float, default=0.05, help=‘分箱宽度(默认: 0.05Å)’) parser.add_argument(‘–skip’, type=int, default=5, help=‘跳过的离子步数(默认: 5)’) parser.add_argument(‘–output’, type=str, default=‘RDF_Results’, help=‘输出目录(默认: RDF_Results)’) args = parser.parse_args() # 创建输出目录 os.makedirs(args.output, exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Plots"), exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Data"), exist_ok=True) print(f"\n{'='*50}") print(f"Processing {args.system_name}: {args.vasprun_file}") print(f"{'='*50}") try: # 加载VASP结果 vr = Vasprun(args.vasprun_file, ionic_step_skip=args.skip) structures = vr.structures print(f"Loaded {len(structures)} frames") # 测试原子识别 test_struct = structures[0] atom_types = identify_atom_types(test_struct) print("\nAtom identification test (first frame):") print(f"P=O atoms: {len(atom_types['phosphate_oxygens']['P=O'])}") print(f"P-O atoms: {len(atom_types['phosphate_oxygens']['P-O'])}") print(f"P-OH atoms: {len(atom_types['phosphate_oxygens']['P-OH'])}") print(f"磷酸中的H (P-OH): {len(atom_types['phosphate_hydrogens'])}") print(f"水中的O: {len(atom_types['water_oxygens'])}") print(f"水中的H: {len(atom_types['water_hydrogens'])}") print(f"H₃O⁺中的O: {len(atom_types['hydronium_oxygens'])}") print(f"H₃O⁺中的H: {len(atom_types['hydronium_hydrogens'])}") print(f"氟离子: {len(atom_types['fluoride_atoms'])}") print(f"铝离子: {len(atom_types['aluminum_atoms'])}") # 定义RDF分组 rdf_groups = { "Phosphate_H_Bonds": [ (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P=O···H", "#1f77b4"), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-OH···H", "#ff7f0e"), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-O···H", "#17becf"), (lambda s: s["phosphate_hydrogens"], lambda s: s["water_oxygens"] + s["hydronium_oxygens"], "P-OH···O", "#d62728"), ], "Hydronium_H_Bonds": [ (lambda s: s["hydronium_oxygens"], lambda s: s["water_hydrogens"] + s["phosphate_hydrogens"], "H3O+ O···H", "#9467bd"), (lambda s: s["hydronium_hydrogens"], lambda s: s["water_oxygens"], "H3O+ H···Ow", "#8c564b"), (lambda s: s["hydronium_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "H3O+ H···Op", "#e377c2"), ], "Water_Network": [ (lambda s: s["water_oxygens"], lambda s: s["water_hydrogens"], "Ow···Hw", "#2ca02c"), (lambda s: s["water_oxygens"], lambda s: s["hydronium_hydrogens"], "Ow···Hh", "#d62728"), ], "Fluoride_H_Bonds": [ (lambda s: s["fluoride_atoms"], lambda s: s["water_hydrogens"], "F···Hw", "#2ca02c"), (lambda s: s["fluoride_atoms"], lambda s: s["phosphate_hydrogens"], "F···Hp", "#d62728"), (lambda s: s["fluoride_atoms"], lambda s: s["hydronium_hydrogens"], "F···Hh", "#9467bd"), ], "Aluminum_Coordination": [ (lambda s: s["aluminum_atoms"], lambda s: s["water_oxygens"], "Al···Ow", "#1f77b4"), (lambda s: s["aluminum_atoms"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "Al···Op", "#ff7f0e"), (lambda s: s["aluminum_atoms"], lambda s: s["fluoride_atoms"], "Al···F", "#17becf"), ], "Phosphate_Phosphate_Interactions": [ (lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "All P-Oxygens", "#1f77b4", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P=O"], "P=O···P=O", "#ff7f0e", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-O"], "P=O···P-O", "#2ca02c", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P=O···P-OH", "#d62728", True), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-O···P-OH", "#9467bd", True), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH", "#8c564b", True), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"], "P-OH···P=O (H-bond)", "#e377c2", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-O"], "P-OH···P-O (H-bond)", "#7f7f7f", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH (H-bond)", "#bcbd22", False) ] } # 标题映射 title_map = { "Phosphate_H_Bonds": "Phosphate Hydrogen Bonding", "Hydronium_H_Bonds": "Hydronium Ion Hydrogen Bonding", "Water_Network": "Water Network Hydrogen Bonding", "Fluoride_H_Bonds": "Fluoride Ion Hydrogen Bonding", "Aluminum_Coordination": "Aluminum Coordination Environment", "Phosphate_Phosphate_Interactions": "Phosphate-Phosphate Interactions" } # 存储所有数据 system_data = { "rdf_results": {}, "peak_infos": {} } # 存储每个分组的最大y值 group_y_max = {group_name: 0 for group_name in rdf_groups.keys()} global_x_max = args.r_max # 计算所有RDF分组 for group_name, pairs in rdf_groups.items(): system_data["rdf_results"][group_name] = {} system_data["peak_infos"][group_name] = {} # 当前分组在当前体系中的最大y值 group_y_max_current = 0 for pair in pairs: # 根据元组长度决定参数 if len(pair) == 4: # (center_sel, target_sel, label, color) center_sel, target_sel, label, color = pair exclude_same = False elif len(pair) == 5: # (center_sel, target_sel, label, color, exclude_same) center_sel, target_sel, label, color, exclude_same = pair print(f"\nCalculating RDF for: {label}") try: r, rdf, peak_info = calculate_rdf( structures, center_sel, target_sel, r_max=global_x_max, bin_width=args.bin_width, exclude_bonds=True, bond_threshold=1.3, exclude_same_group=exclude_same ) system_data["rdf_results"][group_name][label] = (r, rdf, color) system_data["peak_infos"][group_name][label] = peak_info # 更新当前分组在当前体系中的最大y值 if len(rdf) > 0: current_max = np.max(rdf) if current_max > group_y_max_current: group_y_max_current = current_max # 打印峰值信息 if peak_info["position"] is not None: print(f" Peak for {label}: {peak_info['position']:.3f} A (g(r) = {peak_info['value']:.2f})") else: print(f" No significant peak found for {label} in 1.5-3.0 A range") except Exception as e: print(f"Error calculating RDF for {label}: {str(e)}") system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color) system_data["peak_infos"][group_name][label] = {"position": None, "value": None} # 更新该分组的全局最大y值 if group_y_max_current > group_y_max[group_name]: group_y_max[group_name] = group_y_max_current # 为每个分组添加15%的余量 for group_name in group_y_max: group_y_max[group_name] = group_y_max[group_name] * 1.15 print(f"\nGroup-wise y-axis maximum values:") for group_name, y_max in group_y_max.items(): print(f"{group_name}: {y_max:.2f}") # 生成图表 print(f"\nGenerating plots for {args.system_name}") for group_name, group_data in system_data["rdf_results"].items(): print(f"\nGenerating plot for {args.system_name} - {group_name}") fig, ax = plt.subplots(figsize=(10, 7)) # 绘制所有RDF曲线 for label, (r, rdf, color) in group_data.items(): if len(r) > 0 and len(rdf) > 0: ax.plot(r, rdf, color=color, label=label, alpha=0.9, linewidth=2.5) # 应用统一坐标尺度 ax.set_xlim(0, global_x_max) # 为相同类型的图表设置统一的y轴范围 if group_y_max[group_name] > 0: ax.set_ylim(0, group_y_max[group_name]) else: ax.set_ylim(0, 5) # 图表装饰 ax.set_xlabel('Radial Distance (A)', fontweight='bold') ax.set_ylabel('g(r)', fontweight='bold') # 添加体系名称到标题 ax.set_title(f"{args.system_name}: {title_map[group_name]}", fontsize=16, pad=15) # 图例布局 ax.legend(ncol=1, loc='best', framealpha=0.95) # 添加网格 ax.grid(True, linestyle='--', alpha=0.7) # 添加氢键区域标记(1.5-2.5Å) ax.axvspan(1.5, 2.5, alpha=0.1, color='green') plt.tight_layout() filename = os.path.join(args.output, "RDF_Plots", f"RDF_{args.system_name}_{group_name}.png") plt.savefig(filename, bbox_inches='tight', dpi=300) print(f"Saved plot: {filename}") plt.close() # 保存RDF数据 print(f"\nSaving RDF data for {args.system_name}") system_dir = os.path.join(args.output, "RDF_Data", args.system_name) os.makedirs(system_dir, exist_ok=True) # 保存峰值信息 peak_info_path = os.path.join(system_dir, f"Peak_Positions_{args.system_name}.csv") with open(peak_info_path, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Group", "Interaction", "Peak Position (A)", "g(r) Value"]) for group_name, peaks in system_data["peak_infos"].items(): for label, info in peaks.items(): if info["position"] is not None: writer.writerow([group_name, label, f"{info['position']:.3f}", f"{info['value']:.3f}"]) else: writer.writerow([group_name, label, "N/A", "N/A"]) print(f"Saved peak positions: {peak_info_path}") # 保存所有RDF曲线数据 for group_name, group_results in system_data["rdf_results"].items(): group_dir = os.path.join(system_dir, group_name) os.makedirs(group_dir, exist_ok=True) for label, (r, rdf, color) in group_results.items(): if len(r) > 0 and len(rdf) > 0: # 创建安全的文件名 safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_") safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "") safe_label = safe_label.replace("+", "p").replace(" ", "_") filename = f"RDF_{args.system_name}_{group_name}_{safe_label}.csv" filepath = os.path.join(group_dir, filename) # 写入CSV文件 with open(filepath, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Distance (A)", "g(r)"]) for i in range(len(r)): writer.writerow([r[i], rdf[i]]) print(f"Saved RDF data: {filename}") else: print(f"No valid RDF data for {label} in {args.system_name} - {group_name}") print(f"\nCompleted processing for {args.system_name}") except Exception as e: print(f"Error processing {args.system_name}: {str(e)}") import traceback traceback.print_exc() sys.exit(1) if name == “main”: main()以上代码尝试实现对每帧重新识别原子类型,进而实现对包含质子转移过程的RDF计算,请先检验上述代码的合理性,以及可行性,在anaconda promt中执行,需要符合The Journal of Chemical Physics期刊,注意图的标尺即类型,同时输出文本结构方便origin重新绘制更改。由于之前用这个代码计算出来的结果在第一波峰周围出现不合理的负值,所以请认真检验该代码的可行性,以及针对每一帧的重新识别的计算量来实现并行或分块,目前台式电脑可以用64核以及126g内存,提高效率
最新发布
07-09
import numpy as np import matplotlib.pyplot as plt from pymatgen.io.vasp import Vasprun from pymatgen.core.structure import Structure from scipy.signal import savgol_filter from scipy.spatial import cKDTree from tqdm import tqdm import matplotlib as mpl import warnings from collections import defaultdict import os import argparse import csv from datetime import datetime import sys # 忽略可能的警告 warnings.filterwarnings("ignore", category=UserWarning) # 专业绘图设置 plt.style.use('seaborn-v0_8-whitegrid') mpl.rcParams.update({ 'font.family': 'sans-serif', 'font.sans-serif': ['Arial', 'DejaVu Sans'], 'font.size': 12, 'axes.labelsize': 14, 'axes.titlesize': 16, 'xtick.labelsize': 12, 'ytick.labelsize': 12, 'figure.dpi': 300, 'savefig.dpi': 300, 'figure.figsize': (10, 7), 'lines.linewidth': 2.5, 'legend.fontsize': 11, 'legend.framealpha': 0.9, 'mathtext.default': 'regular' }) def get_parent_p(struct, atom_index): """获取给定氧原子所属的磷酸基团(P原子索引)""" site = struct[atom_index] if site.species_string != "O": return None # 查找最近的P原子(P-O键长通常在1.5-1.6Å) p_neighbors = struct.get_neighbors(site, r=1.8) for neighbor in p_neighbors: if neighbor[0].species_string == "P": return neighbor[0].index return None def identify_atom_types(struct): """识别所有关键原子类型(增强版)""" # 磷酸氧分类 p_oxygens = {"P=O": [], "P-O": [], "P-OH": []} phosphate_hydrogens = [] # 仅P-OH基团中的H原子 # 水合氢离子识别 hydronium_oxygens = [] hydronium_hydrogens = [] # H₃O⁺中的H原子 # 普通水分子 water_oxygens = [] water_hydrogens = [] # 普通水中的H原子 # 氟离子 fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"] # 铝离子 aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"] # 创建快速邻居查找表 neighbor_cache = defaultdict(list) for i, site in enumerate(struct): if site.species_string == "O": neighbors = struct.get_neighbors(site, r=1.3) h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"] neighbor_cache[i] = h_neighbors # 识别水合氢离子 (H₃O⁺) if len(h_neighbors) == 3: hydronium_oxygens.append(i) for h_site in h_neighbors: hydronium_hydrogens.append(h_site.index) # 识别磷酸基团 for site in struct: if site.species_string == "P": neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径 # 筛选氧原子邻居 o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"] if len(o_neighbors) < 4: # 如果找不到4个氧原子,使用旧方法 for neighbor in o_neighbors: nn_site = neighbor[0] if neighbor[1] < 1.55: p_oxygens["P=O"].append(nn_site.index) else: if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)): p_oxygens["P-OH"].append(nn_site.index) else: p_oxygens["P-O"].append(nn_site.index) continue # 按距离排序 o_neighbors.sort(key=lambda x: x[1]) # 最近的氧原子为P=O p_double_o = o_neighbors[0][0] p_oxygens["P=O"].append(p_double_o.index) # 其他三个氧原子 for i in range(1, 4): o_site = o_neighbors[i][0] # 检查氧原子上是否有氢 if neighbor_cache.get(o_site.index, []): p_oxygens["P-OH"].append(o_site.index) else: p_oxygens["P-O"].append(o_site.index) # 识别P-OH基团中的H原子 (磷酸中的H) for o_idx in p_oxygens["P-OH"]: # 获取与P-OH氧相连的H原子 h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": phosphate_hydrogens.append(h_site.index) # 识别普通水分子 (排除磷酸氧和水合氢离子) for i, site in enumerate(struct): if site.species_string == "O" and i not in hydronium_oxygens: is_phosphate_oxygen = False for cat in p_oxygens.values(): if i in cat: is_phosphate_oxygen = True break if not is_phosphate_oxygen: water_oxygens.append(i) # 识别普通水分子中的H原子 (水中的H) for o_idx in water_oxygens: h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": water_hydrogens.append(h_site.index) # 验证氢原子总数 total_h = len([s for s in struct if s.species_string == "H"]) classified_h = ( len(phosphate_hydrogens) + len(water_hydrogens) + len(hydronium_hydrogens) ) if total_h != classified_h: print(f" Warning: Hydrogen mismatch! Total H: {total_h}, Classified H: {classified_h}") return { "phosphate_oxygens": p_oxygens, "phosphate_hydrogens": phosphate_hydrogens, "water_oxygens": water_oxygens, "water_hydrogens": water_hydrogens, "hydronium_oxygens": hydronium_oxygens, "hydronium_hydrogens": hydronium_hydrogens, "fluoride_atoms": fluoride_atoms, "aluminum_atoms": aluminum_atoms } def calculate_rdf(structures, center_selector, target_selector, r_max=8.0, bin_width=0.05, progress=True, exclude_bonds=True, bond_threshold=1.3, exclude_same_group=False): """ 计算径向分布函数(每帧重新识别原子类型) """ bins = np.arange(0, r_max, bin_width) hist = np.zeros(len(bins) - 1) total_centers = 0 total_targets = 0 total_volume = 0 iterator = tqdm(structures, desc="Calculating RDF") if progress else structures for struct in iterator: # 关键修改:对每一帧进行原子类型识别 atom_types = identify_atom_types(struct) # 识别中心原子 centers = center_selector(atom_types) # 识别目标原子 targets = target_selector(atom_types) # 检查是否有足够的原子 if len(centers) == 0 or len(targets) == 0: continue total_centers += len(centers) total_targets += len(targets) total_volume += struct.volume # 获取坐标 center_coords = np.array([struct[i].coords for i in centers]) target_coords = np.array([struct[i].coords for i in targets]) # 使用KDTree高效查询 lattice = struct.lattice kdtree = cKDTree(target_coords, boxsize=lattice.abc) # 计算所有中心原子的距离分布 distances, indices = kdtree.query(center_coords, k=min(50, len(targets)), distance_upper_bound=r_max) # 过滤有效距离并排除自身化学键 valid_distances = [] for i, dist_list in enumerate(distances): center_idx = centers[i] for j, dist in enumerate(dist_list): if dist > r_max: continue target_idx = targets[indices[i][j]] # 排除自身化学键 if exclude_bonds: # 计算中心原子和目标原子的实际距离 actual_dist = struct.get_distance(center_idx, target_idx) if actual_dist < bond_threshold: continue # 排除同一磷酸基团内部的相互作用 if exclude_same_group: center_p = get_parent_p(struct, center_idx) target_p = get_parent_p(struct, target_idx) # 如果属于同一个P原子,则跳过 if center_p is not None and target_p is not None and center_p == target_p: continue valid_distances.append(dist) # 统计距离分布 if len(valid_distances) > 0: hist += np.histogram(valid_distances, bins=bins)[0] # 归一化处理 n_frames = len(structures) if total_volume > 0: avg_density = total_targets / (total_volume * n_frames) else: avg_density = 0 r = bins[:-1] + bin_width/2 # 分箱中心位置 rdf = np.zeros_like(r) for i in range(len(hist)): r_lower = bins[i] r_upper = bins[i+1] shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3) expected_count = shell_vol * avg_density * total_centers / n_frames if expected_count > 0: rdf[i] = hist[i] / expected_count else: rdf[i] = 0 # 平滑处理 if len(rdf) > 5: window_length = min(11, len(rdf)//2*2+1) # 确保为奇数 rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=3) else: rdf_smoothed = rdf # 计算主要峰值位置 (1.5-3.0Å范围内) peak_info = {} mask = (r >= 1.5) & (r <= 3.0) if np.any(mask): peak_idx = np.argmax(rdf_smoothed[mask]) peak_pos = r[mask][peak_idx] peak_val = rdf_smoothed[mask][peak_idx] peak_info = {"position": peak_pos, "value": peak_val} else: peak_info = {"position": None, "value": None} return r, rdf_smoothed, peak_info def main(): # 解析命令行参数 parser = argparse.ArgumentParser(description='计算径向分布函数(RDF)') parser.add_argument('system_name', type=str, help='体系名称') parser.add_argument('vasprun_file', type=str, help='vasprun.xml文件路径') parser.add_argument('--r_max', type=float, default=8.0, help='最大距离(默认: 8.0Å)') parser.add_argument('--bin_width', type=float, default=0.05, help='分箱宽度(默认: 0.05Å)') parser.add_argument('--skip', type=int, default=5, help='跳过的离子步数(默认: 5)') parser.add_argument('--output', type=str, default='RDF_Results', help='输出目录(默认: RDF_Results)') args = parser.parse_args() # 创建输出目录 os.makedirs(args.output, exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Plots"), exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Data"), exist_ok=True) print(f"\n{'='*50}") print(f"Processing {args.system_name}: {args.vasprun_file}") print(f"{'='*50}") try: # 加载VASP结果 vr = Vasprun(args.vasprun_file, ionic_step_skip=args.skip) structures = vr.structures print(f"Loaded {len(structures)} frames") # 测试原子识别 test_struct = structures[0] atom_types = identify_atom_types(test_struct) print("\nAtom identification test (first frame):") print(f"P=O atoms: {len(atom_types['phosphate_oxygens']['P=O'])}") print(f"P-O atoms: {len(atom_types['phosphate_oxygens']['P-O'])}") print(f"P-OH atoms: {len(atom_types['phosphate_oxygens']['P-OH'])}") print(f"磷酸中的H (P-OH): {len(atom_types['phosphate_hydrogens'])}") print(f"水中的O: {len(atom_types['water_oxygens'])}") print(f"水中的H: {len(atom_types['water_hydrogens'])}") print(f"H₃O⁺中的O: {len(atom_types['hydronium_oxygens'])}") print(f"H₃O⁺中的H: {len(atom_types['hydronium_hydrogens'])}") print(f"氟离子: {len(atom_types['fluoride_atoms'])}") print(f"铝离子: {len(atom_types['aluminum_atoms'])}") # 定义RDF分组 rdf_groups = { "Phosphate_H_Bonds": [ (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P=O···H", "#1f77b4"), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-OH···H", "#ff7f0e"), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-O···H", "#17becf"), (lambda s: s["phosphate_hydrogens"], lambda s: s["water_oxygens"] + s["hydronium_oxygens"], "P-OH···O", "#d62728"), ], "Hydronium_H_Bonds": [ (lambda s: s["hydronium_oxygens"], lambda s: s["water_hydrogens"] + s["phosphate_hydrogens"], "H3O+ O···H", "#9467bd"), (lambda s: s["hydronium_hydrogens"], lambda s: s["water_oxygens"], "H3O+ H···Ow", "#8c564b"), (lambda s: s["hydronium_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "H3O+ H···Op", "#e377c2"), ], "Water_Network": [ (lambda s: s["water_oxygens"], lambda s: s["water_hydrogens"], "Ow···Hw", "#2ca02c"), (lambda s: s["water_oxygens"], lambda s: s["hydronium_hydrogens"], "Ow···Hh", "#d62728"), ], "Fluoride_H_Bonds": [ (lambda s: s["fluoride_atoms"], lambda s: s["water_hydrogens"], "F···Hw", "#2ca02c"), (lambda s: s["fluoride_atoms"], lambda s: s["phosphate_hydrogens"], "F···Hp", "#d62728"), (lambda s: s["fluoride_atoms"], lambda s: s["hydronium_hydrogens"], "F···Hh", "#9467bd"), ], "Aluminum_Coordination": [ (lambda s: s["aluminum_atoms"], lambda s: s["water_oxygens"], "Al···Ow", "#1f77b4"), (lambda s: s["aluminum_atoms"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "Al···Op", "#ff7f0e"), (lambda s: s["aluminum_atoms"], lambda s: s["fluoride_atoms"], "Al···F", "#17becf"), ], "Phosphate_Phosphate_Interactions": [ (lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "All P-Oxygens", "#1f77b4", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P=O"], "P=O···P=O", "#ff7f0e", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-O"], "P=O···P-O", "#2ca02c", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P=O···P-OH", "#d62728", True), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-O···P-OH", "#9467bd", True), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH", "#8c564b", True), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"], "P-OH···P=O (H-bond)", "#e377c2", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-O"], "P-OH···P-O (H-bond)", "#7f7f7f", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH (H-bond)", "#bcbd22", False) ] } # 标题映射 title_map = { "Phosphate_H_Bonds": "Phosphate Hydrogen Bonding", "Hydronium_H_Bonds": "Hydronium Ion Hydrogen Bonding", "Water_Network": "Water Network Hydrogen Bonding", "Fluoride_H_Bonds": "Fluoride Ion Hydrogen Bonding", "Aluminum_Coordination": "Aluminum Coordination Environment", "Phosphate_Phosphate_Interactions": "Phosphate-Phosphate Interactions" } # 存储所有数据 system_data = { "rdf_results": {}, "peak_infos": {} } # 存储每个分组的最大y值 group_y_max = {group_name: 0 for group_name in rdf_groups.keys()} global_x_max = args.r_max # 计算所有RDF分组 for group_name, pairs in rdf_groups.items(): system_data["rdf_results"][group_name] = {} system_data["peak_infos"][group_name] = {} # 当前分组在当前体系中的最大y值 group_y_max_current = 0 for pair in pairs: # 根据元组长度决定参数 if len(pair) == 4: # (center_sel, target_sel, label, color) center_sel, target_sel, label, color = pair exclude_same = False elif len(pair) == 5: # (center_sel, target_sel, label, color, exclude_same) center_sel, target_sel, label, color, exclude_same = pair print(f"\nCalculating RDF for: {label}") try: r, rdf, peak_info = calculate_rdf( structures, center_sel, target_sel, r_max=global_x_max, bin_width=args.bin_width, exclude_bonds=True, bond_threshold=1.3, exclude_same_group=exclude_same ) system_data["rdf_results"][group_name][label] = (r, rdf, color) system_data["peak_infos"][group_name][label] = peak_info # 更新当前分组在当前体系中的最大y值 if len(rdf) > 0: current_max = np.max(rdf) if current_max > group_y_max_current: group_y_max_current = current_max # 打印峰值信息 if peak_info["position"] is not None: print(f" Peak for {label}: {peak_info['position']:.3f} A (g(r) = {peak_info['value']:.2f})") else: print(f" No significant peak found for {label} in 1.5-3.0 A range") except Exception as e: print(f"Error calculating RDF for {label}: {str(e)}") system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color) system_data["peak_infos"][group_name][label] = {"position": None, "value": None} # 更新该分组的全局最大y值 if group_y_max_current > group_y_max[group_name]: group_y_max[group_name] = group_y_max_current # 为每个分组添加15%的余量 for group_name in group_y_max: group_y_max[group_name] = group_y_max[group_name] * 1.15 print(f"\nGroup-wise y-axis maximum values:") for group_name, y_max in group_y_max.items(): print(f"{group_name}: {y_max:.2f}") # 生成图表 print(f"\nGenerating plots for {args.system_name}") for group_name, group_data in system_data["rdf_results"].items(): print(f"\nGenerating plot for {args.system_name} - {group_name}") fig, ax = plt.subplots(figsize=(10, 7)) # 绘制所有RDF曲线 for label, (r, rdf, color) in group_data.items(): if len(r) > 0 and len(rdf) > 0: ax.plot(r, rdf, color=color, label=label, alpha=0.9, linewidth=2.5) # 应用统一坐标尺度 ax.set_xlim(0, global_x_max) # 为相同类型的图表设置统一的y轴范围 if group_y_max[group_name] > 0: ax.set_ylim(0, group_y_max[group_name]) else: ax.set_ylim(0, 5) # 图表装饰 ax.set_xlabel('Radial Distance (A)', fontweight='bold') ax.set_ylabel('g(r)', fontweight='bold') # 添加体系名称到标题 ax.set_title(f"{args.system_name}: {title_map[group_name]}", fontsize=16, pad=15) # 图例布局 ax.legend(ncol=1, loc='best', framealpha=0.95) # 添加网格 ax.grid(True, linestyle='--', alpha=0.7) # 添加氢键区域标记(1.5-2.5Å) ax.axvspan(1.5, 2.5, alpha=0.1, color='green') plt.tight_layout() filename = os.path.join(args.output, "RDF_Plots", f"RDF_{args.system_name}_{group_name}.png") plt.savefig(filename, bbox_inches='tight', dpi=300) print(f"Saved plot: {filename}") plt.close() # 保存RDF数据 print(f"\nSaving RDF data for {args.system_name}") system_dir = os.path.join(args.output, "RDF_Data", args.system_name) os.makedirs(system_dir, exist_ok=True) # 保存峰值信息 peak_info_path = os.path.join(system_dir, f"Peak_Positions_{args.system_name}.csv") with open(peak_info_path, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Group", "Interaction", "Peak Position (A)", "g(r) Value"]) for group_name, peaks in system_data["peak_infos"].items(): for label, info in peaks.items(): if info["position"] is not None: writer.writerow([group_name, label, f"{info['position']:.3f}", f"{info['value']:.3f}"]) else: writer.writerow([group_name, label, "N/A", "N/A"]) print(f"Saved peak positions: {peak_info_path}") # 保存所有RDF曲线数据 for group_name, group_results in system_data["rdf_results"].items(): group_dir = os.path.join(system_dir, group_name) os.makedirs(group_dir, exist_ok=True) for label, (r, rdf, color) in group_results.items(): if len(r) > 0 and len(rdf) > 0: # 创建安全的文件名 safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_") safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "") safe_label = safe_label.replace("+", "p").replace(" ", "_") filename = f"RDF_{args.system_name}_{group_name}_{safe_label}.csv" filepath = os.path.join(group_dir, filename) # 写入CSV文件 with open(filepath, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Distance (A)", "g(r)"]) for i in range(len(r)): writer.writerow([r[i], rdf[i]]) print(f"Saved RDF data: {filename}") else: print(f"No valid RDF data for {label} in {args.system_name} - {group_name}") print(f"\nCompleted processing for {args.system_name}") except Exception as e: print(f"Error processing {args.system_name}: {str(e)}") import traceback traceback.print_exc() sys.exit(1) if __name__ == "__main__": main()以上代码尝试实现对每帧重新识别原子类型,进而实现对包含质子转移过程的RDF计算,请先检验上述代码的合理性,以及可行性,在anaconda promt中执行,需要符合The Journal of Chemical Physics期刊,注意图的标尺即类型,同时输出文本结构方便origin重新绘制更改
07-09
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值