import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.signal import savgol_filter
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
from collections import defaultdict
import os
import argparse
import csv
from datetime import datetime
import sys
忽略可能的警告
warnings.filterwarnings(“ignore”, category=UserWarning)
专业绘图设置
plt.style.use(‘seaborn-v0_8-whitegrid’)
mpl.rcParams.update({
‘font.family’: ‘sans-serif’,
‘font.sans-serif’: [‘Arial’, ‘DejaVu Sans’],
‘font.size’: 12,
‘axes.labelsize’: 14,
‘axes.titlesize’: 16,
‘xtick.labelsize’: 12,
‘ytick.labelsize’: 12,
‘figure.dpi’: 300,
‘savefig.dpi’: 300,
‘figure.figsize’: (10, 7),
‘lines.linewidth’: 2.5,
‘legend.fontsize’: 11,
‘legend.framealpha’: 0.9,
‘mathtext.default’: ‘regular’
})
def get_parent_p(struct, atom_index):
“”“获取给定氧原子所属的磷酸基团(P原子索引)”“”
site = struct[atom_index]
if site.species_string != “O”:
return None
# 查找最近的P原子(P-O键长通常在1.5-1.6Å) p_neighbors = struct.get_neighbors(site, r=1.8) for neighbor in p_neighbors: if neighbor[0].species_string == "P": return neighbor[0].index return None
def identify_atom_types(struct):
“”“识别所有关键原子类型(增强版)”“”
# 磷酸氧分类
p_oxygens = {“P=O”: [], “P-O”: [], “P-OH”: []}
phosphate_hydrogens = [] # 仅P-OH基团中的H原子
# 水合氢离子识别 hydronium_oxygens = [] hydronium_hydrogens = [] # H₃O⁺中的H原子 # 普通水分子 water_oxygens = [] water_hydrogens = [] # 普通水中的H原子 # 氟离子 fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"] # 铝离子 aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"] # 创建快速邻居查找表 neighbor_cache = defaultdict(list) for i, site in enumerate(struct): if site.species_string == "O": neighbors = struct.get_neighbors(site, r=1.3) h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"] neighbor_cache[i] = h_neighbors # 识别水合氢离子 (H₃O⁺) if len(h_neighbors) == 3: hydronium_oxygens.append(i) for h_site in h_neighbors: hydronium_hydrogens.append(h_site.index) # 识别磷酸基团 for site in struct: if site.species_string == "P": neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径 # 筛选氧原子邻居 o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"] if len(o_neighbors) < 4: # 如果找不到4个氧原子,使用旧方法 for neighbor in o_neighbors: nn_site = neighbor[0] if neighbor[1] < 1.55: p_oxygens["P=O"].append(nn_site.index) else: if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)): p_oxygens["P-OH"].append(nn_site.index) else: p_oxygens["P-O"].append(nn_site.index) continue # 按距离排序 o_neighbors.sort(key=lambda x: x[1]) # 最近的氧原子为P=O p_double_o = o_neighbors[0][0] p_oxygens["P=O"].append(p_double_o.index) # 其他三个氧原子 for i in range(1, 4): o_site = o_neighbors[i][0] # 检查氧原子上是否有氢 if neighbor_cache.get(o_site.index, []): p_oxygens["P-OH"].append(o_site.index) else: p_oxygens["P-O"].append(o_site.index) # 识别P-OH基团中的H原子 (磷酸中的H) for o_idx in p_oxygens["P-OH"]: # 获取与P-OH氧相连的H原子 h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": phosphate_hydrogens.append(h_site.index) # 识别普通水分子 (排除磷酸氧和水合氢离子) for i, site in enumerate(struct): if site.species_string == "O" and i not in hydronium_oxygens: is_phosphate_oxygen = False for cat in p_oxygens.values(): if i in cat: is_phosphate_oxygen = True break if not is_phosphate_oxygen: water_oxygens.append(i) # 识别普通水分子中的H原子 (水中的H) for o_idx in water_oxygens: h_neighbors = neighbor_cache.get(o_idx, []) for h_site in h_neighbors: if h_site.species_string == "H": water_hydrogens.append(h_site.index) # 验证氢原子总数 total_h = len([s for s in struct if s.species_string == "H"]) classified_h = ( len(phosphate_hydrogens) + len(water_hydrogens) + len(hydronium_hydrogens) ) if total_h != classified_h: print(f" Warning: Hydrogen mismatch! Total H: {total_h}, Classified H: {classified_h}") return { "phosphate_oxygens": p_oxygens, "phosphate_hydrogens": phosphate_hydrogens, "water_oxygens": water_oxygens, "water_hydrogens": water_hydrogens, "hydronium_oxygens": hydronium_oxygens, "hydronium_hydrogens": hydronium_hydrogens, "fluoride_atoms": fluoride_atoms, "aluminum_atoms": aluminum_atoms }
def calculate_rdf(structures, center_selector, target_selector,
r_max=8.0, bin_width=0.05, progress=True,
exclude_bonds=True, bond_threshold=1.3,
exclude_same_group=False):
“”"
计算径向分布函数(每帧重新识别原子类型)
“”"
bins = np.arange(0, r_max, bin_width)
hist = np.zeros(len(bins) - 1)
total_centers = 0
total_targets = 0
total_volume = 0
iterator = tqdm(structures, desc="Calculating RDF") if progress else structures for struct in iterator: # 关键修改:对每一帧进行原子类型识别 atom_types = identify_atom_types(struct) # 识别中心原子 centers = center_selector(atom_types) # 识别目标原子 targets = target_selector(atom_types) # 检查是否有足够的原子 if len(centers) == 0 or len(targets) == 0: continue total_centers += len(centers) total_targets += len(targets) total_volume += struct.volume # 获取坐标 center_coords = np.array([struct[i].coords for i in centers]) target_coords = np.array([struct[i].coords for i in targets]) # 使用KDTree高效查询 lattice = struct.lattice kdtree = cKDTree(target_coords, boxsize=lattice.abc) # 计算所有中心原子的距离分布 distances, indices = kdtree.query(center_coords, k=min(50, len(targets)), distance_upper_bound=r_max) # 过滤有效距离并排除自身化学键 valid_distances = [] for i, dist_list in enumerate(distances): center_idx = centers[i] for j, dist in enumerate(dist_list): if dist > r_max: continue target_idx = targets[indices[i][j]] # 排除自身化学键 if exclude_bonds: # 计算中心原子和目标原子的实际距离 actual_dist = struct.get_distance(center_idx, target_idx) if actual_dist < bond_threshold: continue # 排除同一磷酸基团内部的相互作用 if exclude_same_group: center_p = get_parent_p(struct, center_idx) target_p = get_parent_p(struct, target_idx) # 如果属于同一个P原子,则跳过 if center_p is not None and target_p is not None and center_p == target_p: continue valid_distances.append(dist) # 统计距离分布 if len(valid_distances) > 0: hist += np.histogram(valid_distances, bins=bins)[0] # 归一化处理 n_frames = len(structures) if total_volume > 0: avg_density = total_targets / (total_volume * n_frames) else: avg_density = 0 r = bins[:-1] + bin_width/2 # 分箱中心位置 rdf = np.zeros_like(r) for i in range(len(hist)): r_lower = bins[i] r_upper = bins[i+1] shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3) expected_count = shell_vol * avg_density * total_centers / n_frames if expected_count > 0: rdf[i] = hist[i] / expected_count else: rdf[i] = 0 # 平滑处理 if len(rdf) > 5: window_length = min(11, len(rdf)//2*2+1) # 确保为奇数 rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=3) else: rdf_smoothed = rdf # 计算主要峰值位置 (1.5-3.0Å范围内) peak_info = {} mask = (r >= 1.5) & (r <= 3.0) if np.any(mask): peak_idx = np.argmax(rdf_smoothed[mask]) peak_pos = r[mask][peak_idx] peak_val = rdf_smoothed[mask][peak_idx] peak_info = {"position": peak_pos, "value": peak_val} else: peak_info = {"position": None, "value": None} return r, rdf_smoothed, peak_info
def main():
# 解析命令行参数
parser = argparse.ArgumentParser(description=‘计算径向分布函数(RDF)’)
parser.add_argument(‘system_name’, type=str, help=‘体系名称’)
parser.add_argument(‘vasprun_file’, type=str, help=‘vasprun.xml文件路径’)
parser.add_argument(‘–r_max’, type=float, default=8.0, help=‘最大距离(默认: 8.0Å)’)
parser.add_argument(‘–bin_width’, type=float, default=0.05, help=‘分箱宽度(默认: 0.05Å)’)
parser.add_argument(‘–skip’, type=int, default=5, help=‘跳过的离子步数(默认: 5)’)
parser.add_argument(‘–output’, type=str, default=‘RDF_Results’, help=‘输出目录(默认: RDF_Results)’)
args = parser.parse_args() # 创建输出目录 os.makedirs(args.output, exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Plots"), exist_ok=True) os.makedirs(os.path.join(args.output, "RDF_Data"), exist_ok=True) print(f"\n{'='*50}") print(f"Processing {args.system_name}: {args.vasprun_file}") print(f"{'='*50}") try: # 加载VASP结果 vr = Vasprun(args.vasprun_file, ionic_step_skip=args.skip) structures = vr.structures print(f"Loaded {len(structures)} frames") # 测试原子识别 test_struct = structures[0] atom_types = identify_atom_types(test_struct) print("\nAtom identification test (first frame):") print(f"P=O atoms: {len(atom_types['phosphate_oxygens']['P=O'])}") print(f"P-O atoms: {len(atom_types['phosphate_oxygens']['P-O'])}") print(f"P-OH atoms: {len(atom_types['phosphate_oxygens']['P-OH'])}") print(f"磷酸中的H (P-OH): {len(atom_types['phosphate_hydrogens'])}") print(f"水中的O: {len(atom_types['water_oxygens'])}") print(f"水中的H: {len(atom_types['water_hydrogens'])}") print(f"H₃O⁺中的O: {len(atom_types['hydronium_oxygens'])}") print(f"H₃O⁺中的H: {len(atom_types['hydronium_hydrogens'])}") print(f"氟离子: {len(atom_types['fluoride_atoms'])}") print(f"铝离子: {len(atom_types['aluminum_atoms'])}") # 定义RDF分组 rdf_groups = { "Phosphate_H_Bonds": [ (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P=O···H", "#1f77b4"), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-OH···H", "#ff7f0e"), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["water_hydrogens"] + s["hydronium_hydrogens"], "P-O···H", "#17becf"), (lambda s: s["phosphate_hydrogens"], lambda s: s["water_oxygens"] + s["hydronium_oxygens"], "P-OH···O", "#d62728"), ], "Hydronium_H_Bonds": [ (lambda s: s["hydronium_oxygens"], lambda s: s["water_hydrogens"] + s["phosphate_hydrogens"], "H3O+ O···H", "#9467bd"), (lambda s: s["hydronium_hydrogens"], lambda s: s["water_oxygens"], "H3O+ H···Ow", "#8c564b"), (lambda s: s["hydronium_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "H3O+ H···Op", "#e377c2"), ], "Water_Network": [ (lambda s: s["water_oxygens"], lambda s: s["water_hydrogens"], "Ow···Hw", "#2ca02c"), (lambda s: s["water_oxygens"], lambda s: s["hydronium_hydrogens"], "Ow···Hh", "#d62728"), ], "Fluoride_H_Bonds": [ (lambda s: s["fluoride_atoms"], lambda s: s["water_hydrogens"], "F···Hw", "#2ca02c"), (lambda s: s["fluoride_atoms"], lambda s: s["phosphate_hydrogens"], "F···Hp", "#d62728"), (lambda s: s["fluoride_atoms"], lambda s: s["hydronium_hydrogens"], "F···Hh", "#9467bd"), ], "Aluminum_Coordination": [ (lambda s: s["aluminum_atoms"], lambda s: s["water_oxygens"], "Al···Ow", "#1f77b4"), (lambda s: s["aluminum_atoms"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "Al···Op", "#ff7f0e"), (lambda s: s["aluminum_atoms"], lambda s: s["fluoride_atoms"], "Al···F", "#17becf"), ], "Phosphate_Phosphate_Interactions": [ (lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P=O"] + s["phosphate_oxygens"]["P-O"] + s["phosphate_oxygens"]["P-OH"], "All P-Oxygens", "#1f77b4", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P=O"], "P=O···P=O", "#ff7f0e", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-O"], "P=O···P-O", "#2ca02c", True), (lambda s: s["phosphate_oxygens"]["P=O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P=O···P-OH", "#d62728", True), (lambda s: s["phosphate_oxygens"]["P-O"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-O···P-OH", "#9467bd", True), (lambda s: s["phosphate_oxygens"]["P-OH"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH", "#8c564b", True), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P=O"], "P-OH···P=O (H-bond)", "#e377c2", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-O"], "P-OH···P-O (H-bond)", "#7f7f7f", False), (lambda s: s["phosphate_hydrogens"], lambda s: s["phosphate_oxygens"]["P-OH"], "P-OH···P-OH (H-bond)", "#bcbd22", False) ] } # 标题映射 title_map = { "Phosphate_H_Bonds": "Phosphate Hydrogen Bonding", "Hydronium_H_Bonds": "Hydronium Ion Hydrogen Bonding", "Water_Network": "Water Network Hydrogen Bonding", "Fluoride_H_Bonds": "Fluoride Ion Hydrogen Bonding", "Aluminum_Coordination": "Aluminum Coordination Environment", "Phosphate_Phosphate_Interactions": "Phosphate-Phosphate Interactions" } # 存储所有数据 system_data = { "rdf_results": {}, "peak_infos": {} } # 存储每个分组的最大y值 group_y_max = {group_name: 0 for group_name in rdf_groups.keys()} global_x_max = args.r_max # 计算所有RDF分组 for group_name, pairs in rdf_groups.items(): system_data["rdf_results"][group_name] = {} system_data["peak_infos"][group_name] = {} # 当前分组在当前体系中的最大y值 group_y_max_current = 0 for pair in pairs: # 根据元组长度决定参数 if len(pair) == 4: # (center_sel, target_sel, label, color) center_sel, target_sel, label, color = pair exclude_same = False elif len(pair) == 5: # (center_sel, target_sel, label, color, exclude_same) center_sel, target_sel, label, color, exclude_same = pair print(f"\nCalculating RDF for: {label}") try: r, rdf, peak_info = calculate_rdf( structures, center_sel, target_sel, r_max=global_x_max, bin_width=args.bin_width, exclude_bonds=True, bond_threshold=1.3, exclude_same_group=exclude_same ) system_data["rdf_results"][group_name][label] = (r, rdf, color) system_data["peak_infos"][group_name][label] = peak_info # 更新当前分组在当前体系中的最大y值 if len(rdf) > 0: current_max = np.max(rdf) if current_max > group_y_max_current: group_y_max_current = current_max # 打印峰值信息 if peak_info["position"] is not None: print(f" Peak for {label}: {peak_info['position']:.3f} A (g(r) = {peak_info['value']:.2f})") else: print(f" No significant peak found for {label} in 1.5-3.0 A range") except Exception as e: print(f"Error calculating RDF for {label}: {str(e)}") system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color) system_data["peak_infos"][group_name][label] = {"position": None, "value": None} # 更新该分组的全局最大y值 if group_y_max_current > group_y_max[group_name]: group_y_max[group_name] = group_y_max_current # 为每个分组添加15%的余量 for group_name in group_y_max: group_y_max[group_name] = group_y_max[group_name] * 1.15 print(f"\nGroup-wise y-axis maximum values:") for group_name, y_max in group_y_max.items(): print(f"{group_name}: {y_max:.2f}") # 生成图表 print(f"\nGenerating plots for {args.system_name}") for group_name, group_data in system_data["rdf_results"].items(): print(f"\nGenerating plot for {args.system_name} - {group_name}") fig, ax = plt.subplots(figsize=(10, 7)) # 绘制所有RDF曲线 for label, (r, rdf, color) in group_data.items(): if len(r) > 0 and len(rdf) > 0: ax.plot(r, rdf, color=color, label=label, alpha=0.9, linewidth=2.5) # 应用统一坐标尺度 ax.set_xlim(0, global_x_max) # 为相同类型的图表设置统一的y轴范围 if group_y_max[group_name] > 0: ax.set_ylim(0, group_y_max[group_name]) else: ax.set_ylim(0, 5) # 图表装饰 ax.set_xlabel('Radial Distance (A)', fontweight='bold') ax.set_ylabel('g(r)', fontweight='bold') # 添加体系名称到标题 ax.set_title(f"{args.system_name}: {title_map[group_name]}", fontsize=16, pad=15) # 图例布局 ax.legend(ncol=1, loc='best', framealpha=0.95) # 添加网格 ax.grid(True, linestyle='--', alpha=0.7) # 添加氢键区域标记(1.5-2.5Å) ax.axvspan(1.5, 2.5, alpha=0.1, color='green') plt.tight_layout() filename = os.path.join(args.output, "RDF_Plots", f"RDF_{args.system_name}_{group_name}.png") plt.savefig(filename, bbox_inches='tight', dpi=300) print(f"Saved plot: {filename}") plt.close() # 保存RDF数据 print(f"\nSaving RDF data for {args.system_name}") system_dir = os.path.join(args.output, "RDF_Data", args.system_name) os.makedirs(system_dir, exist_ok=True) # 保存峰值信息 peak_info_path = os.path.join(system_dir, f"Peak_Positions_{args.system_name}.csv") with open(peak_info_path, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Group", "Interaction", "Peak Position (A)", "g(r) Value"]) for group_name, peaks in system_data["peak_infos"].items(): for label, info in peaks.items(): if info["position"] is not None: writer.writerow([group_name, label, f"{info['position']:.3f}", f"{info['value']:.3f}"]) else: writer.writerow([group_name, label, "N/A", "N/A"]) print(f"Saved peak positions: {peak_info_path}") # 保存所有RDF曲线数据 for group_name, group_results in system_data["rdf_results"].items(): group_dir = os.path.join(system_dir, group_name) os.makedirs(group_dir, exist_ok=True) for label, (r, rdf, color) in group_results.items(): if len(r) > 0 and len(rdf) > 0: # 创建安全的文件名 safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_") safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "") safe_label = safe_label.replace("+", "p").replace(" ", "_") filename = f"RDF_{args.system_name}_{group_name}_{safe_label}.csv" filepath = os.path.join(group_dir, filename) # 写入CSV文件 with open(filepath, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["Distance (A)", "g(r)"]) for i in range(len(r)): writer.writerow([r[i], rdf[i]]) print(f"Saved RDF data: {filename}") else: print(f"No valid RDF data for {label} in {args.system_name} - {group_name}") print(f"\nCompleted processing for {args.system_name}") except Exception as e: print(f"Error processing {args.system_name}: {str(e)}") import traceback traceback.print_exc() sys.exit(1)
if name == “main”:
main()以上代码尝试实现对每帧重新识别原子类型,进而实现对包含质子转移过程的RDF计算,请先检验上述代码的合理性,以及可行性,在anaconda promt中执行,需要符合The Journal of Chemical Physics期刊,注意图的标尺即类型,同时输出文本结构方便origin重新绘制更改。由于之前用这个代码计算出来的结果在第一波峰周围出现不合理的负值,所以请认真检验该代码的可行性,以及针对每一帧的重新识别的计算量来实现并行或分块,目前台式电脑可以用64核以及126g内存,提高效率
最新发布