import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
import os
import csv
import argparse
import multiprocessing
import time
import sys
忽略可能的警告
warnings.filterwarnings(“ignore”, category=UserWarning)
专业绘图设置
plt.style.use(‘seaborn-v0_8-whitegrid’)
mpl.rcParams.update({
‘font.family’: ‘serif’,
‘font.serif’: [‘Times New Roman’, ‘DejaVu Serif’],
‘font.size’: 12,
‘axes.labelsize’: 14,
‘axes.titlesize’: 16,
‘xtick.labelsize’: 12,
‘ytick.labelsize’: 12,
‘figure.dpi’: 600,
‘savefig.dpi’: 600,
‘figure.figsize’: (8, 6),
‘lines.linewidth’: 2.0,
‘legend.fontsize’: 10,
‘legend.framealpha’: 0.8,
‘mathtext.default’: ‘regular’,
‘axes.linewidth’: 1.5,
‘xtick.major.width’: 1.5,
‘ytick.major.width’: 1.5,
‘xtick.major.size’: 5,
‘ytick.major.size’: 5,
})
def identify_atom_types(struct):
“”“原子类型识别函数”“”
# 初始化数据结构
atom_types = {
“phosphate_oxygens”: {“P-O/P=O”: [], “P-OH”: []},
“phosphate_hydrogens”: [],
“water_oxygens”: [],
“water_hydrogens”: [],
“hydronium_oxygens”: [],
“hydronium_hydrogens”: [],
“fluoride_atoms”: [i for i, site in enumerate(struct) if site.species_string == “F”],
“aluminum_atoms”: [i for i, site in enumerate(struct) if site.species_string == “Al”]
}
# 构建全局KDTree all_coords = np.array([site.coords for site in struct]) kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc) # 识别磷酸基团 p_atoms = [i for i, site in enumerate(struct) if site.species_string == "P"] phosphate_oxygens = [] for p_idx in p_atoms: # 查找P周围的O原子 (距离 < 1.6Å) neighbors = kdtree.query_ball_point(all_coords[p_idx], r=1.6) p_o_indices = [idx for idx in neighbors if idx != p_idx and struct[idx].species_string == "O"] phosphate_oxygens.extend(p_o_indices) # 识别所有H原子并确定归属 hydrogen_owners = {} h_atoms = [i for i, site in enumerate(struct) if site.species_string == "H"] for h_idx in h_atoms: neighbors = kdtree.query_ball_point(all_coords[h_idx], r=1.2) candidate_os = [idx for idx in neighbors if idx != h_idx and struct[idx].species_string == "O"] if not candidate_os: continue min_dist = float('inf') owner_o = None for o_idx in candidate_os: dist = struct.get_distance(h_idx, o_idx) if dist < min_dist: min_dist = dist owner_o = o_idx hydrogen_owners[h_idx] = owner_o # 分类磷酸氧:带H的为P-OH,不带H的为P-O/P=O for o_idx in phosphate_oxygens: has_hydrogen = any(owner_o == o_idx for h_idx, owner_o in hydrogen_owners.items()) if has_hydrogen: atom_types["phosphate_oxygens"]["P-OH"].append(o_idx) else: atom_types["phosphate_oxygens"]["P-O/P=O"].append(o_idx) # 识别水和水合氢离子 all_o_indices = [i for i, site in enumerate(struct) if site.species_string == "O"] non_phosphate_os = [o_idx for o_idx in all_o_indices if o_idx not in phosphate_oxygens] o_h_count = {} for h_idx, owner_o in hydrogen_owners.items(): o_h_count[owner_o] = o_h_count.get(owner_o, 0) + 1 for o_idx in non_phosphate_os: h_count = o_h_count.get(o_idx, 0) attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx] if h_count == 2: atom_types["water_oxygens"].append(o_idx) atom_types["water_hydrogens"].extend(attached_hs) elif h_count == 3: atom_types["hydronium_oxygens"].append(o_idx) atom_types["hydronium_hydrogens"].extend(attached_hs) # 识别磷酸基团的H原子 for o_idx in atom_types["phosphate_oxygens"]["P-OH"]: attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx] atom_types["phosphate_hydrogens"].extend(attached_hs) return atom_types
def get_hbond_config():
“”“返回氢键配置,包含距离和角度阈值”“”
return [
{
“name”: “P-O/P=O···Hw”,
“donor_type”: “water_oxygens”,
“acceptor_type”: “P-O/P=O”,
“h_type”: “water_hydrogens”,
“distance_threshold”: 2.375,
“angle_threshold”: 143.99,
“color”: “#1f77b4”
},
{
“name”: “P-O/P=O···Hh”,
“donor_type”: “hydronium_oxygens”,
“acceptor_type”: “P-O/P=O”,
“h_type”: “hydronium_hydrogens”,
“distance_threshold”: 2.275,
“angle_threshold”: 157.82,
“color”: “#ff7f0e”
},
{
“name”: “P-O/P=O···Hp”,
“donor_type”: “P-OH”,
“acceptor_type”: “P-O/P=O”,
“h_type”: “phosphate_hydrogens”,
“distance_threshold”: 2.175,
“angle_threshold”: 155.00,
“color”: “#2ca02c”
},
{
“name”: “P-OH···Ow”,
“donor_type”: “P-OH”,
“acceptor_type”: “water_oxygens”,
“h_type”: “phosphate_hydrogens”,
“distance_threshold”: 2.275,
“angle_threshold”: 155.13,
“color”: “#d62728”
},
{
“name”: “Hw···Ow”,
“donor_type”: “water_oxygens”,
“acceptor_type”: “water_oxygens”,
“h_type”: “water_hydrogens”,
“distance_threshold”: 2.375,
“angle_threshold”: 138.73,
“color”: “#9467bd”
},
{
“name”: “Hh···Ow”,
“donor_type”: “hydronium_oxygens”,
“acceptor_type”: “water_oxygens”,
“h_type”: “hydronium_hydrogens”,
“distance_threshold”: 2.225,
“angle_threshold”: 155.31,
“color”: “#8c564b”
},
{
“name”: “Hw···F”,
“donor_type”: “water_oxygens”,
“acceptor_type”: “fluoride_atoms”,
“h_type”: “water_hydrogens”,
“distance_threshold”: 2.225,
“angle_threshold”: 137.68,
“color”: “#e377c2”
},
{
“name”: “Hh···F”,
“donor_type”: “hydronium_oxygens”,
“acceptor_type”: “fluoride_atoms”,
“h_type”: “hydronium_hydrogens”,
“distance_threshold”: 2.175,
“angle_threshold”: 154.64,
“color”: “#7f7f7f”
},
{
“name”: “Hp···F”,
“donor_type”: “P-OH”,
“acceptor_type”: “fluoride_atoms”,
“h_type”: “phosphate_hydrogens”,
“distance_threshold”: 2.275,
“angle_threshold”: 153.71,
“color”: “#bcbd22”
}
]
def calculate_angle(struct, donor_idx, h_idx, acceptor_idx):
“”“计算D-H···A键角 (角度制),使用笛卡尔向量表示并处理周期性”“”
# 获取分数坐标
frac_coords = struct.frac_coords
lattice = struct.lattice
# 获取氢原子H的分数坐标 h_frac = frac_coords[h_idx] # 计算供体D相对于H的分数坐标差 d_frac = frac_coords[donor_idx] dh_frac = d_frac - h_frac # 计算受体A相对于H的分数坐标差 a_frac = frac_coords[acceptor_idx] ah_frac = a_frac - h_frac # 应用周期性修正 (将分数坐标差限制在[-0.5, 0.5]范围内) dh_frac = np.where(dh_frac > 0.5, dh_frac - 1, dh_frac) dh_frac = np.where(dh_frac < -0.5, dh_frac + 1, dh_frac) ah_frac = np.where(ah_frac > 0.5, ah_frac - 1, ah_frac) ah_frac = np.where(ah_frac < -0.5, ah_frac + 1, ah_frac) # 转换为笛卡尔向量 (H->D 和 H->A) vec_hd = np.dot(dh_frac, lattice.matrix) # H->D向量 vec_ha = np.dot(ah_frac, lattice.matrix) # H->A向量 # 计算向量点积 dot_product = np.dot(vec_hd, vec_ha) # 计算向量模长 norm_hd = np.linalg.norm(vec_hd) norm_ha = np.linalg.norm(vec_ha) # 避免除以零 if norm_hd < 1e-6 or norm_ha < 1e-6: return None # 计算余弦值 cos_theta = dot_product / (norm_hd * norm_ha) # 处理数值误差 cos_theta = np.clip(cos_theta, -1.0, 1.0) # 计算角度 (弧度转角度) angle_rad = np.arccos(cos_theta) angle_deg = np.degrees(angle_rad) return angle_deg
def calculate_hbond_lengths_frame(struct, atom_types, hbond_config, bond_threshold=1.3):
“”“计算单帧结构中氢键键长(H···A距离)”“”
# 构建全局KDTree用于快速搜索
all_coords = np.array([site.coords for site in struct])
lattice_abc = struct.lattice.abc
kdtree = cKDTree(all_coords, boxsize=lattice_abc)
# 结果字典: {氢键类型: [键长列表]} length_results = {hbond["name"]: [] for hbond in hbond_config} # 处理每一类氢键 for hbond in hbond_config: # 获取供体原子列表 if hbond["donor_type"] == "P-OH": donors = atom_types["phosphate_oxygens"]["P-OH"] else: donors = atom_types[hbond["donor_type"]] # 获取受体原子列表 if hbond["acceptor_type"] == "P-O/P=O": acceptors = atom_types["phosphate_oxygens"]["P-O/P=O"] elif hbond["acceptor_type"] == "P-OH": acceptors = atom_types["phosphate_oxygens"]["P-OH"] else: acceptors = atom_types[hbond["acceptor_type"]] # 获取氢原子列表 hydrogens = atom_types[hbond["h_type"]] # 如果没有氢原子或受体,跳过 if len(hydrogens) == 0 or len(acceptors) == 0: continue # 为受体构建KDTree(使用全局坐标) acceptor_coords = all_coords[acceptors] acceptor_kdtree = cKDTree(acceptor_coords, boxsize=lattice_abc) # 遍历所有氢原子 for h_idx in hydrogens: h_coords = all_coords[h_idx] # 查找与H成键的供体 (距离 < bond_threshold) donor_neighbors = kdtree.query_ball_point(h_coords, r=bond_threshold) donor_candidates = [idx for idx in donor_neighbors if idx in donors] # 如果没有找到供体,跳过 if not donor_candidates: continue # 选择最近的供体 min_dist = float('inf') donor_idx = None for d_idx in donor_candidates: dist = struct.get_distance(h_idx, d_idx) if dist < min_dist: min_dist = dist donor_idx = d_idx # 查找在距离阈值内的受体 acceptor_indices = acceptor_kdtree.query_ball_point(h_coords, r=hbond["distance_threshold"]) for a_idx_offset in acceptor_indices: a_idx = acceptors[a_idx_offset] # 排除供体自身 if a_idx == donor_idx: continue # 计算键长 (H···A距离) ha_distance = struct.get_distance(h_idx, a_idx) # 计算键角 angle = calculate_angle(struct, donor_idx, h_idx, a_idx) # 检查角度阈值 if angle is not None and angle >= hbond["angle_threshold"]: length_results[hbond["name"]].append(ha_distance) return length_results
def calculate_hbond_lengths_frame_wrapper(args):
“”“包装函数用于多进程处理”“”
struct, hbond_config = args
atom_types = identify_atom_types(struct)
return calculate_hbond_lengths_frame(struct, atom_types, hbond_config)
def calculate_hbond_lengths_parallel(structures, workers=1, step_interval=10):
“”“并行计算氢键键长,每step_interval帧计算一次”“”
hbond_config = get_hbond_config()
all_results = []
# 只选择每step_interval帧的结构 selected_structures = structures[::step_interval] frame_indices = list(range(0, len(structures), step_interval)) # 准备参数列表 args_list = [(struct, hbond_config) for struct in selected_structures] # 如果没有可用的worker,则顺序执行 if workers == 1: results = [] for args in tqdm(args_list, desc="Calculating HBond Lengths"): results.append(calculate_hbond_lengths_frame_wrapper(args)) else: with multiprocessing.Pool(processes=workers) as pool: results = list(tqdm( pool.imap(calculate_hbond_lengths_frame_wrapper, args_list), total=len(selected_structures), desc="Calculating HBond Lengths" )) # 将结果与帧索引组合 for frame_idx, frame_result in zip(frame_indices, results): all_results.append({ "frame_idx": frame_idx, "results": frame_result }) return all_results
def plot_hbond_length_time_series(all_results, system_name):
“”“绘制氢键键长随时间变化的曲线并保存原始数据”“”
# 创建输出目录
os.makedirs(“HBond_Length_Time_Series”, exist_ok=True)
os.makedirs(“HBond_Length_Data”, exist_ok=True)
hbond_config = get_hbond_config() # 创建统计信息汇总文件 - 使用'A'代替Å避免编码问题 summary_path = os.path.join("HBond_Length_Data", f"{system_name}_summary.csv") with open(summary_path, 'w', newline='', encoding='utf-8') as summary_file: summary_writer = csv.writer(summary_file) summary_writer.writerow(["HBond Type", "Mean Length (A)", "Std Dev (A)", "Median (A)", "Min (A)", "Max (A)"]) # 处理每种氢键类型 for hbond in hbond_config: hbond_name = hbond["name"] safe_name = hbond_name.replace("/", "").replace("=", "").replace("···", "_").replace(" ", "_") # 提取该氢键类型的所有帧的键长 frame_indices = [] all_lengths = [] for frame_data in all_results: frame_idx = frame_data["frame_idx"] lengths = frame_data["results"].get(hbond_name, []) if lengths: # 只记录有数据的帧 frame_indices.append(frame_idx) all_lengths.append(lengths) if not frame_indices: print(f"No hydrogen bonds found for {hbond_name} in {system_name}") continue # 计算每帧的统计量 mean_lengths = [np.mean(lengths) for lengths in all_lengths] median_lengths = [np.median(lengths) for lengths in all_lengths] std_lengths = [np.std(lengths) for lengths in all_lengths] # 计算全局统计量 all_flat_lengths = [length for sublist in all_lengths for length in sublist] global_mean = np.mean(all_flat_lengths) global_std = np.std(all_flat_lengths) global_median = np.median(all_flat_lengths) global_min = np.min(all_flat_lengths) global_max = np.max(all_flat_lengths) # 保存全局统计信息到汇总文件 with open(summary_path, 'a', newline='', encoding='utf-8') as summary_file: summary_writer = csv.writer(summary_file) summary_writer.writerow([ hbond_name, f"{global_mean:.4f}", f"{global_std:.4f}", f"{global_median:.4f}", f"{global_min:.4f}", f"{global_max:.4f}" ]) # ========== 保存原始时间序列数据 ========== data_path = os.path.join("HBond_Length_Data", f"{system_name}_{safe_name}_time_series.csv") with open(data_path, 'w', newline='', encoding='utf-8') as data_file: data_writer = csv.writer(data_file) data_writer.writerow(["Frame Index", "Mean Length (A)", "Median Length (A)", "Std Dev (A)"]) for i, frame_idx in enumerate(frame_indices): data_writer.writerow([ frame_idx, f"{mean_lengths[i]:.4f}", f"{median_lengths[i]:.4f}", f"{std_lengths[i]:.4f}" ]) # ========== 绘制时间序列图 ========== plt.figure(figsize=(10, 6)) # 绘制平均键长曲线 plt.plot(frame_indices, mean_lengths, color=hbond["color"], label="Mean Length", linewidth=2) # 绘制中位数键长曲线 plt.plot(frame_indices, median_lengths, color=hbond["color"], linestyle="--", label="Median Length", linewidth=1.5) # 添加标准差误差带 plt.fill_between( frame_indices, np.array(mean_lengths) - np.array(std_lengths), np.array(mean_lengths) + np.array(std_lengths), color=hbond["color"], alpha=0.2, label="±1 Std Dev" ) # 添加全局统计信息 stats_text = (f"Global Mean: {global_mean:.3f} $\mathrm{{\\AA}}$\n" f"Global Std: {global_std:.3f} $\mathrm{{\\AA}}$\n" f"Global Median: {global_median:.3f} $\mathrm{{\\AA}}$\n" f"Count: {len(all_flat_lengths)}") plt.text(0.75, 0.95, stats_text, transform=plt.gca().transAxes, verticalalignment='top', bbox=dict(boxstyle='round', alpha=0.5)) # 设置标题和标签 plt.title(f"{system_name}: {hbond_name} Bond Length Time Series", fontsize=16) plt.xlabel("Frame Index", fontsize=14) plt.ylabel(r"Bond Length ($\mathrm{\AA}$)", fontsize=14) # 使用LaTeX格式的Å符号 # 设置Y轴范围 y_min = max(0.5, global_min - 0.1) y_max = min(3.5, global_max + 0.1) plt.ylim(y_min, y_max) plt.grid(True, linestyle='--', alpha=0.7) plt.legend() # 保存图像 image_path = os.path.join("HBond_Length_Time_Series", f"{system_name}_{safe_name}_time_series.tiff") plt.savefig(image_path, dpi=600, bbox_inches='tight') plt.close() print(f"Saved HBond length time series data: {data_path}") print(f"Saved HBond length time series plot: {image_path}") print(f"Saved summary statistics: {summary_path}")
def main(vasprun_files, workers=1, step_interval=10):
“”“主处理函数”“”
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{‘=’*50}“)
print(f"Processing {system_name}: {vasprun_file} with {workers} workers”)
print(f"Step interval: {step_interval} frames")
print(f"{‘=’*50}")
start_time = time.time()
try: # 加载VASP结果 vr = Vasprun(vasprun_file, ionic_step_skip=5) structures = vr.structures print(f"Loaded {len(structures)} frames") print(f"Lattice parameters: {structures[0].lattice.abc}") print(f"Periodic boundary handling: Fractional coordinates + PBC correction") # 计算氢键键长时间序列 print("Calculating hydrogen bond lengths over time...") hbond_lengths = calculate_hbond_lengths_parallel(structures, workers=workers, step_interval=step_interval) # 绘制并保存结果 plot_hbond_length_time_series(hbond_lengths, system_name) elapsed = time.time() - start_time print(f"\nCompleted processing for {system_name} in {elapsed:.2f} seconds") except Exception as e: print(f"Error processing {system_name}: {str(e)}", file=sys.stderr) import traceback traceback.print_exc() print("\nAll HBond length time series analysis completed successfully!")
if name == “main”:
# 设置命令行参数
parser = argparse.ArgumentParser(description=‘Calculate hydrogen bond lengths from VASP simulations’)
parser.add_argument(‘–workers’, type=int, default=multiprocessing.cpu_count(),
help=f’Number of parallel workers (default: {multiprocessing.cpu_count()})‘)
parser.add_argument(’–step_interval’, type=int, default=10,
help=‘Frame interval for analysis (default: 10)’)
args = parser.parse_args() # 自动设置vasprun文件和系统名称 vasprun_files = { "System1": "vasprun1.xml", "System2": "vasprun2.xml", "System3": "vasprun3.xml", "System4": "vasprun4.xml" } # 检查文件是否存在 missing_files = [name for name, path in vasprun_files.items() if not os.path.exists(path)] if missing_files: raise FileNotFoundError(f"Missing vasprun files: {', '.join(missing_files)}") print(f"Starting HBond length analysis with {args.workers} workers...") main(vasprun_files, workers=args.workers, step_interval=args.step_interval)以上代码已经没什么问题了,就是修改绘图部分,能不能将y轴统一,便于小论文中图之间的对比,统一为1-2.4,同时图中的线的图例和统计结果发生了重叠,可以一个在左上角,一个在右上角,要符合The Journal of Chemical Physics期刊要求
最新发布