import MDAnalysis as mda
import numpy as np
from MDAnalysis.lib.distances import capped_distance, calc_angles, distance_array
import multiprocessing as mp
from tqdm import tqdm
import os
import argparse
import matplotlib.pyplot as plt
from scipy.integrate import cumtrapz
import pandas as pd
from functools import lru_cache
import time
======================
优化参数设置
BOX_SIZE = 80.0
CALC_REGION = [0, 60, 0, 60, 0, 60]
ATOM_TYPE_MAP = {1: ‘H’, 2: ‘O’, 7: ‘P’}
HB_DIST_CUTOFF = 3.5
HB_ANGLE_CUTOFF = 130
FRAME_DT = 0.1
TAU_MAX = 20.0 # 缩短时间窗口
DT_ORIGIN = 2.0 # 增加原点间隔
N_WORKERS = os.cpu_count() # 自动使用所有核心
预定义原子类型常量
O_WATER = 0
O_HYDRONIUM = 1
O_PHYDROXYL = 2
O_PDOUBLE = 3
O_PBRIDGE = 4
H_WATER = 5
H_HYDRONIUM = 6
H_PHOSPHORIC = 7
P = 8
UNKNOWN = 9
原子类型映射
ATOM_TYPE_IDS = {
‘O_water’: O_WATER,
‘O_hydronium’: O_HYDRONIUM,
‘O_phydroxyl’: O_PHYDROXYL,
‘O_pdouble’: O_PDOUBLE,
‘O_pbridge’: O_PBRIDGE,
‘H_water’: H_WATER,
‘H_hydronium’: H_HYDRONIUM,
‘H_phosphoric’: H_PHOSPHORIC,
‘P’: P,
‘UNK’: UNKNOWN
}
原子类型反向映射
ATOM_TYPE_NAMES =
def classify_atoms_optimized(positions, types, box):
“”“向量化优化的原子分类函数”“”
n_atoms = len(types)
atom_labels = np.full(n_atoms, UNKNOWN, dtype=np.int8)
识别原子类型 o_indices = np.where(types == 2)[0] h_indices = np.where(types == 1)[0] p_indices = np.where(types == 7)[0] # 计算所有O-H距离 oh_distances = distance_array(positions[o_indices], positions[h_indices], box=box) oh_bonds = oh_distances < 1.2 # 计算所有P-O距离 po_distances = distance_array(positions[p_indices], positions[o_indices], box=box) po_bonds = po_distances < 1.8 # 分类氢原子 for h_idx in h_indices: # 找到与氢原子相连的氧原子 o_conn = o_indices[np.where(oh_bonds[:, np.where(h_indices == h_idx)[0][0]])[0]] if len(o_conn) == 1: o_type = atom_labels[o_conn[0]] if o_type == O_HYDRONIUM: atom_labels[h_idx] = H_HYDRONIUM elif o_type == O_WATER: atom_labels[h_idx] = H_WATER elif o_type == O_PHYDROXYL: atom_labels[h_idx] = H_PHOSPHORIC # 分类氧原子 for i, o_idx in enumerate(o_indices): # 计算连接数 h_count = np.sum(oh_bonds[i]) p_count = np.sum(po_bonds[:, i]) if h_count == 3: atom_labels[o_idx] = O_HYDRONIUM elif h_count == 2: atom_labels[o_idx] = O_WATER elif h_count == 1 and p_count == 1: atom_labels[o_idx] = O_PHYDROXYL elif p_count == 1: atom_labels[o_idx] = O_PDOUBLE elif p_count == 2: atom_labels[o_idx] = O_PBRIDGE # 分类磷原子 atom_labels[p_indices] = P return atom_labels
def find_hbonds_optimized(positions, atom_labels, box):
“”“向量化优化的氢键识别函数”“”
识别供体和受体
donor_mask = np.isin(atom_labels, [O_WATER, O_HYDRONIUM, O_PHYDROXYL])
acceptor_mask = np.isin(atom_labels, [O_WATER, O_HYDRONIUM, O_PDOUBLE, O_PBRIDGE, O_PHYDROXYL])
获取供体氢原子 donor_hydrogens = [] donor_indices = [] donor_types = [] for i in np.where(donor_mask)[0]: # 找到与供体相连的氢原子 h_mask = (atom_labels == H_WATER) | (atom_labels == H_HYDRONIUM) | (atom_labels == H_PHOSPHORIC) h_positions = positions[h_mask] h_indices = np.where(h_mask)[0] dists = distance_array(positions[i].reshape(1, -1), h_positions, box=box) connected_h = np.where(dists[0] < 1.2)[0] for h_idx in connected_h: donor_hydrogens.append(h_indices[h_idx]) donor_indices.append(i) donor_types.append(atom_labels[i]) if not donor_hydrogens or not np.any(acceptor_mask): return [] # 转换为数组以提高性能 donor_hydrogens = np.array(donor_hydrogens) donor_indices = np.array(donor_indices) donor_types = np.array(donor_types) acceptor_indices = np.where(acceptor_mask)[0] # 计算供体氢到受体的距离 dists = distance_array(positions[donor_hydrogens], positions[acceptor_indices], box=box) # 找出满足距离条件的配对 valid_pairs = np.where(dists < HB_DIST_CUTOFF) hbonds = [] for i, j in zip(*valid_pairs): donor_idx = donor_indices[i] hydrogen_idx = donor_hydrogens[i] acceptor_idx = acceptor_indices[j] # 跳过自相互作用 if donor_idx == acceptor_idx: continue # 计算角度 angle = calc_angles(positions[donor_idx], positions[hydrogen_idx], positions[acceptor_idx], box=box) angle_deg = np.degrees(angle) if angle_deg > HB_ANGLE_CUTOFF: hbond_type = f"{ATOM_TYPE_NAMES[donor_types[i]].split(‘‘)[0]}-{ATOM_TYPE_NAMES[atom_labels[acceptor_idx]].split(’’)[0]}" hbonds.append({ ‘donor’: donor_idx, ‘hydrogen’: hydrogen_idx, ‘acceptor’: acceptor_idx, ‘type’: hbond_type, ‘distance’: dists[i, j], ‘angle’: angle_deg }) return hbonds
def process_frame(args):
“”“并行处理单个帧”“”
frame_idx, top_file, traj_file, calc_region = args
try:
universe = mda.Universe(
top_file, traj_file,
topology_format=“DATA”,
format=“LAMMPSDUMP”,
atom_style=“id type charge x y z”
)
universe.trajectory[frame_idx]
应用区域筛选 positions = universe.atoms.positions mask = ((positions[:, 0] >= calc_region[0]) & (positions[:, 0] <= calc_region[1]) & (positions[:, 1] >= calc_region[2]) & (positions[:, 1] <= calc_region[3]) & (positions[:, 2] >= calc_region[4]) & (positions[:, 2] <= calc_region[5])) if not np.any(mask): return frame_idx, [] types = universe.atoms.types.astype(int)[mask] positions = positions[mask] # 分类原子 atom_labels = classify_atoms_optimized( positions, types, universe.dimensions ) # 识别氢键 hbonds = find_hbonds_optimized( positions, atom_labels, universe.dimensions ) return frame_idx, hbonds except Exception as e: print(f"Error processing frame {frame_idx}: {str(e)}") return frame_idx, []
def calculate_hbond_lifetime(origins, universe, atom_labels_cache):
“”“优化后的氢键寿命计算函数”“”
n_frames = len(universe.trajectory)
tau_max_frames = int(TAU_MAX / FRAME_DT)
scf = np.zeros(tau_max_frames) count = np.zeros(tau_max_frames) for origin in origins: hbonds_origin = atom_labels_cache.get(origin, []) for hbond in hbonds_origin: donor = hbond[‘donor’] hydrogen = hbond[‘hydrogen’] acceptor = hbond[‘acceptor’] # 提前获取所有帧的位置 positions_cache = {} for t in range(min(tau_max_frames, n_frames - origin)): frame_idx = origin + t if frame_idx not in positions_cache: universe.trajectory[frame_idx] positions_cache[frame_idx] = universe.atoms.positions.copy() # 检查氢键存活 hbond_exists = True for t in range(1, min(tau_max_frames, n_frames - origin)): frame_idx = origin + t pos = positions_cache[frame_idx] dist = np.linalg.norm(pos[hydrogen] - pos[acceptor]) angle = calc_angles( pos[donor], pos[hydrogen], pos[acceptor], box=universe.dimensions ) angle_deg = np.degrees(angle) if dist > HB_DIST_CUTOFF or angle_deg < HB_ANGLE_CUTOFF: hbond_exists = False if hbond_exists: scf[t] += 1 count[t] += 1 else: break return scf, count
def main(top_file, traj_file, output_prefix):
print(f"加载拓扑文件: {top_file}“)
print(f"加载轨迹文件: {traj_file}”)
创建Universe获取帧数 universe = mda.Universe( top_file, traj_file, topology_format=“DATA”, format=“LAMMPSDUMP”, atom_style=“id type charge x y z” ) n_frames = len(universe.trajectory) print(f"开始预处理 {n_frames} 帧 (并行处理)…“) start_time = time.time() # 并行处理所有帧 with mp.Pool(processes=N_WORKERS) as pool: args = [(i, top_file, traj_file, CALC_REGION) for i in range(n_frames)] results = list(tqdm(pool.imap(process_frame, args, chunksize=10), total=n_frames)) atom_labels_cache = {} all_hbonds = [] frame_hbond_counts = {} for frame_idx, frame_hbonds in results: atom_labels_cache[frame_idx] = frame_hbonds all_hbonds.extend(frame_hbonds) for hb in frame_hbonds: hb_type = hb[‘type’] if hb_type not in frame_hbond_counts: frame_hbond_counts[hb_type] = [] frame_hbond_counts[hb_type].append(1) preprocess_time = time.time() - start_time print(f"预处理完成! 耗时: {preprocess_time:.2f} 秒”) # 计算氢键密度 avg_hbonds = len(all_hbonds) / n_frames calc_volume = ((CALC_REGION[1]-CALC_REGION[0]) * (CALC_REGION[3]-CALC_REGION[2]) * (CALC_REGION[5]-CALC_REGION[4])) * 1e-27 hbond_density = avg_hbonds / (calc_volume * 6.022e23) print(f"平均氢键数: {avg_hbonds:.2f}“) print(f"氢键数密度: {hbond_density:.4f} mol/L”) # 氢键寿命计算 print(“计算氢键寿命…”) tau_max_frames = int(TAU_MAX / FRAME_DT) dt_origin_frames = int(DT_ORIGIN / FRAME_DT) origins = range(0, n_frames - tau_max_frames, dt_origin_frames) chunk_size = max(1, len(origins) // N_WORKERS) with mp.Pool(processes=N_WORKERS) as pool: tasks = [] for i in range(0, len(origins), chunk_size): chunk = origins[i:i+chunk_size] tasks.append(pool.apply_async(calculate_hbond_lifetime, (chunk, universe, atom_labels_cache))) scf_total = np.zeros(tau_max_frames) count_total = np.zeros(tau_max_frames) for task in tqdm(tasks, total=len(tasks)): scf, count = task.get() scf_total += scf count_total += count # 结果处理 scf_avg = np.where(count_total > 0, scf_total / count_total, 0) time_axis = np.arange(0, tau_max_frames) * FRAME_DT tau_relax = np.trapz(scf_avg, time_axis) print(f"弛豫时间: {tau_relax:.2f} ps") # 输出结果 scf_df = pd.DataFrame({‘Time (ps)’: time_axis, ‘SCF’: scf_avg}) scf_df.to_csv(f"{output_prefix}scf.csv", index=False) stats = { ‘Avg_HBonds_per_frame’: [avg_hbonds], ‘HBond_Density_mol/L’: [hbond_density], ‘Relaxation_Time_ps’: [tau_relax], ‘Preprocessing_Time_s’: [preprocess_time] } for hb_type, counts in frame_hbond_counts.items(): stats[f"Avg"] = [np.mean(counts)] stats_df = pd.DataFrame(stats) stats_df.to_csv(f"{output_prefix}_stats.csv", index=False) # 绘图 plt.figure(figsize=(8, 6), dpi=300) plt.plot(time_axis, scf_avg, ‘b-’, linewidth=2) plt.xlabel(‘Time (ps)’, fontsize=12, fontname=‘Times New Roman’) plt.ylabel(‘Survival Correlation Function’, fontsize=12, fontname=‘Times New Roman’) plt.title(‘Hydrogen Bond Lifetime’, fontsize=14, fontname=‘Times New Roman’) plt.grid(alpha=0.3) plt.xlim(0, TAU_MAX) plt.ylim(0, 1) plt.rcParams[‘font.family’] = ‘Times New Roman’ plt.rcParams[‘mathtext.fontset’] = ‘stix’ plt.tight_layout() plt.savefig(f"{output_prefix}_lifetime.png", format=‘png’, bbox_inches=‘tight’) print(“分析完成!”)
if name == “main”:
parser = argparse.ArgumentParser(description=‘优化版LAMMPS氢键寿命分析’)
parser.add_argument(‘top_file’, type=str, help=‘LAMMPS拓扑文件路径 (.data)’)
parser.add_argument(‘traj_file’, type=str, help=‘LAMMPS轨迹文件路径 (.lammpstrj 或 .lammpsdump)’)
parser.add_argument(‘output_prefix’, type=str, help=‘输出文件前缀’)
parser.add_argument(‘–workers’, type=int, default=N_WORKERS, help=‘并行工作进程数’)
args = parser.parse_args() N_WORKERS = args.workers main(args.top_file, args.traj_file, args.output_prefix)
以上代码意在实现湿法磷酸中磷酸和水构成的氢键网络的氢键寿命计算,其中可能存在水合氢离子,存在动态的质子转移情况。然而在执行的时候,速度实在太慢了,目前电脑有64核 126g内存,能否进一步提高内存和CPU的利用率提高速度,总原子数在23495,体系是相当大,步长0.1fs 而总步数10000000,总时长1ns,已经能够读取data和lammpstrj文件,其中注意单位埃的输入输出,字符串类型可能会报错,为有效减小计算量,只计算前100ps内的氢键寿命.在这里你不需要为了节约时间而省略细节,尽可能的详细且设置好代码,同时命令的执行形式为python hydrogen_bond_analysis.py H3PO4-23pure.data nvt-P2O5-353K-23.lammpstrj Lifetime --workers 58,其他需要的参数放置在代码文件中谢谢
最新发布