import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import cumtrapz
from scipy.ndimage import gaussian_filter1d
from scipy.spatial import cKDTree
from tqdm import tqdm
import multiprocessing
from functools import partial
import time
import os
import argparse
import warnings
from collections import defaultdict
import dill
import re
from pymatgen.core import Structure, Lattice
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置 - 符合Journal of Chemical Physics要求
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
# 原子类型识别函数
def identify_atom_types(struct):
"""优化的原子类型识别函数,不再区分P=O和P-O"""
# 初始化数据结构
atom_types = {
"phosphate_oxygens": {"P-O/P=O": [], "P-OH": []},
"phosphate_hydrogens": [],
"water_oxygens": [],
"water_hydrogens": [],
"hydronium_oxygens": [],
"hydronium_hydrogens": [],
"fluoride_atoms": [i for i, site in enumerate(struct) if site.species_string == "F"],
"aluminum_atoms": [i for i, site in enumerate(struct) if site.species_string == "Al"]
}
# 构建全局KDTree
all_coords = np.array([site.coords for site in struct])
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
# 识别磷酸基团
p_atoms = [i for i, site in enumerate(struct) if site.species_string == "P"]
phosphate_oxygens = []
for p_idx in p_atoms:
# 查找P周围的O原子 (距离 < 1.6Å)
neighbors = kdtree.query_ball_point(all_coords[p_idx], r=1.6)
p_o_indices = [idx for idx in neighbors if idx != p_idx and struct[idx].species_string == "O"]
phosphate_oxygens.extend(p_o_indices)
# 识别所有H原子并确定归属
hydrogen_owners = {}
h_atoms = [i for i, site in enumerate(struct) if site.species_string == "H"]
for h_idx in h_atoms:
neighbors = kdtree.query_ball_point(all_coords[h_idx], r=1.2)
candidate_os = [idx for idx in neighbors if idx != h_idx and struct[idx].species_string == "O"]
if not candidate_os:
continue
min_dist = float('inf')
owner_o = None
for o_idx in candidate_os:
dist = struct.get_distance(h_idx, o_idx)
if dist < min_dist:
min_dist = dist
owner_o = o_idx
hydrogen_owners[h_idx] = owner_o
# 分类磷酸氧:带H的为P-OH,不带H的为P-O/P=O
for o_idx in phosphate_oxygens:
has_hydrogen = any(owner_o == o_idx for h_idx, owner_o in hydrogen_owners.items())
if has_hydrogen:
atom_types["phosphate_oxygens"]["P-OH"].append(o_idx)
else:
atom_types["phosphate_oxygens"]["P-O/P=O"].append(o_idx)
# 识别水和水合氢离子
all_o_indices = [i for i, site in enumerate(struct) if site.species_string == "O"]
non_phosphate_os = [o_idx for o_idx in all_o_indices if o_idx not in phosphate_oxygens]
o_h_count = defaultdict(int)
for h_idx, owner_o in hydrogen_owners.items():
o_h_count[owner_o] += 1
for o_idx in non_phosphate_os:
h_count = o_h_count.get(o_idx, 0)
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
if h_count == 2:
atom_types["water_oxygens"].append(o_idx)
atom_types["water_hydrogens"].extend(attached_hs)
elif h_count == 3:
atom_types["hydronium_oxygens"].append(o_idx)
atom_types["hydronium_hydrogens"].extend(attached_hs)
# 识别磷酸基团的H原子
for o_idx in atom_types["phosphate_oxygens"]["P-OH"]:
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
atom_types["phosphate_hydrogens"].extend(attached_hs)
return atom_types
# 氢键配置
def get_hbond_angle_config():
"""返回九类氢键的氢键分析配置"""
return [
{
"name": "P-O/P=O···Hw",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "P-O/P=O", # 受体氧 (磷酸中的非羟基氧)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.375, # 氢键距离阈值 (Å)
"color": "blue"
},
{
"name": "P-O/P=O···Hh",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "P-O/P=O",
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.275,
"color": "green"
},
{
"name": "P-O/P=O···Hp",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "P-O/P=O",
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.175,
"color": "red"
},
{
"name": "P-OH···Ow",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "water_oxygens", # 受体氧 (水中的O)
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.275,
"color": "orange"
},
{
"name": "Hw···Ow",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "water_oxygens", # 受体氧 (另一个水中的O)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.375,
"color": "purple"
},
{
"name": "Hh···Ow",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "water_oxygens", # 受体氧 (水中的O)
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.225,
"color": "cyan"
},
{
"name": "Hw···F",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "fluoride_atoms", # 受体 (氟离子)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.225,
"color": "magenta"
},
{
"name": "Hh···F",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "fluoride_atoms",
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.175,
"color": "brown"
},
{
"name": "Hp···F",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "fluoride_atoms",
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.275,
"color": "pink"
}
]
# 氢键存在性检查函数
def check_hbond_exists(struct, atom_types, donor_idx, acceptor_idx, hbond_config, bond_threshold=1.3):
"""检查给定供体-受体对之间是否存在氢键"""
# 获取所有原子坐标
all_coords = np.array([site.coords for site in struct])
# 获取氢键配置
hbond_types = get_hbond_angle_config()
# 确定氢键类型
hbond_type = None
for hb in hbond_types:
# 确定供体类型
if hb["donor_type"] == "P-OH":
donors = atom_types["phosphate_oxygens"]["P-OH"]
else:
donors = atom_types[hb["donor_type"]]
# 确定受体类型
if hb["acceptor_type"] == "P-O/P=O":
acceptors = atom_types["phosphate_oxygens"]["P-O/P=O"]
elif hb["acceptor_type"] == "P-OH":
acceptors = atom_types["phosphate_oxygens"]["P-OH"]
else:
acceptors = atom_types[hb["acceptor_type"]]
# 检查是否匹配
if donor_idx in donors and acceptor_idx in acceptors:
hbond_type = hb
break
if hbond_type is None:
return False
# 获取供体坐标
donor_coords = all_coords[donor_idx]
# 查找与供体成键的氢原子 (距离 < bond_threshold)
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
neighbors = kdtree.query_ball_point(donor_coords, r=bond_threshold)
hydrogen_candidates = [i for i in neighbors if struct[i].species_string == "H"]
# 检查每个候选氢原子
for h_idx in hydrogen_candidates:
h_coords = all_coords[h_idx]
# 计算H-受体距离
acceptor_coords = all_coords[acceptor_idx]
h_acceptor_dist = np.linalg.norm(h_coords - acceptor_coords)
# 检查距离阈值
if h_acceptor_dist > hbond_type["threshold"]:
continue
# 计算键角
vec_dh = h_coords - donor_coords
vec_ha = acceptor_coords - h_coords
dot_product = np.dot(vec_dh, vec_ha)
norm_dh = np.linalg.norm(vec_dh)
norm_ha = np.linalg.norm(vec_ha)
if norm_dh < 1e-6 or norm_ha < 1e-6:
continue
cos_theta = dot_product / (norm_dh * norm_ha)
cos_theta = np.clip(cos_theta, -1.0, 1.0)
angle_deg = np.degrees(np.arccos(cos_theta))
# 检查角度阈值
if angle_deg >= 120.0:
return True
return False
def identify_hbond_pairs(struct, atom_types, hbond_config):
"""识别单帧中所有可能的氢键对"""
hbond_pairs = []
# 获取所有原子坐标
all_coords = np.array([site.coords for site in struct])
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
# 处理每种氢键类型
for hb in hbond_config:
# 获取供体原子列表
if hb["donor_type"] == "P-OH":
donors = atom_types["phosphate_oxygens"]["P-OH"]
else:
donors = atom_types[hb["donor_type"]]
# 获取受体原子列表
if hb["acceptor_type"] == "P-O/P=O":
acceptors = atom_types["phosphate_oxygens"]["P-O/P=O"]
elif hb["acceptor_type"] == "P-OH":
acceptors = atom_types["phosphate_oxygens"]["P-OH"]
else:
acceptors = atom_types[hb["acceptor_type"]]
# 如果没有供体或受体,跳过
if not donors or not acceptors:
continue
# 为受体构建KDTree
acceptor_coords = np.array([all_coords[i] for i in acceptors])
acceptor_kdtree = cKDTree(acceptor_coords, boxsize=struct.lattice.abc)
# 遍历所有供体
for donor_idx in donors:
donor_coords = all_coords[donor_idx]
# 查找在阈值内的受体
acceptor_indices = acceptor_kdtree.query_ball_point(
donor_coords, r=hb["threshold"] + 1.0 # 稍微放宽阈值
)
for a_idx_offset in acceptor_indices:
acceptor_idx = acceptors[a_idx_offset]
# 排除自相互作用
if donor_idx == acceptor_idx:
continue
# 添加到氢键对列表
hbond_pairs.append((donor_idx, acceptor_idx))
# 去重
hbond_pairs = list(set(hbond_pairs))
return hbond_pairs
def process_hbond_frame(struct, atom_types, hbond_pairs, hbond_config):
"""处理单帧氢键存在性"""
hbond_exists = []
for donor_idx, acceptor_idx in hbond_pairs:
exists = check_hbond_exists(struct, atom_types, donor_idx, acceptor_idx, hbond_config)
hbond_exists.append(exists)
return np.array(hbond_exists, dtype=bool)
def calculate_scf_worker(args):
"""工作进程函数:计算单个时间原点的存活相关函数"""
t0, structures, hbond_pairs_t0, hbond_config, t_max = args
n_frames = len(structures)
n_hbonds = len(hbond_pairs_t0)
# 初始化SCF数组
scf = np.zeros(t_max + 1)
# 获取t0帧的原子类型
atom_types_t0 = identify_atom_types(structures[t0])
# 计算t0时刻的氢键存在性
exists_t0 = process_hbond_frame(
structures[t0], atom_types_t0, hbond_pairs_t0, hbond_config
)
# 对于每个时间延迟t
for t in range(0, min(t_max + 1, n_frames - t0)):
t_frame = t0 + t
# 获取当前帧的原子类型
atom_types_t = identify_atom_types(structures[t_frame])
# 计算当前帧的氢键存在性
exists_t = process_hbond_frame(
structures[t_frame], atom_types_t, hbond_pairs_t0, hbond_config
)
# 计算存活相关函数
scf[t] = np.sum(exists_t0 & exists_t) / max(1, np.sum(exists_t0))
return scf
def calculate_hbond_lifetime(structures, workers=1, delta_t0=10, t_max=5000):
"""计算氢键寿命(存活相关函数)"""
start_time = time.time()
n_frames = len(structures)
hbond_config = get_hbond_angle_config()
print(f"Calculating hydrogen bond lifetime for {n_frames} frames")
print(f"Workers: {workers}, Delta t0: {delta_t0}, Max t: {t_max} frames")
# 步骤1: 识别参考帧(t0)的氢键对
t0_ref = 0 # 使用第一帧作为参考
atom_types_ref = identify_atom_types(structures[t0_ref])
hbond_pairs_ref = identify_hbond_pairs(
structures[t0_ref], atom_types_ref, hbond_config
)
n_hbonds = len(hbond_pairs_ref)
print(f"Identified {n_hbonds} potential hydrogen bond pairs")
# 步骤2: 准备时间原点列表
t0_list = list(range(0, n_frames - t_max, delta_t0))
if not t0_list:
t0_list = [0]
print(f"Using {len(t0_list)} time origins")
# 准备参数列表
args_list = [
(t0, structures, hbond_pairs_ref, hbond_config, t_max)
for t0 in t0_list
]
# 步骤3: 并行计算SCF
scf_total = np.zeros(t_max + 1)
scf_count = np.zeros(t_max + 1)
with multiprocessing.Pool(processes=workers) as pool:
results = list(tqdm(
pool.imap_unordered(calculate_scf_worker, args_list),
total=len(t0_list),
desc="Calculating SCF"
))
# 合并结果
for scf in results:
valid_length = min(len(scf), t_max + 1)
scf_total[:valid_length] += scf[:valid_length]
scf_count[:valid_length] += 1
# 计算平均SCF
scf_avg = np.zeros(t_max + 1)
for t in range(t_max + 1):
if scf_count[t] > 0:
scf_avg[t] = scf_total[t] / scf_count[t]
# 步骤4: 计算弛豫时间(积分法)
# 假设时间步长为0.1 fs (0.0001 ps)
time_ps = np.arange(t_max + 1) * 0.0001
integral = cumtrapz(scf_avg, time_ps, initial=0)
tau = integral[-1] # 积分弛豫时间
# 平滑SCF曲线
if t_max > 100:
scf_smoothed = gaussian_filter1d(scf_avg, sigma=3)
else:
scf_smoothed = scf_avg
# 输出结果
elapsed = time.time() - start_time
print(f"Hydrogen bond lifetime calculation completed in {elapsed:.2f} seconds")
print(f"Relaxation time (tau): {tau:.4f} ps")
return time_ps, scf_avg, scf_smoothed, tau
def plot_lifetime_results(time_ps, scf_avg, scf_smoothed, tau, system_name):
"""绘制氢键寿命结果(符合期刊要求)"""
os.makedirs("Lifetime_Plots", exist_ok=True)
plt.figure(figsize=(8, 6))
# 绘制原始数据和平滑曲线
plt.plot(time_ps, scf_avg, 'b-', alpha=0.3, label="Raw SCF")
plt.plot(time_ps, scf_smoothed, 'r-', linewidth=2.5, label="Smoothed SCF")
# 标记弛豫时间
plt.axvline(x=tau, color='g', linestyle='--', linewidth=1.5,
label=f"τ = {tau:.4f} ps")
# 设置标题和标签
plt.title(f"{system_name}: Hydrogen Bond Survival Correlation Function", fontsize=16)
plt.xlabel("Time (ps)", fontsize=14)
plt.ylabel("SCF(t)", fontsize=14)
plt.yscale('log') # 对数坐标更好展示衰减
plt.grid(True, linestyle='--', alpha=0.7)
# 设置坐标轴范围
plt.xlim(0, min(10, time_ps[-1])) # 最多显示10ps
plt.ylim(0.001, 1.1)
# 添加图例
plt.legend(loc='upper right', framealpha=0.8)
# 添加文本信息
plt.text(0.7, 0.95, f"τ = {tau:.4f} ps", transform=plt.gca().transAxes,
fontsize=12, verticalalignment='top',
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
# 保存图像
plt.tight_layout()
filename = os.path.join("Lifetime_Plots", f"{system_name}_lifetime.tiff")
plt.savefig(filename, dpi=600, bbox_inches='tight')
plt.close()
print(f"Saved lifetime plot: {filename}")
# 保存数据
data_file = os.path.join("Lifetime_Plots", f"{system_name}_lifetime_data.csv")
with open(data_file, 'w') as f:
f.write("Time (ps),SCF(t),Smoothed SCF(t)\n")
for i in range(len(time_ps)):
f.write(f"{time_ps[i]},{scf_avg[i]},{scf_smoothed[i]}\n")
print(f"Saved lifetime data: {data_file}")
def parse_lammps_dump(dump_file):
"""解析LAMMPS转储文件"""
structures = []
current_frame = None
timestep = None
num_atoms = 0
box_bounds = None
atoms = []
atom_types = []
species_map = {}
with open(dump_file, 'r') as f:
for line in f:
line = line.strip()
if not line:
continue
if "ITEM: TIMESTEP" in line:
if current_frame is not None and atoms:
# 创建Structure对象
lattice = Lattice.from_parameters(
box_bounds[0][1] - box_bounds[0][0],
box_bounds[1][1] - box_bounds[1][0],
box_bounds[2][1] - box_bounds[2][0],
90, 90, 90
)
species = [species_map[atype] for atype in atom_types]
coords = np.array(atoms)
struct = Structure(lattice, species, coords)
structures.append(struct)
# 重置当前帧数据
current_frame = {"timestep": None, "box": None, "atoms": []}
atoms = []
atom_types = []
elif "ITEM: NUMBER OF ATOMS" in line:
num_atoms = int(next(f).strip())
elif "ITEM: BOX BOUNDS" in line:
box_bounds = []
for _ in range(3):
bounds = list(map(float, next(f).split()))
box_bounds.append(bounds)
elif "ITEM: ATOMS" in line:
headers = line.split()[2:]
id_idx = headers.index("id") if "id" in headers else 0
type_idx = headers.index("type") if "type" in headers else 1
x_idx = headers.index("x") if "x" in headers else 2
y_idx = headers.index("y") if "y" in headers else 3
z_idx = headers.index("z") if "z" in headers else 4
# 确定是否有元素类型信息
if "element" in headers:
element_idx = headers.index("element")
else:
element_idx = None
for _ in range(num_atoms):
atom_data = next(f).split()
atom_id = int(atom_data[id_idx])
atom_type = int(atom_data[type_idx])
# 获取元素符号
if element_idx is not None:
element = atom_data[element_idx]
else:
# 如果没有元素信息,使用类型映射
if atom_type not in species_map:
# 这里需要根据您的体系设置元素映射
# 示例映射:1=O, 2=H, 3=P, 4=Al, 5=F
type_to_element = {
1: "O", 2: "H", 3: "P", 4: "Al", 5: "F"
}
species_map[atom_type] = type_to_element.get(atom_type, "X")
element = species_map[atom_type]
x = float(atom_data[x_idx])
y = float(atom_data[y_idx])
z = float(atom_data[z_idx])
atoms.append([x, y, z])
atom_types.append(element)
# 添加最后一帧
if atoms:
lattice = Lattice.from_parameters(
box_bounds[0][1] - box_bounds[0][0],
box_bounds[1][1] - box_bounds[1][0],
box_bounds[2][1] - box_bounds[2][0],
90, 90, 90
)
species = [species_map[atype] for atype in atom_types]
coords = np.array(atoms)
struct = Structure(lattice, species, coords)
structures.append(struct)
print(f"Loaded {len(structures)} frames from LAMMPS dump file")
return structures
def load_lammps_trajectory(dump_file, data_file=None):
"""加载LAMMPS轨迹"""
return parse_lammps_dump(dump_file)
def main(workers=1):
"""主函数:加载轨迹并计算氢键寿命"""
# 定义要处理的体系
lammps_files = {
"MySystem": {
"dump_file": "trajectory.lammpstrj",
"data_file": "system.data"
}
}
for system_name, files in lammps_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}")
print(f"{'='*50}")
try:
# 加载LAMMPS轨迹
structures = load_lammps_trajectory(
files["dump_file"], files["data_file"]
)
# 计算氢键寿命
time_ps, scf_avg, scf_smoothed, tau = calculate_hbond_lifetime(
structures,
workers=workers,
delta_t0=50, # 时间原点间隔 (帧)
t_max=20000 # 最大时间延迟 (帧) - 对应2ps (0.1fs/帧)
)
# 绘制结果
plot_lifetime_results(time_ps, scf_avg, scf_smoothed, tau, system_name)
print(f"Completed processing for {system_name}")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}")
import traceback
traceback.print_exc()
print("\nHydrogen bond lifetime analysis completed successfully!")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate hydrogen bond lifetime from LAMMPS trajectory')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers (default: {multiprocessing.cpu_count()})')
parser.add_argument('--delta_t0', type=int, default=50,
help='Time origin spacing in frames (default: 50)')
parser.add_argument('--t_max', type=int, default=20000,
help='Maximum time delay in frames (default: 20000)')
args = parser.parse_args()
print(f"Starting hydrogen bond lifetime analysis with {args.workers} workers...")
print(f"Delta t0: {args.delta_t0} frames, Max time delay: {args.t_max} frames")
# 设置全局参数
def calculate_with_params(structures):
return calculate_hbond_lifetime(
structures,
workers=args.workers,
delta_t0=args.delta_t0,
t_max=args.t_max
)
main(workers=args.workers)其中报错Error processing MySystem: 'H'
Traceback (most recent call last):
File "hydrogen_bond_analysis7.py", line 618, in main
structures = load_lammps_trajectory(
File "hydrogen_bond_analysis7.py", line 599, in load_lammps_trajectory
return parse_lammps_dump(dump_file)
File "hydrogen_bond_analysis7.py", line 521, in parse_lammps_dump
species = [species_map[atype] for atype in atom_types]
File "hydrogen_bond_analysis7.py", line 521, in <listcomp>
species = [species_map[atype] for atype in atom_types]
KeyError: 'H'