import numpy as np
import matplotlib.pyplot as plt
import MDAnalysis as mda
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis
from MDAnalysis.lib.distances import calc_angles
import tqdm
import os
import sys
import time
import argparse
import csv
import matplotlib as mpl
import warnings
import traceback
from datetime import datetime, timedelta
import re
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
# 原子类型映射
ATOM_TYPE_MAPPING = {
1: "H",
2: "O",
3: "F",
4: "Mg",
5: "Al",
6: "Si",
7: "P",
8: "Fe"
}
# 氢键配置
def get_hbond_config():
"""返回氢键配置,包含距离和角度阈值"""
return [
{
"name": "P-O/P=O···Hw",
"donor_selection": "name O and around 1.2 (name H and resname SOL)",
"acceptor_selection": "name O and bonded name P and not bonded name H",
"hydrogen_selection": "name H and resname SOL",
"distance_threshold": 2.375,
"angle_threshold": 143.99,
"color": "#1f77b4"
},
{
"name": "P-O/P=O···Hh",
"donor_selection": "name O and resname H3O",
"acceptor_selection": "name O and bonded name P and not bonded name H",
"hydrogen_selection": "name H and resname H3O",
"distance_threshold": 2.275,
"angle_threshold": 157.82,
"color": "#ff7f0e"
},
{
"name": "P-O/P=O···Hp",
"donor_selection": "name O and bonded name P and bonded name H",
"acceptor_selection": "name O and bonded name P and not bonded name H",
"hydrogen_selection": "name H and bonded name P",
"distance_threshold": 2.175,
"angle_threshold": 155.00,
"color": "#2ca02c"
},
{
"name": "P-OH···Ow",
"donor_selection": "name O and bonded name P and bonded name H",
"acceptor_selection": "name O and resname SOL",
"hydrogen_selection": "name H and bonded name P",
"distance_threshold": 2.275,
"angle_threshold": 155.13,
"color": "#d62728"
},
{
"name": "Hw···Ow",
"donor_selection": "name O and resname SOL",
"acceptor_selection": "name O and resname SOL",
"hydrogen_selection": "name H and resname SOL",
"distance_threshold": 2.375,
"angle_threshold": 138.73,
"color": "#9467bd"
},
{
"name": "Hh···Ow",
"donor_selection": "name O and resname H3O",
"acceptor_selection": "name O and resname SOL",
"hydrogen_selection": "name H and resname H3O",
"distance_threshold": 2.225,
"angle_threshold": 155.31,
"color": "#8c564b"
},
{
"name": "Hw···F",
"donor_selection": "name O and resname SOL",
"acceptor_selection": "name F",
"hydrogen_selection": "name H and resname SOL",
"distance_threshold": 2.225,
"angle_threshold": 137.68,
"color": "#e377c2"
},
{
"name": "Hh···F",
"donor_selection": "name O and resname H3O",
"acceptor_selection": "name F",
"hydrogen_selection": "name H and resname H3O",
"distance_threshold": 2.175,
"angle_threshold": 154.64,
"color": "#7f7f7f"
},
{
"name": "Hp···F",
"donor_selection": "name O and bonded name P and bonded name H",
"acceptor_selection": "name F",
"hydrogen_selection": "name H and bonded name P",
"distance_threshold": 2.275,
"angle_threshold": 153.71,
"color": "#bcbd22"
}
]
def convert_lammps_data_to_pdb(data_file, pdb_file):
"""将LAMMPS data文件转换为PDB格式"""
atoms = []
box = None
print(f"Converting {data_file} to {pdb_file}...")
with open(data_file, 'r') as f:
lines = f.readlines()
# 解析盒子尺寸
for i, line in enumerate(lines):
if "xlo xhi" in line:
parts = line.split()
xlo, xhi = float(parts[0]), float(parts[1])
elif "ylo yhi" in line:
parts = line.split()
ylo, yhi = float(parts[0]), float(parts[1])
elif "zlo zhi" in line:
parts = line.split()
zlo, zhi = float(parts[0]), float(parts[1])
box = [xhi - xlo, yhi - ylo, zhi - zlo]
break
# 解析原子信息
in_atoms_section = False
for line in lines:
if "Atoms" in line:
in_atoms_section = True
continue
if in_atoms_section and line.strip() and not line.startswith("#"):
parts = line.split()
if len(parts) >= 5:
atom_id = int(parts[0])
atom_type = int(parts[1])
x, y, z = float(parts[2]), float(parts[3]), float(parts[4])
element = ATOM_TYPE_MAPPING.get(atom_type, "X")
# 简单分配残基名称
if element == "O" and atom_type == 2:
resname = "SOL" # 水分子氧
elif element == "H" and atom_type == 1:
resname = "SOL" # 水分子氢
elif element == "P" and atom_type == 7:
resname = "PHO" # 磷酸
elif element == "F" and atom_type == 3:
resname = "F" # 氟
else:
resname = element
atoms.append({
'id': atom_id,
'type': atom_type,
'element': element,
'resname': resname,
'position': [x, y, z]
})
# 写入PDB文件
with open(pdb_file, 'w') as f:
# 写入标题
f.write("HEADER Converted from LAMMPS data file\n")
f.write(f"REMARK Original file: {data_file}\n")
if box:
f.write(f"CRYST1 {box[0]:9.3f} {box[1]:9.3f} {box[2]:9.3f} 90.00 90.00 90.00 P 1 1\n")
# 写入原子信息
for i, atom in enumerate(atoms):
# PDB格式: ATOM serial name altLoc resName chainID resSeq iCode x y z occupancy tempFactor element
f.write(f"ATOM {i+1:5d} {atom['element']:2s} {atom['resname']:3s} A{1:4d} {atom['position'][0]:8.3f}{atom['position'][1]:8.3f}{atom['position'][2]:8.3f} 1.00 0.00 {atom['element']:>2s}\n")
f.write("END\n")
print(f"Converted {len(atoms)} atoms to PDB format")
return pdb_file, box
def calculate_hbond_lifetime(hbond_events, delta_t, max_tau):
"""计算氢键寿命相关函数"""
# 初始化相关函数数组
corr_func = np.zeros(max_tau)
norm_factor = np.zeros(max_tau)
# 计算时间原点数量
num_frames = len(hbond_events)
if num_frames <= max_tau:
print(f"Warning: Not enough frames ({num_frames}) for max_tau ({max_tau})")
return np.zeros(max_tau)
# 遍历所有可能的时间原点
for t0 in range(0, num_frames - max_tau, delta_t):
# 获取当前时间原点存在的氢键
current_hbonds = hbond_events[t0]
if not current_hbonds: # 如果没有氢键,跳过
continue
# 遍历时间间隔
for tau in range(max_tau):
t = t0 + tau
if t >= num_frames:
break
# 计算在时间t仍然存在的氢键数量
surviving = current_hbonds & hbond_events[t]
corr_func[tau] += len(surviving)
norm_factor[tau] += len(current_hbonds)
# 归一化相关函数
for tau in range(max_tau):
if norm_factor[tau] > 0:
corr_func[tau] /= norm_factor[tau]
else:
corr_func[tau] = 0.0
return corr_func
def integrate_correlation_function(corr_func, dt):
"""积分相关函数计算弛豫时间"""
# 使用梯形法积分
integral = np.trapz(corr_func, dx=dt)
return integral
def analyze_hbonds_with_mdanalysis(pdb_file, traj_file, max_frames=None, delta_t=10, max_tau=1000, dt=0.05):
"""使用MDAnalysis分析氢键寿命"""
print("Starting hydrogen bond lifetime analysis with MDAnalysis")
# 设置文件路径
pdb_file = os.path.abspath(pdb_file)
traj_file = os.path.abspath(traj_file)
# 检查文件是否存在
if not os.path.exists(pdb_file):
print(f"Error: PDB file not found: {pdb_file}")
return {}
if not os.path.exists(traj_file):
print(f"Error: Trajectory file not found: {traj_file}")
return {}
# 创建MDAnalysis Universe
print("Loading trajectory with MDAnalysis...")
try:
u = mda.Universe(pdb_file, traj_file, format="LAMMPS")
print(f"Loaded universe with {len(u.atoms)} atoms")
print(f"Trajectory has {len(u.trajectory)} frames")
except Exception as e:
print(f"Error loading trajectory: {e}")
return {}
# 获取总帧数
total_frames = len(u.trajectory)
if max_frames is not None and max_frames > 0:
total_frames = min(total_frames, max_frames)
print(f"Total frames to process: {total_frames}")
# 获取氢键配置
hbond_config = get_hbond_config()
# 初始化氢键事件记录器
hbond_events = {hbond["name"]: [] for hbond in hbond_config}
# 处理每一帧
print("Processing frames...")
for i, ts in tqdm.tqdm(enumerate(u.trajectory), total=total_frames, desc="Analyzing frames"):
if i >= total_frames:
break
# 为每种氢键类型检测氢键
frame_hbonds = {hbond["name"]: set() for hbond in hbond_config}
for hbond in hbond_config:
try:
# 选择供体、受体和氢原子
donors = u.select_atoms(hbond["donor_selection"])
acceptors = u.select_atoms(hbond["acceptor_selection"])
hydrogens = u.select_atoms(hbond["hydrogen_selection"])
if len(donors) == 0 or len(acceptors) == 0 or len(hydrogens) == 0:
continue
# 计算所有可能的D-H···A组合
for donor in donors:
# 找到与供体相连的氢原子
donor_hydrogens = [h for h in hydrogens if
np.linalg.norm(h.position - donor.position) < 1.2]
for hydrogen in donor_hydrogens:
for acceptor in acceptors:
# 排除供体自身
if donor.index == acceptor.index:
continue
# 计算距离和角度
dist = np.linalg.norm(acceptor.position - hydrogen.position)
if dist > hbond["distance_threshold"]:
continue
# 计算角度 (D-H-A)
vec_dh = donor.position - hydrogen.position
vec_ha = acceptor.position - hydrogen.position
# 处理周期性边界条件
for dim in range(3):
if u.dimensions[dim] > 0:
if vec_dh[dim] > u.dimensions[dim]/2:
vec_dh[dim] -= u.dimensions[dim]
elif vec_dh[dim] < -u.dimensions[dim]/2:
vec_dh[dim] += u.dimensions[dim]
if vec_ha[dim] > u.dimensions[dim]/2:
vec_ha[dim] -= u.dimensions[dim]
elif vec_ha[dim] < -u.dimensions[dim]/2:
vec_ha[dim] += u.dimensions[dim]
# 计算角度
cos_angle = np.dot(vec_dh, vec_ha) / (np.linalg.norm(vec_dh) * np.linalg.norm(vec_ha))
cos_angle = np.clip(cos_angle, -1.0, 1.0)
angle = np.degrees(np.arccos(cos_angle))
if angle >= hbond["angle_threshold"]:
# 记录氢键 (供体, 受体)
frame_hbonds[hbond["name"]].add((donor.index, acceptor.index))
except Exception as e:
print(f"Error processing {hbond['name']} in frame {i}: {e}")
continue
# 记录当前帧的氢键
for hbond_name, bonds in frame_hbonds.items():
hbond_events[hbond_name].append(bonds)
# 每10帧打印一次进度
if i % 10 == 0 and i > 0:
total_hbonds = sum(len(bonds) for bonds in frame_hbonds.values())
print(f"Frame {i}: Found {total_hbonds} hydrogen bonds")
# 计算每种氢键类型的相关函数
results = {}
for hbond_type, events in hbond_events.items():
if not events or len(events) < max_tau:
print(f"Skipping {hbond_type}: insufficient data ({len(events)} frames)")
continue
# 检查是否有氢键事件
total_events = sum(len(event_set) for event_set in events)
if total_events == 0:
print(f"Skipping {hbond_type}: no hydrogen bond events detected")
continue
print(f"\nCalculating lifetime for {hbond_type}...")
print(f" Total events: {total_events}")
print(f" Frames with events: {sum(1 for e in events if len(e) > 0)}")
try:
corr_func = calculate_hbond_lifetime(events, delta_t, max_tau)
tau = integrate_correlation_function(corr_func, dt)
results[hbond_type] = {"corr_func": corr_func, "tau": tau}
print(f" Relaxation time for {hbond_type}: {tau:.2f} fs")
# 输出相关函数的前几个值用于调试
print(f" Correlation function (first 10 values): {corr_func[:10]}")
except Exception as e:
print(f"Error calculating lifetime for {hbond_type}: {e}")
traceback.print_exc()
return results
def plot_correlation_functions(results, dt, max_tau):
"""绘制氢键寿命相关函数"""
os.makedirs("hbond_lifetimes", exist_ok=True)
# 创建时间轴
time_axis = np.arange(0, max_tau * dt, dt)
# 创建图形
plt.figure(figsize=(12, 10))
# 绘制每种氢键类型的相关函数
for hbond_type, data in results.items():
corr_func = data["corr_func"]
tau = data["tau"]
# 截断时间轴以匹配相关函数长度
t_plot = time_axis[:len(corr_func)]
plt.plot(
t_plot, corr_func,
label=f"{hbond_type} (τ={tau:.2f} fs)",
linewidth=2
)
# 设置图形属性
plt.title("Hydrogen Bond Lifetime Correlation Functions", fontsize=18)
plt.xlabel("Time (fs)", fontsize=16)
plt.ylabel("Survival Probability", fontsize=16)
plt.yscale("log")
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(fontsize=12, framealpha=0.8, loc='best')
# 保存图像
plot_path = os.path.join("hbond_lifetimes", "hbond_lifetimes.tiff")
plt.tight_layout()
plt.savefig(plot_path, dpi=600)
plt.close()
print(f"\nSaved correlation function plot to: {plot_path}")
# 保存数据到CSV
csv_path = os.path.join("hbond_lifetimes", "hbond_lifetimes.csv")
with open(csv_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
header = ["Time (fs)"]
for hbond_type in results.keys():
header.append(f"{hbond_type} (C(t))")
writer.writerow(header)
for i in range(len(time_axis)):
if i >= max_tau:
break
row = [time_axis[i]]
for hbond_type, data in results.items():
corr_func = data["corr_func"]
if i < len(corr_func):
row.append(corr_func[i])
else:
row.append("")
writer.writerow(row)
# 保存弛豫时间数据
tau_path = os.path.join("hbond_lifetimes", "relaxation_times.csv")
with open(tau_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Hydrogen Bond Type", "Relaxation Time (fs)"])
for hbond_type, data in results.items():
writer.writerow([hbond_type, data["tau"]])
print(f"Saved lifetime data to: {csv_path}")
print(f"Saved relaxation times to: {tau_path}")
def test_first_frame_mdanalysis(pdb_file, traj_file):
"""测试模式:使用MDAnalysis只处理第一帧"""
print("Running in test mode: analyzing first frame only with MDAnalysis")
try:
# 创建MDAnalysis Universe
u = mda.Universe(pdb_file, traj_file, format="LAMMPSDUMP")
print(f"Loaded universe with {len(u.atoms)} atoms")
# 获取氢键配置
hbond_config = get_hbond_config()
# 处理第一帧
ts = u.trajectory[0]
# 为每种氢键类型检测氢键
hbond_results = {hbond["name"]: 0 for hbond in hbond_config}
for hbond in hbond_config:
try:
# 选择供体、受体和氢原子
donors = u.select_atoms(hbond["donor_selection"])
acceptors = u.select_atoms(hbond["acceptor_selection"])
hydrogens = u.select_atoms(hbond["hydrogen_selection"])
print(f"{hbond['name']}: Found {len(donors)} donors, {len(acceptors)} acceptors, {len(hydrogens)} hydrogens")
if len(donors) == 0 or len(acceptors) == 0 or len(hydrogens) == 0:
continue
# 计算所有可能的D-H···A组合
hbond_count = 0
for donor in donors:
# 找到与供体相连的氢原子
donor_hydrogens = [h for h in hydrogens if
np.linalg.norm(h.position - donor.position) < 1.2]
for hydrogen in donor_hydrogens:
for acceptor in acceptors:
# 排除供体自身
if donor.index == acceptor.index:
continue
# 计算距离和角度
dist = np.linalg.norm(acceptor.position - hydrogen.position)
if dist > hbond["distance_threshold"]:
continue
# 计算角度 (D-H-A)
vec_dh = donor.position - hydrogen.position
vec_ha = acceptor.position - hydrogen.position
# 处理周期性边界条件
for dim in range(3):
if u.dimensions[dim] > 0:
if vec_dh[dim] > u.dimensions[dim]/2:
vec_dh[dim] -= u.dimensions[dim]
elif vec_dh[dim] < -u.dimensions[dim]/2:
vec_dh[dim] += u.dimensions[dim]
if vec_ha[dim] > u.dimensions[dim]/2:
vec_ha[dim] -= u.dimensions[dim]
elif vec_ha[dim] < -u.dimensions[dim]/2:
vec_ha[dim] += u.dimensions[dim]
# 计算角度
cos_angle = np.dot(vec_dh, vec_ha) / (np.linalg.norm(vec_dh) * np.linalg.norm(vec_ha))
cos_angle = np.clip(cos_angle, -1.0, 1.0)
angle = np.degrees(np.arccos(cos_angle))
if angle >= hbond["angle_threshold"]:
hbond_count += 1
hbond_results[hbond["name"]] = hbond_count
print(f"{hbond['name']}: Found {hbond_count} hydrogen bonds")
except Exception as e:
print(f"Error processing {hbond['name']}: {e}")
continue
# 打印氢键统计
print("\nHydrogen Bonds Found:")
total_hbonds = 0
for hbond_type, count in hbond_results.items():
print(f"{hbond_type}: {count} bonds")
total_hbonds += count
print(f"\nTotal Hydrogen Bonds: {total_hbonds}")
# 保存结果到CSV
os.makedirs("hbond_analysis", exist_ok=True)
csv_path = os.path.join("hbond_analysis", "first_frame_hbonds_mdanalysis.csv")
with open(csv_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Hydrogen Bond Type", "Count"])
for hbond_type, count in hbond_results.items():
writer.writerow([hbond_type, count])
print(f"\nSaved hydrogen bond counts to: {csv_path}")
except Exception as e:
print(f"Error during test mode: {str(e)}")
traceback.print_exc()
def main():
parser = argparse.ArgumentParser(description='Hydrogen Bond Analysis for LAMMPS Trajectories using MDAnalysis')
parser.add_argument('--test', action='store_true', help='Run in test mode (first frame only)')
parser.add_argument('--delta_t', type=int, default=10,
help='Time origin spacing for correlation function (default: 10 frames)')
parser.add_argument('--max_tau', type=int, default=1000,
help='Maximum time for correlation function (default: 1000 frames)')
parser.add_argument('--dt', type=float, default=0.05,
help='Time step (ps) (default: 0.05 ps)')
parser.add_argument('--max_frames', type=int, default=100,
help='Maximum number of frames to process (default: 100)')
parser.add_argument('--convert_only', action='store_true', help='Only convert data file to PDB format and exit')
args = parser.parse_args()
# 设置默认文件
data_file = "H3PO4-23pure.data"
traj_file = "nvt-P2O5-353K-23.lammpstrj"
pdb_file = "H3PO4-23pure.pdb"
# 检查文件是否存在
if not os.path.exists(data_file):
print(f"Error: Data file not found: {data_file}")
sys.exit(1)
if not os.path.exists(traj_file):
print(f"Error: Trajectory file not found: {traj_file}")
sys.exit(1)
# 转换data文件为PDB格式
if not os.path.exists(pdb_file):
pdb_file, box = convert_lammps_data_to_pdb(data_file, pdb_file)
else:
print(f"Using existing PDB file: {pdb_file}")
# 读取盒子尺寸
with open(pdb_file, 'r') as f:
for line in f:
if line.startswith("CRYST1"):
parts = line.split()
box = [float(parts[1]), float(parts[2]), float(parts[3])]
break
if args.convert_only:
print("Conversion complete. Exiting.")
return
start_time = time.time()
if args.test:
# 测试模式:只处理第一帧
test_first_frame_mdanalysis(pdb_file, traj_file)
else:
# 完整分析
results = analyze_hbonds_with_mdanalysis(
pdb_file, traj_file,
max_frames=args.max_frames,
delta_t=args.delta_t,
max_tau=args.max_tau,
dt=args.dt
)
# 绘制结果
if results:
plot_correlation_functions(results, args.dt, args.max_tau)
else:
print("No results to plot")
print("Possible reasons:")
print("1. No hydrogen bonds detected in any frame")
print("2. Hydrogen bond detection parameters may need adjustment")
print("3. Atom selection criteria may be incorrect")
elapsed = time.time() - start_time
hours, rem = divmod(elapsed, 3600)
minutes, seconds = divmod(rem, 60)
print(f"\nAnalysis completed in {int(hours):02d}h:{int(minutes):02d}m:{seconds:06.3f}s")
if __name__ == "__main__":
main()上面的代码我测试过是可运行的,只不过在这里我们将其修改为只需要轨迹文件lammpstrij可执行的代码,也就是说原子的类型通过数字ID来表示,加入原子ID的映射部分,然后依旧按照原理的计算原理输出完整的代码,依旧的识别逻辑,只不过通过数字ID来表示先