import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
from tqdm import tqdm
import os
import sys
import time
import argparse
import multiprocessing
from multiprocessing import Pool, Manager, cpu_count, Array, Value, Lock
from collections import defaultdict
import csv
import matplotlib as mpl
import warnings
import re
import traceback
import ctypes
import psutil
from datetime import datetime, timedelta
import itertools
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 原子类型映射 (根据Masses部分动态更新)
ATOM_TYPE_MAPPING = {}
TYPE_SYMBOL_MAPPING = {}
# 专业绘图设置
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
# 全局共享内存结构
class SharedMemoryManager:
def __init__(self, num_frames, num_hbond_types):
# 氢键事件存储: [frame][hbond_type] = set of (donor, acceptor)
self.hbond_events = [dict() for _ in range(num_frames)]
self.lock = Lock()
self.num_frames = num_frames
self.num_hbond_types = num_hbond_types
self.processed_frames = Value('i', 0)
self.start_time = Value('d', time.time())
def add_hbond_event(self, frame_idx, hbond_type, donor, acceptor):
with self.lock:
if hbond_type not in self.hbond_events[frame_idx]:
self.hbond_events[frame_idx][hbond_type] = set()
self.hbond_events[frame_idx][hbond_type].add((donor, acceptor))
def get_hbond_events(self, frame_idx, hbond_type):
with self.lock:
return self.hbond_events[frame_idx].get(hbond_type, set()).copy()
def increment_processed(self):
with self.lock:
self.processed_frames.value += 1
def get_progress(self):
with self.lock:
processed = self.processed_frames.value
elapsed = time.time() - self.start_time.value
if processed > 0:
time_per_frame = elapsed / processed
remaining = self.num_frames - processed
eta = time_per_frame * remaining
else:
eta = float('inf')
return processed, elapsed, eta
def wrap_coordinates(coords, box):
"""将坐标映射到周期性盒子内 [0, box] 区间"""
wrapped_coords = coords.copy()
for dim in range(3):
if box[dim] > 0:
# 将坐标调整到 [0, box[dim]) 区间
wrapped_coords[:, dim] = wrapped_coords[:, dim] % box[dim]
return wrapped_coords
def get_hbond_config():
"""返回氢键配置,包含距离和角度阈值"""
return [
{
"name": "P-O/P=O···Hw",
"donor_type": "water_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "water_hydrogens",
"distance_threshold": 2.375,
"angle_threshold": 143.99,
"color": "#1f77b4"
},
{
"name": "P-O/P=O···Hh",
"donor_type": "hydronium_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 157.82,
"color": "#ff7f0e"
},
{
"name": "P-O/P=O···Hp",
"donor_type": "P-OH",
"acceptor_type": "P-O/P=O",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.175,
"angle_threshold": 155.00,
"color": "#2ca02c"
},
{
"name": "P-OH···Ow",
"donor_type": "P-OH",
"acceptor_type": "water_oxygens",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 155.13,
"color": "#d62728"
},
{
"name": "Hw···Ow",
"donor_type": "water_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "water_hydrogens",
"distance_threshold": 2.375,
"angle_threshold": 138.73,
"color": "#9467bd"
},
{
"name": "Hh···Ow",
"donor_type": "hydronium_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.225,
"angle_threshold": 155.31,
"color": "#8c564b"
},
{
"name": "Hw···F",
"donor_type": "water_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "water_hydrogens",
"distance_threshold": 2.225,
"angle_threshold": 137.68,
"color": "#e377c2"
},
{
"name": "Hh···F",
"donor_type": "hydronium_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.175,
"angle_threshold": 154.64,
"color": "#7f7f7f"
},
{
"name": "Hp···F",
"donor_type": "P-OH",
"acceptor_type": "fluoride_atoms",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 153.71,
"color": "#bcbd22"
}
]
def calculate_angle(coords, lattice, donor_idx, h_idx, acceptor_idx):
"""计算D-H···A键角 (角度制),使用笛卡尔向量表示并处理周期性"""
# 获取分数坐标
frac_coords = coords
# 获取氢原子H的分数坐标
h_frac = frac_coords[h_idx]
# 计算供体D相对于H的分数坐标差
d_frac = frac_coords[donor_idx]
dh_frac = d_frac - h_frac
# 计算受体A相对于H的分数坐标差
a_frac = frac_coords[acceptor_idx]
ah_frac = a_frac - h_frac
# 应用周期性修正 (将分数坐标差限制在[-0.5, 0.5]范围内)
dh_frac = np.where(dh_frac > 0.5, dh_frac - 1, dh_frac)
dh_frac = np.where(dh_frac < -0.5, dh_frac + 1, dh_frac)
ah_frac = np.where(ah_frac > 0.5, ah_frac - 1, ah_frac)
ah_frac = np.where(ah_frac < -0.5, ah_frac + 1, ah_frac)
# 转换为笛卡尔向量 (H->D 和 H->A)
vec_hd = np.dot(dh_frac, lattice) # H->D向量
vec_ha = np.dot(ah_frac, lattice) # H->A向量
# 计算向量点积
dot_product = np.dot(vec_hd, vec_ha)
# 计算向量模长
norm_hd = np.linalg.norm(vec_hd)
norm_ha = np.linalg.norm(vec_ha)
# 避免除以零
if norm_hd < 1e-6 or norm_ha < 1e-6:
return None
# 计算余弦值
cos_theta = dot_product / (norm_hd * norm_ha)
# 处理数值误差
cos_theta = np.clip(cos_theta, -1.0, 1.0)
# 计算角度 (弧度转角度)
angle_rad = np.arccos(cos_theta)
angle_deg = np.degrees(angle_rad)
return angle_deg
def identify_atom_types(atom_types, coords, lattice, box):
"""原子类型识别函数 - 优化版本"""
# 初始化数据结构
atom_categories = {
"phosphate_oxygens": {"P-O/P=O": [], "P-OH": []},
"phosphate_hydrogens": [],
"water_oxygens": [],
"water_hydrogens": [],
"hydronium_oxygens": [],
"hydronium_hydrogens": [],
"fluoride_atoms": np.where(atom_types == "F")[0].tolist(),
"aluminum_atoms": np.where(atom_types == "Al")[0].tolist()
}
# 将坐标映射到盒子内 [0, box] 区间
wrapped_coords = wrap_coordinates(coords, box)
# 构建全局KDTree
kdtree = cKDTree(wrapped_coords, boxsize=box)
# 识别磷酸基团
p_atoms = np.where(atom_types == "P")[0]
phosphate_oxygens = []
for p_idx in p_atoms:
# 查找P周围的O原子 (距离 < 1.6Å)
neighbors = kdtree.query_ball_point(wrapped_coords[p_idx], r=1.6)
p_o_indices = [idx for idx in neighbors if idx != p_idx and atom_types[idx] == "O"]
phosphate_oxygens.extend(p_o_indices)
# 识别所有H原子并确定归属
hydrogen_owners = {}
h_atoms = np.where(atom_types == "H")[0]
for h_idx in h_atoms:
neighbors = kdtree.query_ball_point(wrapped_coords[h_idx], r=1.2)
candidate_os = [idx for idx in neighbors if idx != h_idx and atom_types[idx] == "O"]
if not candidate_os:
continue
min_dist = float('inf')
owner_o = None
for o_idx in candidate_os:
# 使用映射后的坐标计算距离
dist = np.linalg.norm(wrapped_coords[h_idx] - wrapped_coords[o_idx])
if dist < min_dist:
min_dist = dist
owner_o = o_idx
hydrogen_owners[h_idx] = owner_o
# 分类磷酸氧:带H的为P-OH,不带H的为P-O/P=O
for o_idx in phosphate_oxygens:
has_hydrogen = any(owner_o == o_idx for h_idx, owner_o in hydrogen_owners.items())
if has_hydrogen:
atom_categories["phosphate_oxygens"]["P-OH"].append(o_idx)
else:
atom_categories["phosphate_oxygens"]["P-O/P=O"].append(o_idx)
# 识别水和水合氢离子
all_o_indices = np.where(atom_types == "O")[0]
phosphate_oxygens_set = set(phosphate_oxygens)
non_phosphate_os = [o_idx for o_idx in all_o_indices if o_idx not in phosphate_oxygens_set]
o_h_count = {}
for h_idx, owner_o in hydrogen_owners.items():
o_h_count[owner_o] = o_h_count.get(owner_o, 0) + 1
for o_idx in non_phosphate_os:
h_count = o_h_count.get(o_idx, 0)
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
if h_count == 2:
atom_categories["water_oxygens"].append(o_idx)
atom_categories["water_hydrogens"].extend(attached_hs)
elif h_count == 3:
atom_categories["hydronium_oxygens"].append(o_idx)
atom_categories["hydronium_hydrogens"].extend(attached_hs)
# 识别磷酸基团的H原子
for o_idx in atom_categories["phosphate_oxygens"]["P-OH"]:
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
atom_categories["phosphate_hydrogens"].extend(attached_hs)
# 转换为数组以提高访问速度
for key in atom_categories:
if isinstance(atom_categories[key], dict):
for subkey in atom_categories[key]:
atom_categories[key][subkey] = np.array(atom_categories[key][subkey], dtype=np.int32)
else:
atom_categories[key] = np.array(atom_categories[key], dtype=np.int32)
return atom_categories
def find_hbonds_in_frame(coords, lattice, box, atom_categories, hbond_config):
"""在当前帧中寻找所有氢键 - 使用预先识别的原子类别"""
hbond_results = {hbond["name"]: [] for hbond in hbond_config}
# 检查原子类别是否为空
empty_categories = []
for cat, values in atom_categories.items():
if isinstance(values, dict):
for sub_cat, indices in values.items():
if len(indices) == 0:
empty_categories.append(f"{cat}.{sub_cat}")
else:
if len(values) == 0:
empty_categories.append(cat)
if empty_categories:
print(f"Warning: Empty atom categories: {', '.join(empty_categories)}")
# 将坐标映射到盒子内 [0, box] 区间
wrapped_coords = wrap_coordinates(coords, box)
# 构建全局KDTree
kdtree = cKDTree(wrapped_coords, boxsize=box)
# 处理每一类氢键
for hbond in hbond_config:
# 获取供体原子列表
if hbond["donor_type"] == "P-OH":
donors = atom_categories["phosphate_oxygens"]["P-OH"]
else:
donors = atom_categories[hbond["donor_type"]]
# 获取受体原子列表
if hbond["acceptor_type"] == "P-O/P=O":
acceptors = atom_categories["phosphate_oxygens"]["P-O/P=O"]
elif hbond["acceptor_type"] == "P-OH":
acceptors = atom_categories["phosphate_oxygens"]["P-OH"]
else:
acceptors = atom_categories[hbond["acceptor_type"]]
# 获取氢原子列表
hydrogens = atom_categories[hbond["h_type"]]
# 检查氢键组分是否为空
if len(donors) == 0:
print(f"Warning: No donors found for {hbond['name']}")
continue
if len(acceptors) == 0:
print(f"Warning: No acceptors found for {hbond['name']}")
continue
if len(hydrogens) == 0:
print(f"Warning: No hydrogens found for {hbond['name']}")
continue
# 为受体构建KDTree
acceptor_coords = wrapped_coords[acceptors]
acceptor_kdtree = cKDTree(acceptor_coords, boxsize=box)
# 遍历所有氢原子
for h_idx in hydrogens:
h_coords = wrapped_coords[h_idx]
# 查找与H成键的供体 (距离 < bond_threshold)
donor_neighbors = kdtree.query_ball_point(h_coords, r=1.3)
donor_candidates = [idx for idx in donor_neighbors if idx in donors]
# 如果没有找到供体,跳过
if not donor_candidates:
continue
# 选择最近的供体
min_dist = float('inf')
donor_idx = None
for d_idx in donor_candidates:
dist = np.linalg.norm(wrapped_coords[h_idx] - wrapped_coords[d_idx])
if dist < min_dist:
min_dist = dist
donor_idx = d_idx
# 查找在阈值内的受体
acceptor_indices = acceptor_kdtree.query_ball_point(h_coords, r=hbond["distance_threshold"])
for a_idx_offset in acceptor_indices:
a_idx = acceptors[a_idx_offset]
# 排除供体自身
if a_idx == donor_idx:
continue
# 计算键角 (使用镜像后的坐标)
angle = calculate_angle(wrapped_coords, lattice, donor_idx, h_idx, a_idx)
if angle is not None and angle >= hbond["angle_threshold"]:
# 记录氢键 (供体, 氢, 受体)
hbond_results[hbond["name"]].append((donor_idx, h_idx, a_idx))
return hbond_results
def read_data_file(data_file):
"""读取LAMMPS data文件,支持Materials Studio格式"""
atom_types = []
coords = []
box = np.zeros(3)
# 初始化状态机
section = None
box_dims = []
masses_parsed = False
with open(data_file, 'r') as f:
# 使用readline代替迭代器
line = f.readline()
while line:
stripped = line.strip()
# 跳过空行和REMARK行
if not stripped or stripped.startswith("REMARK"):
line = f.readline()
continue
# 检测部分标题
if "xlo xhi" in stripped:
parts = stripped.split()
if len(parts) >= 2:
try:
xlo = float(parts[0])
xhi = float(parts[1])
box_dims.append(xhi - xlo)
except ValueError:
pass
section = "box"
elif "ylo yhi" in stripped:
parts = stripped.split()
if len(parts) >= 2:
try:
ylo = float(parts[0])
yhi = float(parts[1])
box_dims.append(yhi - ylo)
except ValueError:
pass
elif "zlo zhi" in stripped:
parts = stripped.split()
if len(parts) >= 2:
try:
zlo = float(parts[0])
zhi = float(parts[1])
box_dims.append(zhi - zlo)
except ValueError:
pass
elif stripped.startswith("Masses"):
section = "masses"
# 读取下一行
line = f.readline()
# 跳过空行
while line.strip() == "":
line = f.readline()
# 处理Masses部分
while line.strip() and not line.strip().startswith("Atoms"):
stripped = line.strip()
if stripped and stripped[0].isdigit():
parts = re.split(r'\s+', stripped, 2)
if len(parts) >= 2:
try:
atom_type_num = int(parts[0])
mass = float(parts[1])
symbol = "Unknown"
if len(parts) > 2 and "#" in parts[2]:
comment = parts[2].split("#")[1].strip()
if comment:
symbol = comment
# 更新原子类型映射
ATOM_TYPE_MAPPING[atom_type_num] = symbol
TYPE_SYMBOL_MAPPING[symbol] = atom_type_num
except (ValueError, IndexError):
pass
line = f.readline()
continue # 跳过外层循环的readline
elif stripped.startswith("Atoms"):
section = "atoms"
# 读取下一行
line = f.readline()
# 跳过空行
while line.strip() == "":
line = f.readline()
# 处理Atoms部分
while line.strip() and not line.strip().startswith("Bonds") and not line.strip().startswith("Masses"):
stripped = line.strip()
if stripped and stripped[0].isdigit():
parts = re.split(r'\s+', stripped)
if len(parts) >= 5: # 至少需要id, type, x, y, z
try:
# 第一列是原子ID,第二列是原子类型
atom_id = int(parts[0])
atom_type_num = int(parts[1])
x = float(parts[2])
y = float(parts[3])
z = float(parts[4])
# 获取原子符号
atom_type = ATOM_TYPE_MAPPING.get(atom_type_num, "Unknown")
atom_types.append(atom_type)
coords.append([x, y, z])
except (ValueError, IndexError):
pass
line = f.readline()
continue # 跳过外层循环的readline
line = f.readline()
# 设置盒子尺寸
if len(box_dims) == 3:
box = np.array(box_dims)
else:
print("Warning: Could not parse box dimensions. Using default values.")
box = np.array([100.0, 100.0, 100.0]) # 默认盒子尺寸
# 确保我们有原子类型映射
if not ATOM_TYPE_MAPPING:
# 使用默认映射
default_mapping = {1: "H", 2: "O", 3: "F", 4: "Mg", 5: "Al", 6: "Si", 7: "P", 8: "Fe"}
ATOM_TYPE_MAPPING.update(default_mapping)
for num, sym in default_mapping.items():
TYPE_SYMBOL_MAPPING[sym] = num
return np.array(atom_types), np.array(coords), box
def read_lammpstrj_frame(file_handle, num_atoms):
"""从LAMMPS轨迹文件中读取一帧 - 优化版本"""
# 读取时间步
line = file_handle.readline().strip()
if not line:
return None
# 处理ITEM: TIMESTEP行
if line == "ITEM: TIMESTEP":
line = file_handle.readline().strip()
try:
timestep = int(line)
except ValueError:
# 尝试跳过可能的错误行
while line and not line.isdigit():
line = file_handle.readline().strip()
try:
timestep = int(line)
except:
return None
# 读取原子数量
line = file_handle.readline().strip()
if line == "ITEM: NUMBER OF ATOMS":
line = file_handle.readline().strip()
try:
frame_num_atoms = int(line)
if frame_num_atoms != num_atoms:
print(f"Warning: Atom count mismatch! Expected {num_atoms}, got {frame_num_atoms}")
# 跳过不匹配的帧
for _ in range(7): # 跳过剩余头部
file_handle.readline()
for _ in range(frame_num_atoms):
file_handle.readline()
return None
except ValueError:
return None
# 读取盒子尺寸
box_lines = []
for _ in range(4): # 读取最多4行,跳过可能的ITEM行
line = file_handle.readline().strip()
if line.startswith("ITEM: BOX BOUNDS"):
break
for _ in range(3):
box_line = file_handle.readline().split()
if len(box_line) < 2:
return None
box_lines.append(box_line)
# 解析盒子
try:
box_x = list(map(float, box_lines[0][:2]))
box_y = list(map(float, box_lines[1][:2]))
box_z = list(map(float, box_lines[2][:2]))
box = np.array([
box_x[1] - box_x[0],
box_y[1] - box_y[0],
box_z[1] - box_z[0]
])
except (ValueError, IndexError):
return None
# 读取原子数据头
line = file_handle.readline().strip()
if line.startswith("ITEM: ATOMS"):
pass # 正常情况
elif line and line[0].isdigit():
# 可能是原子数据行
file_handle.seek(file_handle.tell() - len(line) - 1)
# 读取原子数据 - 只读取坐标
coords = np.zeros((num_atoms, 3))
atom_count = 0
while atom_count < num_atoms:
line = file_handle.readline()
if not line:
break
parts = line.split()
if len(parts) < 5:
continue
try:
# 假设格式为:id type x y z
x = float(parts[2])
y = float(parts[3])
z = float(parts[4])
coords[atom_count] = [x, y, z]
atom_count += 1
except:
continue
if atom_count != num_atoms:
print(f"Warning: Expected {num_atoms} atoms, read {atom_count}")
return None
return {
"timestep": timestep,
"coords": coords,
"box": box
}
def calculate_hbond_lifetime(hbond_events, delta_t, max_tau):
"""计算氢键寿命相关函数 - 优化版本"""
# 初始化相关函数数组
corr_func = np.zeros(max_tau)
norm_factor = np.zeros(max_tau)
# 计算时间原点数量
num_frames = len(hbond_events)
if num_frames <= max_tau:
return np.zeros(max_tau)
# 遍历所有可能的时间原点
for t0 in range(0, num_frames - max_tau, delta_t):
# 获取当前时间原点存在的氢键
current_hbonds = hbond_events[t0]
# 遍历时间间隔
for tau in range(max_tau):
t = t0 + tau
if t >= num_frames:
break
# 计算在时间t仍然存在的氢键数量
surviving = current_hbonds & hbond_events[t]
corr_func[tau] += len(surviving)
norm_factor[tau] += len(current_hbonds)
# 归一化相关函数
for tau in range(max_tau):
if norm_factor[tau] > 0:
corr_func[tau] /= norm_factor[tau]
else:
corr_func[tau] = 0.0
return corr_func
def integrate_correlation_function(corr_func, dt):
"""积分相关函数计算弛豫时间"""
# 使用梯形法积分
integral = np.trapz(corr_func, dx=dt)
return integral
def process_frame(frame_data, atom_categories, lattice, hbond_config, frame_idx, shared_memory):
"""处理单帧数据 - 高度优化版本"""
try:
# 检查帧数据是否有效
if frame_data is None:
print(f"Error processing frame {frame_idx}: frame_data is None")
return False
# 检查必要键是否存在
if "coords" not in frame_data or "box" not in frame_data:
print(f"Error processing frame {frame_idx}: missing required keys in frame_data")
return False
# 直接使用全局的原子类别信息
coords = frame_data["coords"]
box = frame_data["box"]
# 查找氢键
frame_hbonds = find_hbonds_in_frame(
coords,
lattice,
box,
atom_categories,
hbond_config
)
# 检查氢键事件是否为空
total_hbonds = sum(len(bonds) for bonds in frame_hbonds.values())
if total_hbonds == 0:
print(f"Warning: No hydrogen bonds found in frame {frame_idx}")
# 但仍然继续处理,因为可能某些帧确实没有氢键
else:
print(f"Frame {frame_idx}: Found {total_hbonds} hydrogen bonds")
for hbond_type, bonds in frame_hbonds.items():
if len(bonds) > 0:
print(f" {hbond_type}: {len(bonds)} bonds")
# 存储到共享内存
for hbond in hbond_config:
hbond_name = hbond["name"]
bonds = frame_hbonds[hbond_name]
for bond in bonds:
# 使用(donor, acceptor)作为氢键标识符
donor_idx, _, acceptor_idx = bond
shared_memory.add_hbond_event(frame_idx, hbond_name, donor_idx, acceptor_idx)
# 更新进度
shared_memory.increment_processed()
return True
except Exception as e:
print(f"Error processing frame {frame_idx}: {str(e)}")
traceback.print_exc()
return False
def frame_reader(traj_file, frame_queue, num_atoms, total_frames):
"""轨迹读取器 - 在独立进程中运行"""
try:
with open(traj_file, 'r') as f:
frame_count = 0
while frame_count < total_frames:
frame_data = read_lammpstrj_frame(f, num_atoms)
if frame_data is None:
print("Reached end of trajectory file.")
break
# 只发送有效帧
frame_queue.put((frame_count, frame_data))
frame_count += 1
except Exception as e:
print(f"Error in frame_reader: {str(e)}")
traceback.print_exc()
finally:
# 发送结束信号,每个工作进程一个
print(f"Sending {multiprocessing.cpu_count()} termination signals")
for _ in range(multiprocessing.cpu_count()):
frame_queue.put((None, None)) # 结束信号
print("Frame reader exiting")
def process_worker(frame_queue, atom_categories, lattice, hbond_config, shared_memory):
"""处理器工作线程"""
print(f"Worker {os.getpid()} started")
while True:
try:
frame_item = frame_queue.get()
if frame_item is None:
print(f"Worker {os.getpid()} received None item")
break
frame_idx, frame_data = frame_item
# 检查是否为结束信号
if frame_idx is None or frame_data is None:
print(f"Worker {os.getpid()} received termination signal")
break
# 处理有效帧
success = process_frame(frame_data, atom_categories, lattice, hbond_config, frame_idx, shared_memory)
if not success:
print(f"Worker {os.getpid()} failed to process frame {frame_idx}")
except Exception as e:
print(f"Worker {os.getpid()} encountered error: {str(e)}")
traceback.print_exc()
break
print(f"Worker {os.getpid()} exiting")
def process_frame(frame_data, atom_categories, lattice, hbond_config, frame_idx, shared_memory):
"""处理单帧数据 - 高度优化版本"""
try:
# 检查帧数据是否有效
if frame_data is None:
print(f"Error processing frame {frame_idx}: frame_data is None")
return False
# 检查必要键是否存在
if "coords" not in frame_data or "box" not in frame_data:
print(f"Error processing frame {frame_idx}: missing required keys in frame_data")
return False
# 直接使用全局的原子类别信息
coords = frame_data["coords"]
box = frame_data["box"]
# 查找氢键
frame_hbonds = find_hbonds_in_frame(
coords,
lattice,
box,
atom_categories,
hbond_config
)
# 存储到共享内存
for hbond in hbond_config:
hbond_name = hbond["name"]
bonds = frame_hbonds[hbond_name]
for bond in bonds:
# 使用(donor, acceptor)作为氢键标识符
donor_idx, _, acceptor_idx = bond
shared_memory.add_hbond_event(frame_idx, hbond_name, donor_idx, acceptor_idx)
# 更新进度
shared_memory.increment_processed()
return True
except Exception as e:
print(f"Error processing frame {frame_idx}: {str(e)}")
traceback.print_exc()
return False
def progress_monitor(shared_memory, total_frames):
"""进度监视器 - 显示实时进度和预估时间"""
start_time = time.time()
last_update = 0
mem_monitor = psutil.Process()
with tqdm(total=total_frames, desc="Processing frames", unit="frame") as pbar:
while True:
processed, elapsed, eta = shared_memory.get_progress()
if processed >= total_frames:
break
# 更新进度条
pbar.n = processed
pbar.refresh()
# 每秒更新一次状态
if time.time() - last_update > 1.0:
# 计算内存使用
mem_usage = mem_monitor.memory_info().rss / (1024 ** 3) # GB
# 计算处理速度
fps = processed / elapsed if elapsed > 0 else 0
# 更新状态信息
pbar.set_postfix({
"FPS": f"{fps:.2f}",
"Mem": f"{mem_usage:.2f}GB",
"ETA": str(timedelta(seconds=int(eta))) if eta < 1e6 else "N/A"
})
last_update = time.time()
time.sleep(0.5)
pbar.n = total_frames
pbar.refresh()
def calculate_hbond_lifetimes_simple(data_file, traj_file, max_frames=100, delta_t=10, max_tau=1000, dt=0.05):
"""完全简化的氢键寿命计算,不使用多进程和共享内存"""
print("Starting simplified hydrogen bond lifetime analysis (single process, no multiprocessing)")
# 确保使用绝对路径
data_file = os.path.abspath(data_file)
traj_file = os.path.abspath(traj_file)
# 检查文件是否存在
if not os.path.exists(data_file):
print(f"Error: Data file not found: {data_file}")
return {}
if not os.path.exists(traj_file):
print(f"Error: Trajectory file not found: {traj_file}")
return {}
# 第一步:获取原子类别信息
print("Loading atom categories from data file...")
atom_types, initial_coords, initial_box = read_data_file(data_file)
lattice = np.diag(initial_box)
atom_categories = identify_atom_types(atom_types, initial_coords, lattice, initial_box)
num_atoms = len(atom_types)
print(f"Loaded {num_atoms} atoms with predefined categories")
# 打印原子类别统计
print("\nAtom Categories:")
for cat, values in atom_categories.items():
if isinstance(values, dict):
for sub_cat, indices in values.items():
print(f"{cat}.{sub_cat}: {len(indices)} atoms")
else:
print(f"{cat}: {len(values)} atoms")
# 第二步:获取总帧数
print("Counting frames in trajectory...")
total_frames = 0
with open(traj_file, 'r') as f:
while True:
frame_data = read_lammpstrj_frame(f, num_atoms)
if frame_data is None:
break
total_frames += 1
# 如果指定了最大帧数,则使用较小的值
if max_frames is not None and max_frames > 0:
actual_frames = min(total_frames, max_frames)
print(f"Limiting analysis to first {actual_frames} frames (out of {total_frames} total)")
total_frames = actual_frames
else:
print(f"Total frames: {total_frames:,}")
# 获取氢键配置
hbond_config = get_hbond_config()
# 初始化氢键事件记录器
hbond_events = {hbond["name"]: [] for hbond in hbond_config}
# 打开轨迹文件并处理每一帧
print("Processing frames...")
with open(traj_file, 'r') as f:
for frame_idx in tqdm(range(total_frames), desc="Processing frames"):
frame_data = read_lammpstrj_frame(f, num_atoms)
if frame_data is None:
break
# 检查帧数据
if "coords" not in frame_data or "box" not in frame_data:
print(f"Warning: Invalid frame data at frame {frame_idx}")
continue
# 查找氢键
frame_hbonds = find_hbonds_in_frame(
frame_data["coords"],
lattice,
frame_data["box"],
atom_categories,
hbond_config
)
# 记录氢键事件
for hbond in hbond_config:
hbond_name = hbond["name"]
bonds = frame_hbonds[hbond_name]
# 使用(donor, acceptor)作为氢键标识符
hbond_set = set()
for bond in bonds:
donor_idx, _, acceptor_idx = bond
hbond_set.add((donor_idx, acceptor_idx))
hbond_events[hbond_name].append(hbond_set)
# 每10帧打印一次进度
if frame_idx % 10 == 0 and frame_idx > 0:
total_hbonds = sum(len(bonds) for bonds in frame_hbonds.values())
print(f"Frame {frame_idx}: Found {total_hbonds} hydrogen bonds")
# 计算每种氢键类型的相关函数
results = {}
for hbond_type, events in hbond_events.items():
if not events or len(events) < max_tau:
print(f"Skipping {hbond_type}: insufficient data ({len(events)} frames)")
continue
# 检查是否有氢键事件
total_events = sum(len(event_set) for event_set in events)
if total_events == 0:
print(f"Skipping {hbond_type}: no hydrogen bond events detected")
continue
print(f"\nCalculating lifetime for {hbond_type}...")
try:
corr_func = calculate_hbond_lifetime(events, delta_t, max_tau)
tau = integrate_correlation_function(corr_func, dt)
results[hbond_type] = {"corr_func": corr_func, "tau": tau}
print(f" Relaxation time for {hbond_type}: {tau:.2f} fs")
except Exception as e:
print(f"Error calculating lifetime for {hbond_type}: {e}")
traceback.print_exc()
# 绘制相关函数
if results:
plot_correlation_functions(results, dt, max_tau)
else:
print("No results to plot")
print("Possible reasons:")
print("1. No hydrogen bonds detected in any frame")
print("2. Hydrogen bond detection parameters may need adjustment")
print("3. Atom type identification may be incorrect")
print("4. Trajectory file format may be incompatible")
return results
def plot_correlation_functions(results, dt, max_tau):
"""绘制氢键寿命相关函数"""
os.makedirs("hbond_lifetimes", exist_ok=True)
# 创建时间轴
time_axis = np.arange(0, max_tau * dt, dt)
# 创建图形
plt.figure(figsize=(12, 10))
# 绘制每种氢键类型的相关函数
for hbond_type, data in results.items():
corr_func = data["corr_func"]
tau = data["tau"]
# 截断时间轴以匹配相关函数长度
t_plot = time_axis[:len(corr_func)]
plt.plot(
t_plot, corr_func,
label=f"{hbond_type} (τ={tau:.2f} fs)",
linewidth=2
)
# 设置图形属性
plt.title("Hydrogen Bond Lifetime Correlation Functions", fontsize=18)
plt.xlabel("Time (fs)", fontsize=16)
plt.ylabel("Survival Probability", fontsize=16)
plt.yscale("log")
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(fontsize=12, framealpha=0.8, loc='best')
# 保存图像
plot_path = os.path.join("hbond_lifetimes", "hbond_lifetimes.tiff")
plt.tight_layout()
plt.savefig(plot_path, dpi=600)
plt.close()
print(f"\nSaved correlation function plot to: {plot_path}")
# 保存数据到CSV
csv_path = os.path.join("hbond_lifetimes", "hbond_lifetimes.csv")
with open(csv_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
header = ["Time (fs)"]
for hbond_type in results.keys():
header.append(f"{hbond_type} (C(t))")
writer.writerow(header)
for i in range(len(time_axis)):
if i >= max_tau:
break
row = [time_axis[i]]
for hbond_type, data in results.items():
corr_func = data["corr_func"]
if i < len(corr_func):
row.append(corr_func[i])
else:
row.append("")
writer.writerow(row)
# 保存弛豫时间数据
tau_path = os.path.join("hbond_lifetimes", "relaxation_times.csv")
with open(tau_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Hydrogen Bond Type", "Relaxation Time (fs)"])
for hbond_type, data in results.items():
writer.writerow([hbond_type, data["tau"]])
print(f"Saved lifetime data to: {csv_path}")
print(f"Saved relaxation times to: {tau_path}")
def test_first_frame(data_file, traj_file):
"""测试模式:只处理第一帧,包含详细调试信息"""
print("Running in test mode: analyzing first frame only")
try:
# 读取data文件获取初始原子类型和坐标
print(f"Reading data file: {data_file}")
atom_types, coords, box = read_data_file(data_file)
print(f"Successfully read data file: {data_file}")
print(f"Box dimensions: {box}")
print(f"Number of atoms: {len(atom_types)}")
# 打印原子类型映射
print("\nAtom Type Mapping:")
for type_num, symbol in ATOM_TYPE_MAPPING.items():
print(f"Type {type_num} -> {symbol}")
# 创建晶格矩阵 (正交盒子)
lattice = np.diag(box)
# 识别原子类别
print("\nIdentifying atom categories...")
atom_categories = identify_atom_types(atom_types, coords, lattice, box)
# 打印原子类别统计
print("\nAtom Categories:")
for cat, values in atom_categories.items():
if isinstance(values, dict):
for sub_cat, indices in values.items():
print(f"{cat}.{sub_cat}: {len(indices)} atoms")
else:
print(f"{cat}: {len(values)} atoms")
# 获取氢键配置
hbond_config = get_hbond_config()
# 查找氢键
print("\nFinding hydrogen bonds...")
hbond_results = find_hbonds_in_frame(coords, lattice, box, atom_categories, hbond_config)
# 打印氢键统计
print("\nHydrogen Bonds Found:")
total_hbonds = 0
for hbond_type, bonds in hbond_results.items():
print(f"{hbond_type}: {len(bonds)} bonds")
total_hbonds += len(bonds)
print(f"\nTotal Hydrogen Bonds: {total_hbonds}")
# 保存结果到CSV
os.makedirs("hbond_analysis", exist_ok=True)
csv_path = os.path.join("hbond_analysis", "first_frame_hbonds.csv")
with open(csv_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Hydrogen Bond Type", "Count"])
for hbond_type, bonds in hbond_results.items():
writer.writerow([hbond_type, len(bonds)])
print(f"\nSaved hydrogen bond counts to: {csv_path}")
except Exception as e:
print(f"Error during test mode: {str(e)}")
traceback.print_exc()
# 添加更详细的错误信息
print("\nAdditional debugging info:")
print(f"Data file: {data_file}")
print(f"Trajectory file: {traj_file}")
print(f"Atom types count: {len(atom_types) if 'atom_types' in locals() else 'N/A'}")
print(f"Coordinates shape: {coords.shape if 'coords' in locals() else 'N/A'}")
def main():
parser = argparse.ArgumentParser(description='Hydrogen Bond Analysis for LAMMPS Trajectories')
parser.add_argument('--test', action='store_true', help='Run in test mode (first frame only)')
parser.add_argument('--workers', type=int, default=1,
help=f'Number of parallel workers (default: 1, use 1 for single process)')
parser.add_argument('--delta_t', type=int, default=10,
help='Time origin spacing for correlation function (default: 10 frames)')
parser.add_argument('--max_tau', type=int, default=1000,
help='Maximum time for correlation function (default: 1000 frames)')
parser.add_argument('--dt', type=float, default=0.05,
help='Time step (ps) (default: 0.05 ps)')
parser.add_argument('--max_frames', type=int, default=100,
help='Maximum number of frames to process (default: 100)')
parser.add_argument('--simple', action='store_true', help='Use simple single-process mode (no multiprocessing)')
parser.add_argument('--debug', action='store_true', help='Run hydrogen bond detection debug')
parser.add_argument('--debug_phosphate', action='store_true', help='Debug phosphate-water hydrogen bonds specifically')
parser.add_argument('--debug_frame', type=int, default=0, help='Frame index for debugging')
args = parser.parse_args()
# 设置默认文件
data_file = "H3PO4-23pure.data"
traj_file = "nvt-P2O5-353K-23.lammpstrj"
# 检查文件是否存在
if not os.path.exists(data_file):
print(f"Error: Data file not found: {data_file}")
sys.exit(1)
if not os.path.exists(traj_file):
print(f"Error: Trajectory file not found: {traj_file}")
sys.exit(1)
start_time = time.time()
if args.debug_phosphate:
# 专门调试磷酸-水氢键
debug_phosphate_water_hbonds(data_file, traj_file, args.debug_frame)
elif args.debug:
# 通用调试模式
debug_hbond_detection(data_file, traj_file, args.debug_frame)
elif args.test:
# 测试模式:只处理第一帧
test_first_frame(data_file, traj_file)
elif args.simple:
# 使用简化版本(无多进程)
calculate_hbond_lifetimes_simple(
data_file, traj_file,
max_frames=args.max_frames,
delta_t=args.delta_t,
max_tau=args.max_tau,
dt=args.dt
)
else:
# 完整模式:计算氢键寿命(可能使用多进程)
calculate_hbond_lifetimes(
data_file, traj_file,
workers=args.workers,
delta_t=args.delta_t,
max_tau=args.max_tau,
dt=args.dt,
max_frames=args.max_frames
)
elapsed = time.time() - start_time
hours, rem = divmod(elapsed, 3600)
minutes, seconds = divmod(rem, 60)
print(f"\nAnalysis completed in {int(hours):02d}h:{int(minutes):02d}m:{seconds:06.3f}s")
if __name__ == "__main__":
# 设置多进程启动方法为'spawn',提高Windows兼容性
multiprocessing.set_start_method('spawn', force=True)
main()这个计算氢键寿命的时候输出为0,首先并行处理尝试了,workers为1也尝试了,简化单核处理也尝试了,而且目前第一帧的读取是没有问题,能够准确识别氢键类型及数量的,所以问题可能出在轨迹文件的读取?首先 我想看看你提出的解决方法,其次我在想用MDAnalysis读取轨迹文件和data文件会不会好一点?如果基于u=格式读取文件,那应该需要pdb格式和lammpstrij格式,我在这里将data替换为pdb,同时提供pdb格式文件在该目录下,输出完整的代码