import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
import os
import csv
import argparse
import multiprocessing
from scipy.stats import gaussian_kde
import time
import sys
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
def identify_atom_types(struct):
"""原子类型识别函数"""
# 初始化数据结构
atom_types = {
"phosphate_oxygens": {"P-O/P=O": [], "P-OH": []},
"phosphate_hydrogens": [],
"water_oxygens": [],
"water_hydrogens": [],
"hydronium_oxygens": [],
"hydronium_hydrogens": [],
"fluoride_atoms": [i for i, site in enumerate(struct) if site.species_string == "F"],
"aluminum_atoms": [i for i, site in enumerate(struct) if site.species_string == "Al"]
}
# 构建全局KDTree
all_coords = np.array([site.coords for site in struct])
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
# 识别磷酸基团
p_atoms = [i for i, site in enumerate(struct) if site.species_string == "P"]
phosphate_oxygens = []
for p_idx in p_atoms:
# 查找P周围的O原子 (距离 < 1.6Å)
neighbors = kdtree.query_ball_point(all_coords[p_idx], r=1.6)
p_o_indices = [idx for idx in neighbors if idx != p_idx and struct[idx].species_string == "O"]
phosphate_oxygens.extend(p_o_indices)
# 识别所有H原子并确定归属
hydrogen_owners = {}
h_atoms = [i for i, site in enumerate(struct) if site.species_string == "H"]
for h_idx in h_atoms:
neighbors = kdtree.query_ball_point(all_coords[h_idx], r=1.2)
candidate_os = [idx for idx in neighbors if idx != h_idx and struct[idx].species_string == "O"]
if not candidate_os:
continue
min_dist = float('inf')
owner_o = None
for o_idx in candidate_os:
dist = struct.get_distance(h_idx, o_idx)
if dist < min_dist:
min_dist = dist
owner_o = o_idx
hydrogen_owners[h_idx] = owner_o
# 分类磷酸氧:带H的为P-OH,不带H的为P-O/P=O
for o_idx in phosphate_oxygens:
has_hydrogen = any(owner_o == o_idx for h_idx, owner_o in hydrogen_owners.items())
if has_hydrogen:
atom_types["phosphate_oxygens"]["P-OH"].append(o_idx)
else:
atom_types["phosphate_oxygens"]["P-O/P=O"].append(o_idx)
# 识别水和水合氢离子
all_o_indices = [i for i, site in enumerate(struct) if site.species_string == "O"]
non_phosphate_os = [o_idx for o_idx in all_o_indices if o_idx not in phosphate_oxygens]
o_h_count = {}
for h_idx, owner_o in hydrogen_owners.items():
o_h_count[owner_o] = o_h_count.get(owner_o, 0) + 1
for o_idx in non_phosphate_os:
h_count = o_h_count.get(o_idx, 0)
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
if h_count == 2:
atom_types["water_oxygens"].append(o_idx)
atom_types["water_hydrogens"].extend(attached_hs)
elif h_count == 3:
atom_types["hydronium_oxygens"].append(o_idx)
atom_types["hydronium_hydrogens"].extend(attached_hs)
# 识别磷酸基团的H原子
for o_idx in atom_types["phosphate_oxygens"]["P-OH"]:
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
atom_types["phosphate_hydrogens"].extend(attached_hs)
return atom_types
def get_hbond_angle_config():
"""氢键分析配置"""
return [
{
"name": "P-O/P=O···Hw",
"donor_type": "water_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "water_hydrogens",
"threshold": 2.375,
"color": "blue"
},
{
"name": "P-O/P=O···Hh",
"donor_type": "hydronium_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "hydronium_hydrogens",
"threshold": 2.275,
"color": "green"
},
{
"name": "P-O/P=O···Hp",
"donor_type": "P-OH",
"acceptor_type": "P-O/P=O",
"h_type": "phosphate_hydrogens",
"threshold": 2.175,
"color": "red"
},
{
"name": "P-OH···Ow",
"donor_type": "P-OH",
"acceptor_type": "water_oxygens",
"h_type": "phosphate_hydrogens",
"threshold": 2.275,
"color": "orange"
},
{
"name": "Hw···Ow",
"donor_type": "water_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "water_hydrogens",
"threshold": 2.375,
"color": "purple"
},
{
"name": "Hh···Ow",
"donor_type": "hydronium_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "hydronium_hydrogens",
"threshold": 2.225,
"color": "cyan"
},
{
"name": "Hw···F",
"donor_type": "water_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "water_hydrogens",
"threshold": 2.225,
"color": "magenta"
},
{
"name": "Hh···F",
"donor_type": "hydronium_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "hydronium_hydrogens",
"threshold": 2.175,
"color": "brown"
},
{
"name": "Hp···F",
"donor_type": "P-OH",
"acceptor_type": "fluoride_atoms",
"h_type": "phosphate_hydrogens",
"threshold": 2.275,
"color": "pink"
}
]
def calculate_angle(struct, donor_idx, h_idx, acceptor_idx):
"""计算D-H···A键角 (角度制),使用笛卡尔向量表示并处理周期性"""
# 获取分数坐标
frac_coords = struct.frac_coords
lattice = struct.lattice
# 获取氢原子H的分数坐标
h_frac = frac_coords[h_idx]
# 计算供体D相对于H的分数坐标差
d_frac = frac_coords[donor_idx]
dh_frac = d_frac - h_frac
# 计算受体A相对于H的分数坐标差
a_frac = frac_coords[acceptor_idx]
ah_frac = a_frac - h_frac
# 应用周期性修正 (将分数坐标差限制在[-0.5, 0.5]范围内)
dh_frac = np.where(dh_frac > 0.5, dh_frac - 1, dh_frac)
dh_frac = np.where(dh_frac < -0.5, dh_frac + 1, dh_frac)
ah_frac = np.where(ah_frac > 0.5, ah_frac - 1, ah_frac)
ah_frac = np.where(ah_frac < -0.5, ah_frac + 1, ah_frac)
# 转换为笛卡尔向量 (H->D 和 H->A)
vec_hd = np.dot(dh_frac, lattice.matrix) # H->D向量
vec_ha = np.dot(ah_frac, lattice.matrix) # H->A向量
# 计算向量点积
dot_product = np.dot(vec_hd, vec_ha)
# 计算向量模长
norm_hd = np.linalg.norm(vec_hd)
norm_ha = np.linalg.norm(vec_ha)
# 避免除以零
if norm_hd < 1e-6 or norm_ha < 1e-6:
return None
# 计算余弦值
cos_theta = dot_product / (norm_hd * norm_ha)
# 处理数值误差
cos_theta = np.clip(cos_theta, -1.0, 1.0)
# 计算角度 (弧度转角度)
angle_rad = np.arccos(cos_theta)
angle_deg = np.degrees(angle_rad)
# 检查异常角度值
if angle_deg < 90 or angle_deg > 180:
print(f"警告: 异常键角值 {angle_deg:.2f}° (原子 {donor_idx}-{h_idx}-{acceptor_idx})")
print(f"向量HD: {vec_hd}, 长度: {norm_hd:.4f} Å")
print(f"向量HA: {vec_ha}, 长度: {norm_ha:.4f} Å")
print(f"点积: {dot_product:.4f}, cosθ: {cos_theta:.4f}")
return angle_deg
def calculate_hbond_angles_frame(struct, atom_types, hbond_config, bond_threshold=1.3):
"""计算单帧结构中氢键键角"""
# 构建全局KDTree用于快速搜索
all_coords = np.array([site.coords for site in struct])
lattice_abc = struct.lattice.abc
kdtree = cKDTree(all_coords, boxsize=lattice_abc)
# 结果字典: {氢键类型: [角度列表]}
angle_results = {hbond["name"]: [] for hbond in hbond_config}
# 处理每一类氢键
for hbond in hbond_config:
# 获取供体原子列表
if hbond["donor_type"] == "P-OH":
donors = atom_types["phosphate_oxygens"]["P-OH"]
else:
donors = atom_types[hbond["donor_type"]]
# 获取受体原子列表
if hbond["acceptor_type"] == "P-O/P=O":
acceptors = atom_types["phosphate_oxygens"]["P-O/P=O"]
elif hbond["acceptor_type"] == "P-OH":
acceptors = atom_types["phosphate_oxygens"]["P-OH"]
else:
acceptors = atom_types[hbond["acceptor_type"]]
# 获取氢原子列表
hydrogens = atom_types[hbond["h_type"]]
# 如果没有氢原子或受体,跳过
if len(hydrogens) == 0 or len(acceptors) == 0:
continue
# 为受体构建KDTree(使用全局坐标)
acceptor_coords = all_coords[acceptors]
acceptor_kdtree = cKDTree(acceptor_coords, boxsize=lattice_abc)
# 遍历所有氢原子
for h_idx in hydrogens:
h_coords = all_coords[h_idx]
# 查找与H成键的供体 (距离 < bond_threshold)
donor_neighbors = kdtree.query_ball_point(h_coords, r=bond_threshold)
donor_candidates = [idx for idx in donor_neighbors if idx in donors]
# 如果没有找到供体,跳过
if not donor_candidates:
continue
# 选择最近的供体
min_dist = float('inf')
donor_idx = None
for d_idx in donor_candidates:
dist = struct.get_distance(h_idx, d_idx)
if dist < min_dist:
min_dist = dist
donor_idx = d_idx
# 查找在阈值内的受体
acceptor_indices = acceptor_kdtree.query_ball_point(h_coords, r=hbond["threshold"])
for a_idx_offset in acceptor_indices:
a_idx = acceptors[a_idx_offset]
# 排除供体自身
if a_idx == donor_idx:
continue
# 计算键角
angle = calculate_angle(struct, donor_idx, h_idx, a_idx)
if angle is not None and angle >= 0.0:
angle_results[hbond["name"]].append(angle)
return angle_results
def calculate_hbond_angles_frame_wrapper(args):
"""包装函数用于多进程处理"""
struct, hbond_config = args
atom_types = identify_atom_types(struct)
return calculate_hbond_angles_frame(struct, atom_types, hbond_config)
def calculate_hbond_angles_parallel(structures, workers=1):
"""并行计算氢键键角"""
hbond_config = get_hbond_angle_config()
all_results = {hbond["name"]: [] for hbond in hbond_config}
# 准备参数列表
args_list = [(struct, hbond_config) for struct in structures]
# 如果没有可用的worker,则顺序执行
if workers == 1:
results = []
for args in tqdm(args_list, desc="Calculating HBond Angles"):
results.append(calculate_hbond_angles_frame_wrapper(args))
else:
with multiprocessing.Pool(processes=workers) as pool:
results = list(tqdm(
pool.imap(calculate_hbond_angles_frame_wrapper, args_list),
total=len(structures),
desc="Calculating HBond Angles"
))
# 合并结果
for frame_result in results:
for hbond_name, angles in frame_result.items():
all_results[hbond_name].extend(angles)
return all_results
def plot_hbond_angle_distribution(all_angles, system_name):
"""绘制氢键键角分布图并保存原始数据"""
# 创建输出目录
os.makedirs("HBond_Angle_Plots", exist_ok=True)
os.makedirs("HBond_Angle_Data", exist_ok=True)
hbond_config = get_hbond_angle_config()
# 创建统计信息汇总文件
summary_path = os.path.join("HBond_Angle_Data", f"{system_name}_summary.csv")
with open(summary_path, 'w', newline='') as summary_file:
summary_writer = csv.writer(summary_file)
summary_writer.writerow(["HBond Type", "Count", "Mean (°)", "Std Dev (°)", "Median (°)",
"90% Threshold (°)", "Min (°)", "Max (°)"]) # 添加90%阈值列
# 处理每种氢键类型
for hbond in hbond_config:
angles = np.array(all_angles[hbond["name"]])
if len(angles) == 0:
print(f"No angles found for {hbond['name']} in {system_name}")
continue
# 计算90%阈值(第10百分位数)
if len(angles) >= 5: # 确保有足够的数据点
threshold_90 = np.percentile(angles, 10) # 90%的氢键角度大于等于此值
else:
threshold_90 = np.nan
# 创建安全文件名
safe_name = hbond["name"].replace("/", "").replace("=", "").replace("···", "_").replace(" ", "_")
# ========== 保存原始角度数据 ==========
data_path = os.path.join("HBond_Angle_Data", f"{system_name}_{safe_name}.csv")
with open(data_path, 'w', newline='') as data_file:
data_writer = csv.writer(data_file)
data_writer.writerow(["Angle (degrees)"])
for angle in angles:
data_writer.writerow([f"{angle:.4f}"])
# ========== 计算统计量 ==========
mean_angle = np.mean(angles)
std_angle = np.std(angles)
median_angle = np.median(angles)
min_angle = np.min(angles)
max_angle = np.max(angles)
# 保存统计信息到汇总文件
with open(summary_path, 'a', newline='') as summary_file:
summary_writer = csv.writer(summary_file)
summary_writer.writerow([
hbond["name"],
len(angles),
f"{mean_angle:.4f}",
f"{std_angle:.4f}",
f"{median_angle:.4f}",
f"{threshold_90:.4f}", # 添加90%阈值
f"{min_angle:.4f}",
f"{max_angle:.4f}"
])
# ========== 绘制分布图 ==========
plt.figure(figsize=(8, 6))
# 绘制直方图
hist, bins, _ = plt.hist(angles, bins=30, density=True, alpha=0.5,
color=hbond["color"], label="Histogram")
# 绘制KDE曲线
kde = gaussian_kde(angles)
x = np.linspace(0, 180, 500)
plt.plot(x, kde(x), color="black", linewidth=2.5, label="KDE")
# 添加90%阈值线
if not np.isnan(threshold_90):
plt.axvline(x=threshold_90, color='red', linestyle='--', linewidth=2.0,
label=f'90% Threshold: {threshold_90:.2f}°')
# 添加统计信息
stats_text = (f"Mean: {mean_angle:.2f}°\n"
f"Std: {std_angle:.2f}°\n"
f"Median: {median_angle:.2f}°\n"
f"90% Threshold: {threshold_90:.2f}°\n" # 添加90%阈值
f"Count: {len(angles)}")
plt.text(0.75, 0.95, stats_text, transform=plt.gca().transAxes,
verticalalignment='top', bbox=dict(boxstyle='round', alpha=0.5))
# 设置标题和标签
plt.title(f"{system_name}: {hbond['name']} Bond Angle Distribution", fontsize=16)
plt.xlabel("Bond Angle (degrees)", fontsize=14)
plt.ylabel("Probability Density", fontsize=14)
plt.xlim(0, 180)
plt.ylim(0, None)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
# 保存图像
image_path = os.path.join("HBond_Angle_Plots", f"{system_name}_{safe_name}_angle.tiff")
plt.savefig(image_path, dpi=600, bbox_inches='tight')
plt.close()
print(f"Saved HBond angle data: {data_path}")
print(f"Saved HBond angle plot: {image_path}")
print(f"Saved summary statistics: {summary_path}")
def main(vasprun_files, workers=1):
"""主处理函数"""
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}: {vasprun_file} with {workers} workers")
print(f"{'='*50}")
start_time = time.time()
try:
# 加载VASP结果
vr = Vasprun(vasprun_file, ionic_step_skip=5)
structures = vr.structures
print(f"Loaded {len(structures)} frames")
print(f"Lattice parameters: {structures[0].lattice.abc}")
print(f"Periodic boundary handling: Fractional coordinates + PBC correction")
# 计算氢键键角分布
print("Calculating hydrogen bond angles...")
hbond_angles = calculate_hbond_angles_parallel(structures, workers=workers)
# 绘制并保存结果
plot_hbond_angle_distribution(hbond_angles, system_name)
elapsed = time.time() - start_time
print(f"\nCompleted processing for {system_name} in {elapsed:.2f} seconds")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
print("\nAll HBond angle analysis completed successfully!")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate hydrogen bond angles from VASP simulations')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers (default: {multiprocessing.cpu_count()})')
args = parser.parse_args()
# 自动设置vasprun文件和系统名称
vasprun_files = {
"System1": "vasprun1.xml",
"System2": "vasprun2.xml",
"System3": "vasprun3.xml",
"System4": "vasprun4.xml"
}
# 检查文件是否存在
missing_files = [name for name, path in vasprun_files.items() if not os.path.exists(path)]
if missing_files:
raise FileNotFoundError(f"Missing vasprun files: {', '.join(missing_files)}")
print(f"Starting HBond angle analysis with {args.workers} workers...")
main(vasprun_files, workers=args.workers)以上代码根据D-H的化学距离,H-A之间的氢键距离来判定氢键,计算了氢键角度的分布,在这里我们需要根据原有的距离限制的基础上加入氢键角度的限制,来使氢键的存在更严谨,将代码中的部分修改def get_hbond_config():
"""返回氢键配置,包含距离和角度阈值"""
return [
{
"name": "P-O/P=O···Hw",
"donor_type": "water_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "water_hydrogens",
"distance_threshold": 2.375,
"angle_threshold": 143.99,
"color": "#1f77b4"
},
{
"name": "P-O/P=O···Hh",
"donor_type": "hydronium_oxygens",
"acceptor_type": "P-O/P=O",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 157.82,
"color": "#ff7f0e"
},
{
"name": "P-O/P=O···Hp",
"donor_type": "P-OH",
"acceptor_type": "P-O/P=O",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.175,
"angle_threshold": 155.00,
"color": "#2ca02c"
},
{
"name": "P-OH···Ow",
"donor_type": "P-OH",
"acceptor_type": "water_oxygens",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 155.13,
"color": "#d62728"
},
{
"name": "Hw···Ow",
"donor_type": "water_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "water_hydrogens",
"distance_threshold": 2.375,
"angle_threshold": 138.73,
"color": "#9467bd"
},
{
"name": "Hh···Ow",
"donor_type": "hydronium_oxygens",
"acceptor_type": "water_oxygens",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.225,
"angle_threshold": 155.31,
"color": "#8c564b"
},
{
"name": "Hw···F",
"donor_type": "water_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "water_hydrogens",
"distance_threshold": 2.225,
"angle_threshold": 137.68,
"color": "#e377c2"
},
{
"name": "Hh···F",
"donor_type": "hydronium_oxygens",
"acceptor_type": "fluoride_atoms",
"h_type": "hydronium_hydrogens",
"distance_threshold": 2.175,
"angle_threshold": 154.64,
"color": "#7f7f7f"
},
{
"name": "Hp···F",
"donor_type": "P-OH",
"acceptor_type": "fluoride_atoms",
"h_type": "phosphate_hydrogens",
"distance_threshold": 2.275,
"angle_threshold": 153.71,
"color": "#bcbd22"
}
],根据加入的角度限制,我们将沿用原代码中的识别逻辑,限制数值,将计算对象改为氢键键长随时间的变化,每10步输出一帧来计算,同样输出对应的Mean,median,std,对于埃的字符我们在这里采用LaTex来实现,图文的表达,则要符合The Journal of Chemical Physics期刊的要求,计算的该键长即是我们设置的distance_threshold部分的具体随时间变化的平均键长变化,实际上的氢键键长一般都在distance_threshold范围内,因为distance_threshold取的氢键键长距离的极限最大值。请确保LaTex正确表达距离埃的单位,而避免报错,其次仔细输出代码避免简单的问题出错
最新发布