import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
import os
import csv
import argparse
import multiprocessing
from scipy.stats import gaussian_kde
import time
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置 - 符合Journal of Chemical Physics要求
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
def identify_atom_types(struct):
"""优化的原子类型识别函数,不再区分P=O和P-O"""
# 初始化数据结构
atom_types = {
"phosphate_oxygens": {"P-O/P=O": [], "P-OH": []},
"phosphate_hydrogens": [],
"water_oxygens": [],
"water_hydrogens": [],
"hydronium_oxygens": [],
"hydronium_hydrogens": [],
"fluoride_atoms": [i for i, site in enumerate(struct) if site.species_string == "F"],
"aluminum_atoms": [i for i, site in enumerate(struct) if site.species_string == "Al"]
}
# 构建全局KDTree
all_coords = np.array([site.coords for site in struct])
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
# 识别磷酸基团
p_atoms = [i for i, site in enumerate(struct) if site.species_string == "P"]
phosphate_oxygens = []
for p_idx in p_atoms:
# 查找P周围的O原子 (距离 < 1.6Å)
neighbors = kdtree.query_ball_point(all_coords[p_idx], r=1.6)
p_o_indices = [idx for idx in neighbors if idx != p_idx and struct[idx].species_string == "O"]
phosphate_oxygens.extend(p_o_indices)
# 识别所有H原子并确定归属
hydrogen_owners = {}
h_atoms = [i for i, site in enumerate(struct) if site.species_string == "H"]
for h_idx in h_atoms:
neighbors = kdtree.query_ball_point(all_coords[h_idx], r=1.2)
candidate_os = [idx for idx in neighbors if idx != h_idx and struct[idx].species_string == "O"]
if not candidate_os:
continue
min_dist = float('inf')
owner_o = None
for o_idx in candidate_os:
dist = struct.get_distance(h_idx, o_idx)
if dist < min_dist:
min_dist = dist
owner_o = o_idx
hydrogen_owners[h_idx] = owner_o
# 分类磷酸氧:带H的为P-OH,不带H的为P-O/P=O
for o_idx in phosphate_oxygens:
has_hydrogen = any(owner_o == o_idx for h_idx, owner_o in hydrogen_owners.items())
if has_hydrogen:
atom_types["phosphate_oxygens"]["P-OH"].append(o_idx)
else:
atom_types["phosphate_oxygens"]["P-O/P=O"].append(o_idx)
# 识别水和水合氢离子
all_o_indices = [i for i, site in enumerate(struct) if site.species_string == "O"]
non_phosphate_os = [o_idx for o_idx in all_o_indices if o_idx not in phosphate_oxygens]
o_h_count = {}
for h_idx, owner_o in hydrogen_owners.items():
o_h_count[owner_o] = o_h_count.get(owner_o, 0) + 1
for o_idx in non_phosphate_os:
h_count = o_h_count.get(o_idx, 0)
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
if h_count == 2:
atom_types["water_oxygens"].append(o_idx)
atom_types["water_hydrogens"].extend(attached_hs)
elif h_count == 3:
atom_types["hydronium_oxygens"].append(o_idx)
atom_types["hydronium_hydrogens"].extend(attached_hs)
# 识别磷酸基团的H原子
for o_idx in atom_types["phosphate_oxygens"]["P-OH"]:
attached_hs = [h_idx for h_idx, owner_o in hydrogen_owners.items() if owner_o == o_idx]
atom_types["phosphate_hydrogens"].extend(attached_hs)
return atom_types
def get_hbond_angle_config():
"""返回九类氢键的键角分析配置"""
return [
{
"name": "P-O/P=O···Hw",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "P-O/P=O", # 受体氧 (磷酸中的非羟基氧)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.375, # 氢键距离阈值 (Å)
"color": "blue"
},
{
"name": "P-O/P=O···Hh",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "P-O/P=O",
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.275,
"color": "green"
},
{
"name": "P-O/P=O···Hp",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "P-O/P=O",
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.175,
"color": "red"
},
{
"name": "P-OH···Ow",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "water_oxygens", # 受体氧 (水中的O)
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.275,
"color": "orange"
},
{
"name": "Hw···Ow",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "water_oxygens", # 受体氧 (另一个水中的O)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.375,
"color": "purple"
},
{
"name": "Hh···Ow",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "water_oxygens", # 受体氧 (水中的O)
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.225,
"color": "cyan"
},
{
"name": "Hw···F",
"donor_type": "water_oxygens", # 供体氧 (水中的O)
"acceptor_type": "fluoride_atoms", # 受体 (氟离子)
"h_type": "water_hydrogens", # 氢原子 (水中的H)
"threshold": 2.225,
"color": "magenta"
},
{
"name": "Hh···F",
"donor_type": "hydronium_oxygens", # 供体氧 (水合氢离子中的O)
"acceptor_type": "fluoride_atoms",
"h_type": "hydronium_hydrogens", # 氢原子 (水合氢离子中的H)
"threshold": 2.175,
"color": "brown"
},
{
"name": "Hp···F",
"donor_type": "P-OH", # 供体氧 (磷酸中的羟基氧)
"acceptor_type": "fluoride_atoms",
"h_type": "phosphate_hydrogens", # 氢原子 (磷酸中的H)
"threshold": 2.275,
"color": "pink"
}
]
def calculate_angle(d_coords, h_coords, a_coords):
"""计算D-H···A键角 (角度制)"""
vec_dh = h_coords - d_coords
vec_ha = a_coords - h_coords
# 计算向量点积
dot_product = np.dot(vec_dh, vec_ha)
# 计算向量模长
norm_dh = np.linalg.norm(vec_dh)
norm_ha = np.linalg.norm(vec_ha)
# 避免除以零
if norm_dh < 1e-6 or norm_ha < 1e-6:
return None
# 计算余弦值
cos_theta = dot_product / (norm_dh * norm_ha)
# 处理数值误差
cos_theta = np.clip(cos_theta, -1.0, 1.0)
# 计算角度 (弧度转角度)
angle_rad = np.arccos(cos_theta)
angle_deg = np.degrees(angle_rad)
return angle_deg
def calculate_hbond_angles_frame(struct, atom_types, hbond_config, bond_threshold=1.3):
"""计算单帧结构中所有氢键的键角"""
# 构建全局KDTree用于快速搜索
all_coords = np.array([site.coords for site in struct])
kdtree = cKDTree(all_coords, boxsize=struct.lattice.abc)
# 结果字典: {氢键类型: [角度列表]}
angle_results = {hbond["name"]: [] for hbond in hbond_config}
# 处理每一类氢键
for hbond in hbond_config:
# 获取供体原子列表
if hbond["donor_type"] == "P-OH":
donors = atom_types["phosphate_oxygens"]["P-OH"]
else:
donors = atom_types[hbond["donor_type"]]
# 获取受体原子列表
if hbond["acceptor_type"] == "P-O/P=O":
acceptors = atom_types["phosphate_oxygens"]["P-O/P=O"]
elif hbond["acceptor_type"] == "P-OH":
acceptors = atom_types["phosphate_oxygens"]["P-OH"]
else:
acceptors = atom_types[hbond["acceptor_type"]]
# 获取氢原子列表
hydrogens = atom_types[hbond["h_type"]]
# 如果没有氢原子或受体,跳过
if len(hydrogens) == 0 or len(acceptors) == 0:
continue
# 为受体构建KDTree
acceptor_coords = np.array([struct[i].coords for i in acceptors])
acceptor_kdtree = cKDTree(acceptor_coords, boxsize=struct.lattice.abc)
# 遍历所有氢原子
for h_idx in hydrogens:
h_coords = all_coords[h_idx]
# 查找与H成键的供体 (距离 < bond_threshold)
donor_neighbors = kdtree.query_ball_point(h_coords, r=bond_threshold)
donor_candidates = [idx for idx in donor_neighbors if idx in donors]
# 如果没有找到供体,跳过
if not donor_candidates:
continue
# 选择最近的供体
min_dist = float('inf')
donor_idx = None
for d_idx in donor_candidates:
dist = struct.get_distance(h_idx, d_idx)
if dist < min_dist:
min_dist = dist
donor_idx = d_idx
# 查找在阈值内的受体
acceptor_indices = acceptor_kdtree.query_ball_point(h_coords, r=hbond["threshold"])
for a_idx_offset in acceptor_indices:
a_idx = acceptors[a_idx_offset]
a_coords = all_coords[a_idx]
# 排除供体自身
if a_idx == donor_idx:
continue
# 计算键角
d_coords = all_coords[donor_idx]
angle = calculate_angle(d_coords, h_coords, a_coords)
if angle is not None and angle >= 120.0:
angle_results[hbond["name"]].append(angle)
return angle_results
def calculate_hbond_angles_frame_wrapper(args):
"""包装函数用于多进程处理"""
struct, hbond_config = args
atom_types = identify_atom_types(struct)
return calculate_hbond_angles_frame(struct, atom_types, hbond_config)
def calculate_hbond_angles_parallel(structures, workers=1):
"""并行计算氢键键角"""
hbond_config = get_hbond_angle_config()
all_results = {hbond["name"]: [] for hbond in hbond_config}
# 准备参数列表
args_list = [(struct, hbond_config) for struct in structures]
with multiprocessing.Pool(processes=workers) as pool:
results = list(tqdm(
pool.imap(calculate_hbond_angles_frame_wrapper, args_list),
total=len(structures),
desc="Calculating HBond Angles"
))
# 合并结果
for frame_result in results:
for hbond_name, angles in frame_result.items():
all_results[hbond_name].extend(angles)
return all_results
def plot_hbond_angle_distribution(all_angles, system_name):
"""绘制氢键键角分布图并保存原始数据"""
# 创建输出目录
os.makedirs("HBond_Angle_Plots", exist_ok=True)
os.makedirs("HBond_Angle_Data", exist_ok=True)
hbond_config = get_hbond_angle_config()
# 创建统计信息汇总文件
summary_path = os.path.join("HBond_Angle_Data", f"{system_name}_summary.csv")
with open(summary_path, 'w', newline='') as summary_file:
summary_writer = csv.writer(summary_file)
summary_writer.writerow(["HBond Type", "Count", "Mean (°)", "Std Dev (°)", "Median (°)", "Min (°)", "Max (°)"])
# 处理每种氢键类型
for hbond in hbond_config:
angles = np.array(all_angles[hbond["name"]])
if len(angles) == 0:
print(f"No angles found for {hbond['name']} in {system_name}")
continue
# 创建安全文件名
safe_name = hbond["name"].replace("/", "").replace("=", "").replace("···", "_").replace(" ", "_")
# ========== 保存原始角度数据 ==========
data_path = os.path.join("HBond_Angle_Data", f"{system_name}_{safe_name}.csv")
with open(data_path, 'w', newline='') as data_file:
data_writer = csv.writer(data_file)
data_writer.writerow(["Angle (degrees)"])
for angle in angles:
data_writer.writerow([f"{angle:.4f}"])
# ========== 计算统计量 ==========
mean_angle = np.mean(angles)
std_angle = np.std(angles)
median_angle = np.median(angles)
min_angle = np.min(angles)
max_angle = np.max(angles)
# 保存统计信息到汇总文件
with open(summary_path, 'a', newline='') as summary_file:
summary_writer = csv.writer(summary_file)
summary_writer.writerow([
hbond["name"],
len(angles),
f"{mean_angle:.4f}",
f"{std_angle:.4f}",
f"{median_angle:.4f}",
f"{min_angle:.4f}",
f"{max_angle:.4f}"
])
# ========== 绘制分布图 ==========
plt.figure(figsize=(8, 6))
# 绘制直方图
hist, bins, _ = plt.hist(angles, bins=30, density=True, alpha=0.5,
color=hbond["color"], label="Histogram")
# 绘制KDE曲线
kde = gaussian_kde(angles)
x = np.linspace(120, 180, 500)
plt.plot(x, kde(x), color="black", linewidth=2.5, label="KDE")
# 添加统计信息
stats_text = (f"Mean: {mean_angle:.2f}°\n"
f"Std: {std_angle:.2f}°\n"
f"Median: {median_angle:.2f}°\n"
f"Count: {len(angles)}")
plt.text(0.75, 0.95, stats_text, transform=plt.gca().transAxes,
verticalalignment='top', bbox=dict(boxstyle='round', alpha=0.5))
# 设置标题和标签
plt.title(f"{system_name}: {hbond['name']} Bond Angle Distribution", fontsize=16)
plt.xlabel("Bond Angle (degrees)", fontsize=14)
plt.ylabel("Probability Density", fontsize=14)
plt.xlim(120, 180)
plt.ylim(0, None)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
# 保存图像
image_path = os.path.join("HBond_Angle_Plots", f"{system_name}_{safe_name}_angle.tiff")
plt.savefig(image_path, dpi=600, bbox_inches='tight')
plt.close()
print(f"Saved HBond angle data: {data_path}")
print(f"Saved HBond angle plot: {image_path}")
print(f"Saved summary statistics: {summary_path}")
def main(vasprun_files, workers=1):
"""主处理函数"""
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}: {vasprun_file} with {workers} workers")
print(f"{'='*50}")
start_time = time.time()
try:
# 加载VASP结果
vr = Vasprun(vasprun_file, ionic_step_skip=5)
structures = vr.structures
print(f"Loaded {len(structures)} frames")
# 计算氢键键角分布
print("Calculating hydrogen bond angles...")
hbond_angles = calculate_hbond_angles_parallel(structures, workers=workers)
# 绘制并保存结果
plot_hbond_angle_distribution(hbond_angles, system_name)
elapsed = time.time() - start_time
print(f"\nCompleted processing for {system_name} in {elapsed:.2f} seconds")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}")
print("\nAll HBond angle analysis completed successfully!")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate hydrogen bond angles from VASP simulations')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers (default: {multiprocessing.cpu_count()})')
args = parser.parse_args()
# 自动设置vasprun文件和系统名称
vasprun_files = {
"System1": "vasprun1.xml",
"System2": "vasprun2.xml",
"System3": "vasprun3.xml",
"System4": "vasprun4.xml"
}
# 检查文件是否存在
missing_files = [name for name, path in vasprun_files.items() if not os.path.exists(path)]
if missing_files:
raise FileNotFoundError(f"Missing vasprun files: {', '.join(missing_files)}")
print(f"Starting HBond angle analysis with {args.workers} workers...")
main(vasprun_files, workers=args.workers)以上代码当中修正笛卡尔坐标的直接使用,修改为笛卡尔向量,避免在计算氢键角度的时候部分处于周期性外,导致角度计算出现偏差,在这里需要引入struct,但是pymatgen中应该用structure直接计算原子间距离,而不是通过Lattice避免调用错误,
最新发布