import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.signal import savgol_filter
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
from collections import defaultdict
import os
import csv
import argparse
import multiprocessing
from functools import partial
import time
import types
import dill
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置 - 符合Journal of Chemical Physics要求
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600, # 提高分辨率
'savefig.dpi': 600,
'figure.figsize': (8, 6), # 期刊常用尺寸
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5, # 加粗坐标轴线
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
# 1. 增强的原子类型识别函数 - 逐帧识别
def identify_atom_types(struct):
"""识别所有关键原子类型并排除自身化学键"""
# 磷酸氧分类
p_oxygens = {"P=O": [], "P-O": [], "P-OH": []}
phosphate_hydrogens = [] # 仅P-OH基团中的H原子
# 水合氢离子识别
hydronium_oxygens = []
hydronium_hydrogens = [] # H₃O⁺中的H原子
# 普通水分子
water_oxygens = []
water_hydrogens = [] # 普通水中的H原子
# 氟离子
fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"]
# 铝离子
aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"]
# 创建快速邻居查找表
neighbor_cache = defaultdict(list)
for i, site in enumerate(struct):
if site.species_string == "O":
neighbors = struct.get_neighbors(site, r=1.3)
h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"]
neighbor_cache[i] = h_neighbors
# 识别水合氢离子 (H₃O⁺)
if len(h_neighbors) == 3:
hydronium_oxygens.append(i)
for h_site in h_neighbors:
hydronium_hydrogens.append(h_site.index)
# 识别磷酸基团
for site in struct:
if site.species_string == "P":
neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径
# 筛选氧原子邻居
o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"]
if len(o_neighbors) < 4:
# 如果找不到4个氧原子,使用旧方法
for neighbor in o_neighbors:
nn_site = neighbor[0]
if neighbor[1] < 1.55:
p_oxygens["P=O"].append(nn_site.index)
else:
if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)):
p_oxygens["P-OH"].append(nn_site.index)
else:
p_oxygens["P-O"].append(nn_site.index)
continue
# 按距离排序
o_neighbors.sort(key=lambda x: x[1])
# 最近的氧原子为P=O
p_double_o = o_neighbors[0][0]
p_oxygens["P=O"].append(p_double_o.index)
# 其他三个氧原子
for i in range(1, 4):
o_site = o_neighbors[i][0]
# 检查氧原子上是否有氢
if neighbor_cache.get(o_site.index, []):
p_oxygens["P-OH"].append(o_site.index)
else:
p_oxygens["P-O"].append(o_site.index)
# 识别P-OH基团中的H原子 (磷酸中的H)
for o_idx in p_oxygens["P-OH"]:
# 获取与P-OH氧相连的H原子
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
phosphate_hydrogens.append(h_site.index)
# 识别普通水分子 (排除磷酸氧和水合氢离子)
for i, site in enumerate(struct):
if site.species_string == "O" and i not in hydronium_oxygens:
is_phosphate_oxygen = False
for cat in p_oxygens.values():
if i in cat:
is_phosphate_oxygen = True
break
if not is_phosphate_oxygen:
water_oxygens.append(i)
# 识别普通水分子中的H原子 (水中的H)
for o_idx in water_oxygens:
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
water_hydrogens.append(h_site.index)
return {
"phosphate_oxygens": p_oxygens,
"phosphate_hydrogens": phosphate_hydrogens,
"water_oxygens": water_oxygens,
"water_hydrogens": water_hydrogens,
"hydronium_oxygens": hydronium_oxygens,
"hydronium_hydrogens": hydronium_hydrogens,
"fluoride_atoms": fluoride_atoms,
"aluminum_atoms": aluminum_atoms
}
# 2. RDF计算函数 - 修复负值问题和序列化问题
def process_frame(struct, center_sel, target_sel, r_max, exclude_bonds, bond_threshold):
"""处理单帧结构计算,完全处理空原子类型情况"""
# 每帧重新识别原子类型(关键!)
atom_types = identify_atom_types(struct)
# 获取中心原子和目标原子
centers = center_sel(atom_types)
targets = target_sel(atom_types)
# 处理空原子类型情况 - 第一重保护
if len(centers) == 0 or len(targets) == 0:
return {
"distances": np.array([], dtype=np.float64),
"n_centers": 0,
"n_targets": 0,
"volume": struct.volume
}
center_coords = np.array([struct[i].coords for i in centers])
target_coords = np.array([struct[i].coords for i in targets])
lattice = struct.lattice
kdtree = cKDTree(target_coords, boxsize=lattice.abc)
# 动态确定邻居数量 - 不超过目标原子数
k_val = min(50, len(targets))
# 处理目标原子数量为0的情况 - 第二重保护
if k_val == 0:
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
# 执行查询并确保结果统一格式
try:
query_result = kdtree.query(center_coords, k=k_val, distance_upper_bound=r_max)
except Exception as e:
# 异常处理 - 返回空结果
print(f"KDTree query error: {str(e)}")
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
# 统一处理不同维度的返回结果
if k_val == 1:
# 处理单邻居情况
if isinstance(query_result, tuple):
distances, indices = query_result
else:
distances = query_result
indices = np.zeros_like(distances, dtype=int)
# 确保数组格式
distances = np.atleast_1d(distances)
indices = np.atleast_1d(indices)
else:
# 多邻居情况
distances, indices = query_result
# 确保二维数组格式
if distances.ndim == 1:
distances = distances.reshape(-1, 1)
indices = indices.reshape(-1, 1)
valid_distances = []
for i in range(distances.shape[0]):
center_idx = centers[i]
for j in range(distances.shape[1]):
dist = distances[i, j]
# 跳过超出范围的距离
if dist > r_max or np.isinf(dist):
continue
target_idx = targets[indices[i, j]]
# 排除化学键
if exclude_bonds:
actual_dist = struct.get_distance(center_idx, target_idx)
if actual_dist < bond_threshold:
continue
valid_distances.append(dist)
return {
"distances": np.array(valid_distances, dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
def calculate_rdf_parallel(structures, center_sel, target_sel,
r_max=8.0, bin_width=0.05,
exclude_bonds=True, bond_threshold=1.3,
workers=1):
"""
并行计算径向分布函数
:param workers: 并行工作进程数
"""
bins = np.arange(0, r_max, bin_width)
hist = np.zeros(len(bins) - 1)
total_centers = 0
total_targets = 0
total_volume = 0
# 准备参数 - 使用dill解决lambda序列化问题
dill.settings['recurse'] = True
func = partial(process_frame,
center_sel=center_sel,
target_sel=target_sel,
r_max=r_max,
exclude_bonds=exclude_bonds,
bond_threshold=bond_threshold)
# 使用多进程池
with multiprocessing.Pool(processes=workers) as pool:
results = []
# 使用imap_unordered提高效率
for res in tqdm(pool.imap_unordered(func, structures), total=len(structures), desc="Calculating RDF"):
results.append(res)
# 处理结果 - 特别注意空结果处理
n_frames = 0
for res in results:
if res is None:
continue
n_frames += 1
valid_distances = res["distances"]
n_centers = res["n_centers"]
n_targets = res["n_targets"]
volume = res["volume"]
# 累加计数
if len(valid_distances) > 0:
hist += np.histogram(valid_distances, bins=bins)[0]
total_centers += n_centers
total_targets += n_targets
total_volume += volume
# 修正归一化 - 解决负值问题
if n_frames == 0:
# 没有有效帧时返回空结果
r = bins[:-1] + bin_width/2
return r, np.zeros_like(r), {"position": None, "value": None}
avg_density = total_targets / total_volume if total_volume > 0 else 0
r = bins[:-1] + bin_width/2
rdf = np.zeros_like(r)
for i in range(len(hist)):
r_lower = bins[i]
r_upper = bins[i+1]
shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3)
expected_count = shell_vol * avg_density * total_centers
# 避免除以零
if expected_count > 1e-10:
rdf[i] = hist[i] / expected_count
else:
rdf[i] = 0
# 更稳健的平滑处理 - 避免边界效应
if len(rdf) > 10:
window_length = min(15, len(rdf)//2*2+1)
polyorder = min(5, window_length-1)
rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=polyorder, mode='mirror')
else:
rdf_smoothed = rdf
# 计算主要峰值
peak_info = {}
mask = (r >= 1.5) & (r <= 3.0)
if np.any(mask) and np.any(rdf_smoothed[mask] > 0):
peak_idx = np.argmax(rdf_smoothed[mask])
peak_pos = r[mask][peak_idx]
peak_val = rdf_smoothed[mask][peak_idx]
peak_info = {"position": peak_pos, "value": peak_val}
else:
peak_info = {"position": None, "value": None}
return r, rdf_smoothed, peak_info
# 3. 定义选择器函数(避免lambda序列化问题)
def selector_phosphate_P_double_O(atom_types):
return atom_types["phosphate_oxygens"]["P=O"]
def selector_phosphate_P_OH(atom_types):
return atom_types["phosphate_oxygens"]["P-OH"]
def selector_phosphate_P_O(atom_types):
return atom_types["phosphate_oxygens"]["P-O"]
def selector_phosphate_hydrogens(atom_types):
return atom_types["phosphate_hydrogens"]
def selector_water_hydrogens(atom_types):
return atom_types["water_hydrogens"]
def selector_hydronium_hydrogens(atom_types):
return atom_types["hydronium_hydrogens"]
def selector_water_oxygens(atom_types):
return atom_types["water_oxygens"]
def selector_hydronium_oxygens(atom_types):
return atom_types["hydronium_oxygens"]
def selector_fluoride_atoms(atom_types):
return atom_types["fluoride_atoms"]
def selector_aluminum_atoms(atom_types):
return atom_types["aluminum_atoms"]
def selector_all_phosphate_oxygens(atom_types):
return (atom_types["phosphate_oxygens"]["P=O"] +
atom_types["phosphate_oxygens"]["P-O"] +
atom_types["phosphate_oxygens"]["P-OH"])
# 4. RDF分组定义
def get_rdf_groups():
"""返回RDF分组配置(使用预定义函数避免序列化问题)"""
return {
"Phosphate_H_Bonds": [
# 磷酸作为受体
(selector_phosphate_P_double_O,
lambda s: selector_water_hydrogens(s) + selector_hydronium_hydrogens(s),
"P=O···H", "#1f77b4"),
(selector_phosphate_P_OH,
lambda s: selector_water_hydrogens(s) + selector_hydronium_hydrogens(s),
"P-OH···H", "#ff7f0e"),
(selector_phosphate_P_O,
lambda s: selector_water_hydrogens(s) + selector_hydronium_hydrogens(s),
"P-O···H", "#17becf"),
# 磷酸作为供体
(selector_phosphate_hydrogens,
lambda s: selector_water_oxygens(s) + selector_hydronium_oxygens(s),
"P-OH···O", "#d62728"),
],
"Hydronium_H_Bonds": [
# 水合氢离子作为受体
(selector_hydronium_oxygens,
lambda s: selector_water_hydrogens(s) + selector_phosphate_hydrogens(s),
r"H$ _3$ O$^+$ O···H", "#9467bd"),
# 水合氢离子作为供体
(selector_hydronium_hydrogens,
selector_water_oxygens,
r"H$ _3$ O$^+$ H···O$ _w$", "#8c564b"),
(selector_hydronium_hydrogens,
selector_all_phosphate_oxygens,
r"H$ _3$ O$^+$ H···O$ _p$", "#e377c2"),
],
"Water_Network": [
# 水分子之间的氢键
(selector_water_oxygens,
selector_water_hydrogens,
r"O$ _w$···H$ _w$", "#2ca02c"),
# 水作为受体与水合氢离子供体
(selector_water_oxygens,
selector_hydronium_hydrogens,
r"O$ _w$···H$ _h$", "#d62728"),
],
"Fluoride_H_Bonds": [
# 氟离子作为受体
(selector_fluoride_atoms,
selector_water_hydrogens,
r"F···H$ _w$", "#2ca02c"),
(selector_fluoride_atoms,
selector_phosphate_hydrogens,
r"F···H$ _p$", "#d62728"),
(selector_fluoride_atoms,
selector_hydronium_hydrogens,
r"F···H$ _h$", "#9467bd"),
],
"Aluminum_Coordination": [
# 铝与水中的氧
(selector_aluminum_atoms,
selector_water_oxygens,
r"Al···O$ _w$", "#1f77b4"),
# 铝与磷酸中的氧
(selector_aluminum_atoms,
selector_all_phosphate_oxygens,
r"Al···O$ _p$", "#ff7f0e"),
# 铝与氟的配位
(selector_aluminum_atoms,
selector_fluoride_atoms,
r"Al···F", "#17becf"),
],
"Phosphate_Phosphate_H_Bonds": [
# 磷酸基团内部的氢键作用
(selector_phosphate_hydrogens,
selector_phosphate_P_double_O,
r"P-OH···P=O", "#1f77b4"),
(selector_phosphate_hydrogens,
selector_phosphate_P_O,
r"P-OH···P-O", "#ff7f0e"),
(selector_phosphate_hydrogens,
selector_phosphate_P_OH,
r"P-OH···P-OH", "#d62728"),
],
"Phosphate_Phosphate_Interactions": [
# 1. 所有磷酸氧之间的整体聚集
(selector_all_phosphate_oxygens,
selector_all_phosphate_oxygens,
"All P-Oxygens", "#1f77b4"),
# 2. 不同类型磷酸氧之间的特定相互作用
(selector_phosphate_P_double_O,
selector_phosphate_P_double_O,
"P=O···P=O", "#ff7f0e"),
(selector_phosphate_P_double_O,
selector_phosphate_P_O,
"P=O···P-O", "#2ca02c"),
(selector_phosphate_P_double_O,
selector_phosphate_P_OH,
"P=O···P-OH", "#d62728"),
(selector_phosphate_P_O,
selector_phosphate_P_OH,
"P-O···P-OH", "#9467bd"),
(selector_phosphate_P_OH,
selector_phosphate_P_OH,
"P-OH···P-OH", "#8c564b"),
# 3. 氢键供体-受体关系 (P-OH中的H与其他磷酸氧)
(selector_phosphate_hydrogens,
selector_phosphate_P_double_O,
"P-OH···P=O (H-bond)", "#e377c2"),
(selector_phosphate_hydrogens,
selector_phosphate_P_O,
"P-OH···P-O (H-bond)", "#7f7f7f"),
(selector_phosphate_hydrogens,
selector_phosphate_P_OH,
"P-OH···P-OH (H-bond)", "#bcbd22")
]
}
# 5. 主程序 - 优化并行处理
def main(workers=1):
# 定义要处理的体系
vasprun_files = {
"System1": "vasprun1.xml",
"System2": "vasprun2.xml",
"System3": "vasprun3.xml",
"System4": "vasprun4.xml"
}
# 获取RDF分组配置
rdf_groups = get_rdf_groups()
# 存储所有数据
all_system_data = {}
group_y_max = {group_name: 0 for group_name in list(rdf_groups.keys())}
global_x_max = 6.0
# 创建输出目录
os.makedirs("RDF_Plots", exist_ok=True)
# 计算所有体系的所有RDF数据
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}: {vasprun_file} with {workers} workers")
print(f"{'='*50}")
start_time = time.time()
try:
# 加载VASP结果
vr = Vasprun(vasprun_file, ionic_step_skip=5)
structures = vr.structures
print(f"Loaded {len(structures)} frames")
# 存储体系数据
system_data = {
"rdf_results": {},
"peak_infos": {}
}
# 计算所有RDF分组
for group_name, pairs in rdf_groups.items():
system_data["rdf_results"][group_name] = {}
system_data["peak_infos"][group_name] = {}
group_y_max_current = 0
for center_sel, target_sel, label, color in pairs:
print(f"\nCalculating RDF for: {label}")
try:
r, rdf, peak_info = calculate_rdf_parallel(
structures,
center_sel,
target_sel,
r_max=global_x_max,
exclude_bonds=True,
bond_threshold=1.3,
workers=workers
)
system_data["rdf_results"][group_name][label] = (r, rdf, color)
system_data["peak_infos"][group_name][label] = peak_info
if len(rdf) > 0:
current_max = np.max(rdf)
if current_max > group_y_max_current:
group_y_max_current = current_max
if peak_info["position"] is not None:
print(f" Peak for {label}: {peak_info['position']:.3f} Å (g(r) = {peak_info['value']:.2f})")
else:
print(f" No significant peak found for {label} in 1.5-3.0 Å range")
except Exception as e:
print(f"Error calculating RDF for {label}: {str(e)}")
system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color)
system_data["peak_infos"][group_name][label] = {"position": None, "value": None}
if group_y_max_current > group_y_max[group_name]:
group_y_max[group_name] = group_y_max_current
all_system_data[system_name] = system_data
elapsed = time.time() - start_time
print(f"\nCompleted processing for {system_name} in {elapsed:.2f} seconds")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}")
# 为每个分组添加余量
for group_name in group_y_max:
group_y_max[group_name] = max(group_y_max[group_name] * 1.15, 3.0) # 确保最小值
# 第二步:生成符合期刊要求的图表
for system_name, system_data in all_system_data.items():
print(f"\nGenerating publication-quality plots for {system_name}")
for group_name, group_data in system_data["rdf_results"].items():
fig, ax = plt.subplots(figsize=(8, 6))
for label, (r, rdf, color) in group_data.items():
if len(r) > 0 and len(rdf) > 0:
ax.plot(r, rdf, color=color, label=label, linewidth=2.0)
ax.set_xlim(0, global_x_max)
ax.set_ylim(0, group_y_max[group_name])
# 期刊格式标签
ax.set_xlabel('Radial Distance (Å)', fontweight='bold')
ax.set_ylabel('g(r)', fontweight='bold')
# 添加体系名称到标题
title_map = {
"Phosphate_H_Bonds": "Phosphate Hydrogen Bonding",
"Hydronium_H_Bonds": "Hydronium Ion Hydrogen Bonding",
"Water_Network": "Water Network Hydrogen Bonding",
"Fluoride_H_Bonds": "Fluoride Ion Hydrogen Bonding",
"Aluminum_Coordination": "Aluminum Coordination Environment",
"Phosphate_Phosphate_H_Bonds": "Phosphate-Phosphate Hydrogen Bonding",
"Phosphate_Phosphate_Interactions": "Phosphate-Phosphate Interactions"
}
ax.set_title(f"{system_name}: {title_map[group_name]}", fontsize=16, pad=15)
# 精简图例
ax.legend(ncol=1, loc='best', framealpha=0.8, fontsize=10)
# 添加氢键区域标记
ax.axvspan(1.5, 2.5, alpha=0.1, color='green', zorder=0)
# 添加网格
ax.grid(True, linestyle='--', alpha=0.5)
# 保存高分辨率图片
plt.tight_layout()
filename = os.path.join("RDF_Plots", f"RDF_{system_name}_{group_name}.tiff")
plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
print(f"Saved publication plot: {filename}")
plt.close()
# 保存Origin兼容数据
save_origin_data(system_name, system_data)
print("\nAll RDF analysis completed successfully!")
def save_origin_data(system_name, system_data):
"""保存Origin兼容格式数据"""
os.makedirs("Origin_Data", exist_ok=True)
system_dir = os.path.join("Origin_Data", system_name)
os.makedirs(system_dir, exist_ok=True)
# 保存峰值信息
peak_info_path = os.path.join(system_dir, f"Peak_Positions_{system_name}.csv")
with open(peak_info_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Group", "Interaction", "Peak Position (A)", "g(r) Value"])
for group_name, peaks in system_data["peak_infos"].items():
for label, info in peaks.items():
if info["position"] is not None:
writer.writerow([group_name, label, f"{info['position']:.3f}", f"{info['value']:.3f}"])
else:
writer.writerow([group_name, label, "N/A", "N/A"])
print(f"Saved peak positions: {peak_info_path}")
# 保存RDF数据
for group_name, group_results in system_data["rdf_results"].items():
group_dir = os.path.join(system_dir, group_name)
os.makedirs(group_dir, exist_ok=True)
for label, (r, rdf, color) in group_results.items():
if len(r) > 0 and len(rdf) > 0:
safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_")
safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "")
filename = f"RDF_{system_name}_{group_name}_{safe_label}.csv"
filepath = os.path.join(group_dir, filename)
with open(filepath, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Distance (A)", "g(r)"])
for i in range(len(r)):
writer.writerow([f"{r[i]:.6f}", f"{rdf[i]:.6f}"])
print(f"Saved Origin data: {filename}")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate RDF for VASP simulations')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers (default: {multiprocessing.cpu_count()})')
args = parser.parse_args()
print(f"Starting RDF analysis with {args.workers} workers...")
main(workers=args.workers)
也就是说你把上述的代码当中将H和O作为组合选择器的部分去除,加入现在我们细致区分的部分,输出完整代码
最新发布