import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.signal import savgol_filter
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib as mpl
import warnings
from collections import defaultdict
import os
import csv
import argparse
import multiprocessing
from functools import partial
import time
import dill
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置 - 符合Journal of Chemical Physics要求
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600, # 提高分辨率
'savefig.dpi': 600,
'figure.figsize': (8, 6), # 期刊常用尺寸
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5, # 加粗坐标轴线
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
# 1. 增强的原子类型识别函数 - 逐帧识别
def identify_atom_types(struct):
"""识别所有关键原子类型并排除自身化学键"""
# 磷酸氧分类
p_oxygens = {"P=O": [], "P-O": [], "P-OH": []}
phosphate_hydrogens = [] # 仅P-OH基团中的H原子
# 水合氢离子识别
hydronium_oxygens = []
hydronium_hydrogens = [] # H₃O⁺中的H原子
# 普通水分子
water_oxygens = []
water_hydrogens = [] # 普通水中的H原子
# 氟离子
fluoride_atoms = [i for i, site in enumerate(struct) if site.species_string == "F"]
# 铝离子
aluminum_atoms = [i for i, site in enumerate(struct) if site.species_string == "Al"]
# 创建快速邻居查找表
neighbor_cache = defaultdict(list)
for i, site in enumerate(struct):
if site.species_string == "O":
neighbors = struct.get_neighbors(site, r=1.3)
h_neighbors = [n[0] for n in neighbors if n[0].species_string == "H"]
neighbor_cache[i] = h_neighbors
# 识别水合氢离子 (H₃O⁺)
if len(h_neighbors) == 3:
hydronium_oxygens.append(i)
for h_site in h_neighbors:
hydronium_hydrogens.append(h_site.index)
# 识别磷酸基团
for site in struct:
if site.species_string == "P":
neighbors = struct.get_neighbors(site, r=2.0) # 扩大搜索半径
# 筛选氧原子邻居
o_neighbors = [(n[0], n[1]) for n in neighbors if n[0].species_string == "O"]
if len(o_neighbors) < 4:
# 如果找不到4个氧原子,使用旧方法
for neighbor in o_neighbors:
nn_site = neighbor[0]
if neighbor[1] < 1.55:
p_oxygens["P=O"].append(nn_site.index)
else:
if any(n[0].species_string == "H" for n in struct.get_neighbors(nn_site, r=1.3)):
p_oxygens["P-OH"].append(nn_site.index)
else:
p_oxygens["P-O"].append(nn_site.index)
continue
# 按距离排序
o_neighbors.sort(key=lambda x: x[1])
# 最近的氧原子为P=O
p_double_o = o_neighbors[0][0]
p_oxygens["P=O"].append(p_double_o.index)
# 其他三个氧原子
for i in range(1, 4):
o_site = o_neighbors[i][0]
# 检查氧原子上是否有氢
if neighbor_cache.get(o_site.index, []):
p_oxygens["P-OH"].append(o_site.index)
else:
p_oxygens["P-O"].append(o_site.index)
# 识别P-OH基团中的H原子 (磷酸中的H)
for o_idx in p_oxygens["P-OH"]:
# 获取与P-OH氧相连的H原子
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
phosphate_hydrogens.append(h_site.index)
# 识别普通水分子 (排除磷酸氧和水合氢离子)
for i, site in enumerate(struct):
if site.species_string == "O" and i not in hydronium_oxygens:
is_phosphate_oxygen = False
for cat in p_oxygens.values():
if i in cat:
is_phosphate_oxygen = True
break
if not is_phosphate_oxygen:
water_oxygens.append(i)
# 识别普通水分子中的H原子 (水中的H)
for o_idx in water_oxygens:
h_neighbors = neighbor_cache.get(o_idx, [])
for h_site in h_neighbors:
if h_site.species_string == "H":
water_hydrogens.append(h_site.index)
return {
"phosphate_oxygens": p_oxygens,
"phosphate_hydrogens": phosphate_hydrogens,
"water_oxygens": water_oxygens,
"water_hydrogens": water_hydrogens,
"hydronium_oxygens": hydronium_oxygens,
"hydronium_hydrogens": hydronium_hydrogens,
"fluoride_atoms": fluoride_atoms,
"aluminum_atoms": aluminum_atoms
}
# 2. RDF计算函数 - 修复负值问题和序列化问题
def process_frame(struct, center_sel, target_sel, r_max, exclude_bonds, bond_threshold):
"""处理单帧结构计算,完全处理空原子类型情况"""
# 每帧重新识别原子类型(关键!)
atom_types = identify_atom_types(struct)
# 获取中心原子和目标原子
centers = center_sel(atom_types)
targets = target_sel(atom_types)
# 处理空原子类型情况 - 第一重保护
if len(centers) == 0 or len(targets) == 0:
return {
"distances": np.array([], dtype=np.float64),
"n_centers": 0,
"n_targets": 0,
"volume": struct.volume
}
center_coords = np.array([struct[i].coords for i in centers])
target_coords = np.array([struct[i].coords for i in targets])
lattice = struct.lattice
kdtree = cKDTree(target_coords, boxsize=lattice.abc)
# 动态确定邻居数量 - 不超过目标原子数
k_val = min(50, len(targets))
# 处理目标原子数量为0的情况 - 第二重保护
if k_val == 0:
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
# 执行查询并确保结果统一格式
try:
query_result = kdtree.query(center_coords, k=k_val, distance_upper_bound=r_max)
except Exception as e:
# 异常处理 - 返回空结果
print(f"KDTree query error: {str(e)}")
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
# 统一处理不同维度的返回结果
if k_val == 1:
# 处理单邻居情况
if isinstance(query_result, tuple):
distances, indices = query_result
else:
distances = query_result
indices = np.zeros_like(distances, dtype=int)
# 确保数组格式
distances = np.atleast_1d(distances)
indices = np.atleast_1d(indices)
else:
# 多邻居情况
distances, indices = query_result
# 确保二维数组格式
if distances.ndim == 1:
distances = distances.reshape(-1, 1)
indices = indices.reshape(-1, 1)
valid_distances = []
for i in range(distances.shape[0]):
center_idx = centers[i]
for j in range(distances.shape[1]):
dist = distances[i, j]
# 跳过超出范围的距离
if dist > r_max or np.isinf(dist):
continue
target_idx = targets[indices[i, j]]
# 排除化学键
if exclude_bonds:
actual_dist = struct.get_distance(center_idx, target_idx)
if actual_dist < bond_threshold:
continue
valid_distances.append(dist)
return {
"distances": np.array(valid_distances, dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
def calculate_rdf_parallel(structures, center_sel, target_sel,
r_max=8.0, bin_width=0.05,
exclude_bonds=True, bond_threshold=1.3,
workers=1):
"""
并行计算径向分布函数
:param workers: 并行工作进程数
"""
bins = np.arange(0, r_max, bin_width)
hist = np.zeros(len(bins) - 1)
total_centers = 0
total_targets = 0
total_volume = 0
# 准备参数 - 使用dill解决序列化问题
dill.settings['recurse'] = True
func = partial(process_frame,
center_sel=center_sel,
target_sel=target_sel,
r_max=r_max,
exclude_bonds=exclude_bonds,
bond_threshold=bond_threshold)
# 使用多进程池
with multiprocessing.Pool(processes=workers) as pool:
results = []
# 使用imap_unordered提高效率
for res in tqdm(pool.imap_unordered(func, structures), total=len(structures), desc="Calculating RDF"):
results.append(res)
# 处理结果 - 特别注意空结果处理
n_frames = 0
for res in results:
if res is None:
continue
n_frames += 1
valid_distances = res["distances"]
n_centers = res["n_centers"]
n_targets = res["n_targets"]
volume = res["volume"]
# 累加计数
if len(valid_distances) > 0:
hist += np.histogram(valid_distances, bins=bins)[0]
total_centers += n_centers
total_targets += n_targets
total_volume += volume
# 修正归一化 - 解决负值问题
if n_frames == 0:
# 没有有效帧时返回空结果
r = bins[:-1] + bin_width/2
return r, np.zeros_like(r), {"position": None, "value": None}
avg_density = total_targets / total_volume if total_volume > 0 else 0
r = bins[:-1] + bin_width/2
rdf = np.zeros_like(r)
for i in range(len(hist)):
r_lower = bins[i]
r_upper = bins[i+1]
shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3)
expected_count = shell_vol * avg_density * total_centers
# 避免除以零
if expected_count > 1e-10:
rdf[i] = hist[i] / expected_count
else:
rdf[i] = 0
# 更稳健的平滑处理 - 避免边界效应
if len(rdf) > 10:
window_length = min(15, len(rdf)//2*2+1)
polyorder = min(5, window_length-1)
rdf_smoothed = savgol_filter(rdf, window_length=window_length, polyorder=polyorder, mode='mirror')
else:
rdf_smoothed = rdf
# 计算主要峰值
peak_info = {}
mask = (r >= 1.5) & (r <= 3.0)
if np.any(mask) and np.any(rdf_smoothed[mask] > 0):
peak_idx = np.argmax(rdf_smoothed[mask])
peak_pos = r[mask][peak_idx]
peak_val = rdf_smoothed[mask][peak_idx]
peak_info = {"position": peak_pos, "value": peak_val}
else:
peak_info = {"position": None, "value": None}
return r, rdf_smoothed, peak_info
# 3. 定义精细化的选择器函数(避免lambda序列化问题)
def selector_phosphate_P_double_O(atom_types):
return atom_types["phosphate_oxygens"]["P=O"]
def selector_phosphate_P_OH(atom_types):
return atom_types["phosphate_oxygens"]["P-OH"]
def selector_phosphate_P_O(atom_types):
return atom_types["phosphate_oxygens"]["P-O"]
def selector_phosphate_hydrogens(atom_types):
return atom_types["phosphate_hydrogens"]
def selector_water_only_hydrogens(atom_types):
"""仅选择水分子中的氢原子"""
return atom_types["water_hydrogens"]
def selector_hydronium_only_hydrogens(atom_types):
"""仅选择水合氢离子中的氢原子"""
return atom_types["hydronium_hydrogens"]
def selector_water_only_oxygens(atom_types):
"""仅选择水分子中的氧原子"""
return atom_types["water_oxygens"]
def selector_hydronium_only_oxygens(atom_types):
"""仅选择水合氢离子中的氧原子"""
return atom_types["hydronium_oxygens"]
def selector_fluoride_atoms(atom_types):
return atom_types["fluoride_atoms"]
def selector_aluminum_atoms(atom_types):
return atom_types["aluminum_atoms"]
def selector_all_phosphate_oxygens(atom_types):
return (atom_types["phosphate_oxygens"]["P=O"] +
atom_types["phosphate_oxygens"]["P-O"] +
atom_types["phosphate_oxygens"]["P-OH"])
# 4. 根据您的要求定义六张图的RDF分组配置
def get_rdf_groups():
"""返回六张图的RDF分组配置(完全符合您的需求)"""
return {
# 图1: Al的配位情况
"Al_Coordination": [
(selector_aluminum_atoms, selector_fluoride_atoms, "Al-F", "blue"),
(selector_aluminum_atoms, selector_water_only_oxygens, "Al-Ow", "green"),
(selector_aluminum_atoms, selector_all_phosphate_oxygens, "Al-Op", "red")
],
# 图2: F与H形成的氢键
"F_Hydrogen_Bonding": [
(selector_fluoride_atoms, selector_water_only_hydrogens, "F-Hw", "lightblue"),
(selector_fluoride_atoms, selector_hydronium_only_hydrogens, "F-Hh", "blue"),
(selector_fluoride_atoms, selector_phosphate_hydrogens, "F-Hp", "darkblue")
],
# 图3: 磷酸作为受体与周围环境的氢键(区分氧类型)
"Phosphate_Acceptor": [
(selector_phosphate_P_double_O, selector_water_only_hydrogens, "P=O···Hw", "orange"),
(selector_phosphate_P_double_O, selector_hydronium_only_hydrogens, "P=O···Hh", "red"),
(selector_phosphate_P_O, selector_water_only_hydrogens, "P-O···Hw", "lightgreen"),
(selector_phosphate_P_O, selector_hydronium_only_hydrogens, "P-O···Hh", "green"),
(selector_phosphate_P_OH, selector_water_only_hydrogens, "P-OH···Hw", "lightblue"),
(selector_phosphate_P_OH, selector_hydronium_only_hydrogens, "P-OH···Hh", "blue")
],
# 图4: 磷酸-水-水合氢离子交叉氢键(排除同种类型)
"Cross_Species_HBonding": [
(selector_phosphate_hydrogens, selector_water_only_oxygens, "Hp···Ow", "pink"),
(selector_phosphate_hydrogens, selector_hydronium_only_oxygens, "Hp···Oh", "purple"),
(selector_water_only_hydrogens, selector_all_phosphate_oxygens, "Hw···Op", "lightgreen"),
(selector_water_only_hydrogens, selector_hydronium_only_oxygens, "Hw···Oh", "green"),
(selector_hydronium_only_hydrogens, selector_water_only_oxygens, "Hh···Ow", "lightblue"),
(selector_hydronium_only_hydrogens, selector_all_phosphate_oxygens, "Hh···Op", "blue")
],
# 图5: 同类型分子内/间氢键(区分磷酸氧类型)
"Same_Species_HBonding": [
(selector_phosphate_hydrogens, selector_phosphate_P_double_O, "Hp···P=O", "red"),
(selector_phosphate_hydrogens, selector_phosphate_P_O, "Hp···P-O", "orange"),
(selector_phosphate_hydrogens, selector_phosphate_P_OH, "Hp···P-OH", "yellow"),
(selector_water_only_hydrogens, selector_water_only_oxygens, "Hw···Ow", "lightblue"),
(selector_hydronium_only_hydrogens, selector_hydronium_only_oxygens, "Hh···Oh", "blue")
],
# 图6: O-O聚集分析(Op不区分类型)
"O_O_Aggregation": [
(selector_all_phosphate_oxygens, selector_water_only_oxygens, "Op-Ow", "blue"),
(selector_all_phosphate_oxygens, selector_hydronium_only_oxygens, "Op-Oh", "green"),
(selector_all_phosphate_oxygens, selector_all_phosphate_oxygens, "Op-Op", "red"),
(selector_water_only_oxygens, selector_hydronium_only_oxygens, "Ow-Oh", "purple"),
(selector_water_only_oxygens, selector_water_only_oxygens, "Ow-Ow", "cyan"),
(selector_hydronium_only_oxygens, selector_hydronium_only_oxygens, "Oh-Oh", "magenta")
]
}
# 5. 主程序 - 优化并行处理
def main(workers=1):
# 定义要处理的体系
vasprun_files = {
"System1": "vasprun1.xml",
"System2": "vasprun2.xml",
"System3": "vasprun3.xml",
"System4": "vasprun4.xml"
}
# 获取RDF分组配置
rdf_groups = get_rdf_groups()
# 标题映射(根据您的要求)
title_map = {
"Al_Coordination": "Al Coordination Environment",
"F_Hydrogen_Bonding": "F-H Hydrogen Bonding",
"Phosphate_Acceptor": "Phosphate as H-bond Acceptor",
"Cross_Species_HBonding": "Cross H-bonding between Different Species",
"Same_Species_HBonding": "Intra- and Inter-molecular H-bonding",
"O_O_Aggregation": "O-O Aggregation Analysis"
}
# 存储所有数据
all_system_data = {}
group_y_max = {group_name: 0 for group_name in list(rdf_groups.keys())}
group_x_max = {
"Al_Coordination": (1.5, 3.5),
"F_Hydrogen_Bonding": (1.0, 3.0),
"Phosphate_Acceptor": (1.0, 3.0),
"Cross_Species_HBonding": (1.0, 3.0),
"Same_Species_HBonding": (1.0, 3.0),
"O_O_Aggregation": (2.0, 6.0)
}
# 创建输出目录
os.makedirs("RDF_Plots", exist_ok=True)
# 计算所有体系的所有RDF数据
for system_name, vasprun_file in vasprun_files.items():
print(f"\n{'='*50}")
print(f"Processing {system_name}: {vasprun_file} with {workers} workers")
print(f"{'='*50}")
start_time = time.time()
try:
# 加载VASP结果
vr = Vasprun(vasprun_file, ionic_step_skip=5)
structures = vr.structures
print(f"Loaded {len(structures)} frames")
# 存储体系数据
system_data = {
"rdf_results": {},
"peak_infos": {}
}
# 计算所有RDF分组
for group_name, pairs in rdf_groups.items():
system_data["rdf_results"][group_name] = {}
system_data["peak_infos"][group_name] = {}
group_y_max_current = 0
for center_sel, target_sel, label, color in pairs:
print(f"\nCalculating RDF for: {label}")
try:
r, rdf, peak_info = calculate_rdf_parallel(
structures,
center_sel,
target_sel,
r_max=10.0,
exclude_bonds=True,
bond_threshold=1.3,
workers=workers
)
system_data["rdf_results"][group_name][label] = (r, rdf, color)
system_data["peak_infos"][group_name][label] = peak_info
if len(rdf) > 0:
current_max = np.max(rdf)
if current_max > group_y_max_current:
group_y_max_current = current_max
if peak_info["position"] is not None:
print(f" Peak for {label}: {peak_info['position']:.3f} Å (g(r) = {peak_info['value']:.2f})")
else:
print(f" No significant peak found for {label} in 1.5-3.0 Å range")
except Exception as e:
print(f"Error calculating RDF for {label}: {str(e)}")
system_data["rdf_results"][group_name][label] = (np.array([]), np.array([]), color)
system_data["peak_infos"][group_name][label] = {"position": None, "value": None}
if group_y_max_current > group_y_max[group_name]:
group_y_max[group_name] = group_y_max_current
all_system_data[system_name] = system_data
elapsed = time.time() - start_time
print(f"\nCompleted processing for {system_name} in {elapsed:.2f} seconds")
except Exception as e:
print(f"Error processing {system_name}: {str(e)}")
# 为每个分组添加余量
for group_name in group_y_max:
group_y_max[group_name] = max(group_y_max[group_name] * 1.15, 3.0) # 确保最小值
# 第二步:生成符合期刊要求的图表
for system_name, system_data in all_system_data.items():
print(f"\nGenerating publication-quality plots for {system_name}")
for group_name, group_data in system_data["rdf_results"].items():
fig, ax = plt.subplots(figsize=(8, 6))
# 设置坐标轴范围
xlim = group_x_max.get(group_name, (0, 6.0))
ylim = (0, group_y_max[group_name])
for label, (r, rdf, color) in group_data.items():
if len(r) > 0 and len(rdf) > 0:
ax.plot(r, rdf, color=color, label=label, linewidth=2.0)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
# 期刊格式标签
ax.set_xlabel('Radial Distance (Å)', fontweight='bold')
ax.set_ylabel('g(r)', fontweight='bold')
# 添加体系名称到标题
ax.set_title(f"{system_name}: {title_map[group_name]}", fontsize=16, pad=15)
# 精简图例
ncol = 3 if group_name == "Same_Species_HBonding" else 1 # 图5使用三列图例
ax.legend(ncol=ncol, loc='best', framealpha=0.8, fontsize=10)
# 添加氢键区域标记(除O-O聚集图外)
if group_name != "O_O_Aggregation":
ax.axvspan(1.5, 2.5, alpha=0.1, color='green', zorder=0)
ax.text(1.7, ylim[1]*0.85, 'H-bond Region', fontsize=10)
# 添加网格
ax.grid(True, linestyle='--', alpha=0.5)
# 保存高分辨率图片
plt.tight_layout()
filename = os.path.join("RDF_Plots", f"RDF_{system_name}_{group_name}.tiff")
plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
print(f"Saved publication plot: {filename}")
plt.close()
# 保存Origin兼容数据
save_origin_data(system_name, system_data)
print("\nAll RDF analysis completed successfully!")
def save_origin_data(system_name, system_data):
"""保存Origin兼容格式数据"""
os.makedirs("Origin_Data", exist_ok=True)
system_dir = os.path.join("Origin_Data", system_name)
os.makedirs(system_dir, exist_ok=True)
# 保存峰值信息
peak_info_path = os.path.join(system_dir, f"Peak_Positions_{system_name}.csv")
with open(peak_info_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Group", "Interaction", "Peak Position (A)", "g(r) Value"])
for group_name, peaks in system_data["peak_infos"].items():
for label, info in peaks.items():
if info["position"] is not None:
writer.writerow([group_name, label, f"{info['position']:.3f}", f"{info['value']:.3f}"])
else:
writer.writerow([group_name, label, "N/A", "N/A"])
print(f"Saved peak positions: {peak_info_path}")
# 保存RDF数据
for group_name, group_results in system_data["rdf_results"].items():
group_dir = os.path.join(system_dir, group_name)
os.makedirs(group_dir, exist_ok=True)
for label, (r, rdf, color) in group_results.items():
if len(r) > 0 and len(rdf) > 0:
safe_label = label.replace(" ", "_").replace("/", "_").replace("=", "_")
safe_label = safe_label.replace("(", "").replace(")", "").replace("$", "")
filename = f"RDF_{system_name}_{group_name}_{safe_label}.csv"
filepath = os.path.join(group_dir, filename)
with open(filepath, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Distance (A)", "g(r)"])
for i in range(len(r)):
writer.writerow([f"{r[i]:.6f}", f"{rdf[i]:.6f}"])
print(f"Saved Origin data: {filename}")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate RDF for VASP simulations')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers (default: {multiprocessing.cpu_count()})')
args = parser.parse_args()
print(f"Starting RDF analysis with {args.workers} workers...")
main(workers=args.workers)
以上代码实现了湿法磷酸中水和水合氢离子以及磷酸之间的RDF计算,它是一个包含质子转移过程的部分。我们在这里将沿用这个框架来实现相同指令可执行的文件。我将对内容和识别逻辑进行修改,我们在这里只识别P,在识别P之后搜寻周围的O,如果P-O之间的距离小于1.8埃,则将该O视为Op。然后我们将Op作为中心原子,体系中的所有H(是整个体系中的所有H)作为目标原子计算RDF,输出文本和图。