import sys
import io
import numpy as np
import matplotlib.pyplot as plt
from pymatgen.io.vasp import Vasprun
from pymatgen.core.structure import Structure
from scipy.spatial import cKDTree
import matplotlib as mpl
import warnings
import os
import argparse
import multiprocessing
from functools import partial
import time
import logging
from collections import defaultdict
# 设置编码为 UTF-8 以支持特殊字符
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
# 配置日志记录
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler("rdf_analysis.log", encoding='utf-8'),
logging.StreamHandler()
]
)
# 忽略可能的警告
warnings.filterwarnings("ignore", category=UserWarning)
# 专业绘图设置
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'font.family': 'serif',
'font.serif': ['Times New Roman', 'DejaVu Serif'],
'font.size': 12,
'axes.labelsize': 14,
'axes.titlesize': 16,
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'figure.dpi': 600,
'savefig.dpi': 600,
'figure.figsize': (8, 6),
'lines.linewidth': 2.0,
'legend.fontsize': 10,
'legend.framealpha': 0.8,
'mathtext.default': 'regular',
'axes.linewidth': 1.5,
'xtick.major.width': 1.5,
'ytick.major.width': 1.5,
'xtick.major.size': 5,
'ytick.major.size': 5,
})
def identify_atom_types(struct):
"""
识别磷酸氧(Op)、非磷酸氧(O_non-Op)和所有氢原子(H)
:param struct: 结构对象
:return: 包含Op, O_non-Op和H索引的字典
"""
phosphate_oxygens = []
all_oxygens = []
all_hydrogens = []
# 识别所有氧原子和氢原子
for i, site in enumerate(struct):
if site.species_string == "O":
all_oxygens.append(i)
elif site.species_string == "H":
all_hydrogens.append(i)
# 识别磷酸氧:与P原子距离小于1.6A的氧原子
for i, site in enumerate(struct):
if site.species_string == "P":
neighbors = struct.get_neighbors(site, r=1.6)
for neighbor in neighbors:
if neighbor[0].species_string == "O":
phosphate_oxygens.append(neighbor[0].index)
# 去重
phosphate_oxygens = list(set(phosphate_oxygens))
# 非磷酸氧 = 所有氧 - 磷酸氧
non_phosphate_oxygens = [idx for idx in all_oxygens if idx not in phosphate_oxygens]
logging.info(f"磷酸氧(Op): {len(phosphate_oxygens)}个, "
f"非磷酸氧(O_non-Op): {len(non_phosphate_oxygens)}个, "
f"氢原子(H): {len(all_hydrogens)}个")
return {
"phosphate_oxygens": phosphate_oxygens,
"non_phosphate_oxygens": non_phosphate_oxygens,
"all_hydrogens": all_hydrogens
}
def process_frame(struct, center_sel, target_sel, r_max, exclude_bonds, bond_threshold):
"""处理单帧结构计算RDF"""
try:
atom_types = identify_atom_types(struct)
centers = center_sel(atom_types)
targets = target_sel(atom_types)
# 处理空原子类型情况
if len(centers) == 0 or len(targets) == 0:
logging.warning(f"No centers or targets found in frame")
return {
"distances": np.array([], dtype=np.float64),
"n_centers": 0,
"n_targets": 0,
"volume": struct.volume
}
center_coords = np.array([struct[i].coords for i in centers])
target_coords = np.array([struct[i].coords for i in targets])
lattice = struct.lattice
kdtree = cKDTree(target_coords, boxsize=lattice.abc)
# 动态确定邻居数量
k_val = min(50, len(targets))
if k_val == 0:
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
try:
distances, indices = kdtree.query(center_coords, k=k_val, distance_upper_bound=r_max)
except Exception as e:
logging.error(f"KDTree query error: {str(e)}")
return {
"distances": np.array([], dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
# 处理查询结果
valid_distances = []
for i in range(distances.shape[0]):
center_idx = centers[i]
for j in range(distances.shape[1]):
dist = distances[i, j]
if dist > r_max or np.isinf(dist):
continue
target_idx = targets[indices[i, j]]
# 排除化学键
if exclude_bonds:
actual_dist = struct.get_distance(center_idx, target_idx)
if actual_dist < bond_threshold:
continue
valid_distances.append(dist)
return {
"distances": np.array(valid_distances, dtype=np.float64),
"n_centers": len(centers),
"n_targets": len(targets),
"volume": struct.volume
}
except Exception as e:
logging.error(f"Error processing frame: {str(e)}")
return {
"distances": np.array([], dtype=np.float64),
"n_centers": 0,
"n_targets": 0,
"volume": struct.volume
}
def calculate_rdf(structures, center_sel, target_sel,
r_max=8.0, bin_width=0.05,
exclude_bonds=True, bond_threshold=1.3,
workers=1):
"""
计算径向分布函数
:param workers: 并行工作进程数
"""
bins = np.arange(0, r_max, bin_width)
hist = np.zeros(len(bins) - 1)
total_centers = 0
total_targets = 0
total_volume = 0
func = partial(process_frame,
center_sel=center_sel,
target_sel=target_sel,
r_max=r_max,
exclude_bonds=exclude_bonds,
bond_threshold=bond_threshold)
# 使用多进程池
with multiprocessing.Pool(processes=workers) as pool:
results = []
try:
for res in pool.imap_unordered(func, structures):
results.append(res)
except Exception as e:
logging.error(f"Error in parallel processing: {str(e)}")
# 处理结果
n_frames = 0
for res in results:
if res is None:
continue
n_frames += 1
valid_distances = res["distances"]
n_centers = res["n_centers"]
n_targets = res["n_targets"]
volume = res["volume"]
if len(valid_distances) > 0:
hist += np.histogram(valid_distances, bins=bins)[0]
total_centers += n_centers
total_targets += n_targets
total_volume += volume
if n_frames == 0:
r = bins[:-1] + bin_width/2
return r, np.zeros_like(r)
avg_density = total_targets / total_volume if total_volume > 0 else 0
r = bins[:-1] + bin_width/2
rdf = np.zeros_like(r)
for i in range(len(hist)):
r_lower = bins[i]
r_upper = bins[i+1]
shell_vol = 4/3 * np.pi * (r_upper**3 - r_lower**3)
expected_count = shell_vol * avg_density * total_centers
if expected_count > 1e-10:
rdf[i] = hist[i] / expected_count
else:
rdf[i] = 0
return r, rdf
# 选择器函数
def selector_non_phosphate_oxygens(atom_types):
return atom_types["non_phosphate_oxygens"]
def selector_all_hydrogens(atom_types):
return atom_types["all_hydrogens"]
def save_rdf_data(r, rdf, output_dir, system_name):
"""保存RDF数据到文本文件"""
os.makedirs(output_dir, exist_ok=True)
filename = os.path.join(output_dir, f"{system_name}_O_non-Op_H_RDF.txt")
try:
with open(filename, 'w', encoding='utf-8') as f:
# 使用A代替Å,避免编码问题
f.write("# Distance (A)\tg(r)\n")
for i in range(len(r)):
f.write(f"{r[i]:.4f}\t{rdf[i]:.6f}\n")
logging.info(f"Saved RDF data to: {filename}")
return True
except Exception as e:
logging.error(f"Error saving RDF data: {str(e)}")
return False
def plot_single_rdf(r, rdf, output_dir, system_name):
"""绘制并保存单个RDF图"""
try:
fig, ax = plt.subplots(figsize=(8, 6))
# 绘制RDF
ax.plot(r, rdf, 'b-', linewidth=2.0, label=f'{system_name} O_non-Op-H RDF')
# 标记氢键区域
ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
ax.text(1.3, np.max(rdf)*0.8, 'H-bond Region', fontsize=12)
# 设置坐标轴 - 使用A代替Å
ax.set_xlim(0, 6.0)
ax.set_ylim(0, np.max(rdf)*1.2)
ax.set_xlabel('Radial Distance (A)', fontweight='bold') # 使用A代替Å
ax.set_ylabel('g(r)', fontweight='bold')
ax.set_title(f'{system_name}: Non-Phosphate Oxygen - Hydrogen RDF', fontsize=16, pad=15)
# 添加网格和图例
ax.grid(True, linestyle='--', alpha=0.5)
ax.legend(loc='best', framealpha=0.8)
# 保存图片
plt.tight_layout()
filename = os.path.join(output_dir, f"{system_name}_O_non-Op_H_RDF.tiff")
plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
plt.close()
logging.info(f"Saved RDF plot to: {filename}")
return True
except Exception as e:
logging.error(f"Error plotting RDF for {system_name}: {str(e)}")
return False
def plot_combined_rdf(all_rdf_data, output_dir):
"""绘制并保存合并的RDF图"""
try:
fig, ax = plt.subplots(figsize=(10, 8))
# 颜色和线型设置
colors = ['b', 'r', 'g', 'm', 'c', 'y', 'k']
line_styles = ['-', '--', '-.', ':']
# 绘制所有体系的RDF曲线
for i, (system_name, (r, rdf)) in enumerate(all_rdf_data.items()):
color = colors[i % len(colors)]
line_style = line_styles[(i // len(colors)) % len(line_styles)]
ax.plot(r, rdf, color=color, linestyle=line_style,
linewidth=2.0, label=system_name)
# 标记氢键区域
ax.axvspan(1.0, 2.0, alpha=0.1, color='green', label='H-bond Region')
ax.text(1.3, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()])*0.8,
'H-bond Region', fontsize=12)
# 设置坐标轴
ax.set_xlim(0, 6.0)
ax.set_ylim(0, np.max([np.max(rdf) for _, (_, rdf) in all_rdf_data.items()])*1.2)
ax.set_xlabel('Radial Distance (A)', fontweight='bold')
ax.set_ylabel('g(r)', fontweight='bold')
ax.set_title('Non-Phosphate Oxygen - Hydrogen RDF Comparison', fontsize=16, pad=15)
# 添加网格和图例
ax.grid(True, linestyle='--', alpha=0.5)
ax.legend(loc='best', framealpha=0.8)
# 保存图片
plt.tight_layout()
filename = os.path.join(output_dir, "combined_O_non-Op_H_RDF.tiff")
plt.savefig(filename, bbox_inches='tight', dpi=600, format='tiff')
plt.close()
logging.info(f"Saved combined RDF plot to: {filename}")
return True
except Exception as e:
logging.error(f"Error plotting combined RDF: {str(e)}")
return False
def load_vasprun_safe(filename):
"""安全加载Vasprun文件,处理编码问题"""
try:
# 尝试使用pymatgen的标准方法
return Vasprun(filename, ionic_step_skip=5)
except Exception as e:
logging.warning(f"Standard loading failed for {filename}: {str(e)}. Trying alternative method...")
try:
# 尝试显式指定编码
return Vasprun(filename, ionic_step_skip=5, parse_dos=False, parse_eigen=False)
except Exception as e2:
logging.error(f"Alternative loading failed for {filename}: {str(e2)}")
return None
def process_single_file(input_file, workers, output_dir):
"""处理单个VASP结果文件"""
system_name = os.path.splitext(os.path.basename(input_file))[0]
logging.info(f"Processing {input_file} with {workers} workers...")
try:
# 安全加载VASP结果
vr = load_vasprun_safe(input_file)
if vr is None:
logging.error(f"Failed to load VASP results from {input_file}")
return None, None
structures = vr.structures
logging.info(f"Loaded {len(structures)} frames for {system_name}")
# 计算O_non-Op-H RDF
r, rdf = calculate_rdf(
structures,
selector_non_phosphate_oxygens, # 中心原子:非磷酸氧
selector_all_hydrogens, # 目标原子:所有氢原子
r_max=8.0,
bin_width=0.05,
exclude_bonds=True,
bond_threshold=0,
workers=workers
)
# 保存数据
save_success = save_rdf_data(r, rdf, output_dir, system_name)
# 绘制单个图表
plot_success = plot_single_rdf(r, rdf, output_dir, system_name)
if save_success and plot_success:
logging.info(f"Completed processing for {system_name}")
else:
logging.warning(f"Processing for {system_name} completed with errors")
return system_name, (r, rdf)
except Exception as e:
logging.error(f"Error processing {input_file}: {str(e)}")
return None, None
def main(input_files, workers=1):
"""主函数,处理多个文件"""
start_time = time.time()
# 创建输出目录
output_dir = "RDF_Results"
os.makedirs(output_dir, exist_ok=True)
# 存储所有体系的RDF数据
all_rdf_data = {}
# 处理每个输入文件
for input_file in input_files:
system_name, rdf_data = process_single_file(input_file, workers, output_dir)
if rdf_data:
all_rdf_data[system_name] = rdf_data
# 绘制合并的RDF图
if len(all_rdf_data) > 1:
plot_combined_rdf(all_rdf_data, output_dir)
elapsed = time.time() - start_time
logging.info(f"Completed all processing in {elapsed:.2f} seconds")
if __name__ == "__main__":
# 设置命令行参数
parser = argparse.ArgumentParser(description='Calculate O_non-Op-H RDF for multiple VASP simulations')
parser.add_argument('input_files', type=str, nargs='+',
help='Input vasprun.xml files (e.g., vasprun1.xml vasprun2.xml ...)')
parser.add_argument('--workers', type=int, default=multiprocessing.cpu_count(),
help=f'Number of parallel workers per file (default: {multiprocessing.cpu_count()})')
args = parser.parse_args()
logging.info(f"Starting O_non-Op-H RDF analysis for {len(args.input_files)} files with {args.workers} workers per file...")
main(args.input_files, workers=args.workers)
该代码输出的结果是非磷酸氧与所以H之间的RDF吗?输出的结果又如何命名的