Android DDMS connected to a physical device (rooted): fixing File Explorer not showing the data/data folder

This article explains how to fix the problem where Android DDMS, connected to a rooted device, cannot display the data/data folder in File Explorer — a folder that tools such as 360 Mobile Assistant and Wandoujia cannot show either. Two fixes are covered: setting 777 permissions with RE File Manager, and changing the permissions through the adb shell.
The root cause is missing permissions, which is why 360 Mobile Assistant and Wandoujia cannot see the folder either. The simplest fix is to open RE File Manager (with root access granted) and set both /data and /data/data to permission 777.

Note: when you open these folders in RE File Manager you will see that their default permissions are not 777; making them merely readable and writable is not enough — all permission bits must be set.
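To verify the change without the GUI, you can list the permission bits over adb from the host. This is an illustrative check, not part of the original walkthrough; ls -ld is available in the stock toolbox on most ROMs, though the output format varies by Android version:

C:\Documents and Settings\Administrator>adb shell ls -ld /data /data/data

Both entries should begin with drwxrwxrwx once the folders are actually set to 777.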

The other option is the adb shell. However, the shell built into Android is stripped down: its chmod does not support the -R (recursive) flag, and the recursive form fails even after you su to root.

First, open a command prompt in the directory containing adb — it is not necessarily C:\Documents and Settings\Administrator; change to wherever your own adb lives before running the commands below.

C:\Documents and Settings\Administrator>adb shell
shell@umts_spyder:/$ su
root@umts_spyder:/# chmod 777 -R /data
Unable to chmod -R: No such file or directory
root@umts_spyder:/#
Since -R is unavailable, the permissions have to be set one directory at a time:
root@umts_spyder:/# chmod 777 /data
root@umts_spyder:/# chmod 777 /data/data
root@umts_spyder:/#
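If you prefer not to type the commands interactively, the same two chmods can be chained from the host in one line. This is an illustrative sketch rather than part of the original walkthrough; it assumes your su binary supports the common -c flag, which most rooting packages (but not all) provide:

C:\Documents and Settings\Administrator>adb shell su -c "chmod 777 /data; chmod 777 /data/data"

Should DDMS still fail to open the per-app folders under /data/data, a shell loop can stand in for the missing -R flag (assuming the device shell supports for loops, as mksh on Android 4.x does):

root@umts_spyder:/# for d in /data/data/*; do chmod 777 $d; done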