import numpy as np
import tensorflow as tf
import pandas as pd
import random
import argparse
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm
import tensorflow.keras.backend as K
from tensorflow.keras import layers, Model
# Configure dynamic GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} physical GPUs, {len(logical_gpus)} logical GPUs")
    except RuntimeError as e:
        print(e)
# Utility functions
def positional_encoding(max_position, d_model, min_freq=1e-6):
    """Build a sinusoidal positional-encoding table of shape (max_position, d_model)."""
    position = np.arange(max_position)
    freqs = min_freq ** (2 * (np.arange(d_model) // 2) / d_model)
    pos_enc = position.reshape(-1, 1) * freqs.reshape(1, -1)
    pos_enc[:, ::2] = np.cos(pos_enc[:, ::2])
    pos_enc[:, 1::2] = np.sin(pos_enc[:, 1::2])
    return pos_enc.astype(np.float32)
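
# Hedged sanity-check sketch (not part of the original pipeline; safe to delete).
# Shows the expected shape/dtype of the encoding table built above.
def _demo_positional_encoding():
    table = positional_encoding(4, 8)
    assert table.shape == (4, 8)
    assert table.dtype == np.float32
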
# Initialize positional-encoding parameters
dimn = 64  # spectrum encoding dimension
cnn_feature_dim = 64  # CNN output feature dimension
transformer_dim = 64  # Transformer feature dimension
P = positional_encoding(256, transformer_dim, min_freq=1e2)
# Spectrum data augmentation
def augment_spectrum(mz_list, intensity_list, noise_factor=0.01):
    """Add Gaussian noise to m/z values and intensities; clamp intensities at zero."""
    noisy_mz = [mz + np.random.normal(0, noise_factor) for mz in mz_list]
    noisy_intensity = [intensity * (1 + np.random.normal(0, noise_factor)) for intensity in intensity_list]
    noisy_intensity = [max(0, i) for i in noisy_intensity]
    return noisy_mz, noisy_intensity
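
# Hedged usage sketch (illustrative only, never called by the pipeline):
# noise is applied independently to m/z and intensity, and intensities stay non-negative.
def _demo_augment_spectrum():
    mz, inten = augment_spectrum([100.0, 200.0], [50.0, 10.0])
    assert len(mz) == 2 and len(inten) == 2
    assert all(i >= 0 for i in inten)
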
def prepro_specs_train(df, augment=True):
    """Parse 'mz:intensity' pair strings into a ragged tensor of [mz_list, intensity_list]."""
    df = df.reset_index(drop=True)
    valid = []
    mz_intensity = df['Spectrum'].to_list()

    def process_line(line):
        pairs = line.split()
        mz_list = []
        intensity_list = []
        for pair in pairs:
            mz, intensity = pair.split(':')
            mz_list.append(float(mz))
            intensity_list.append(float(intensity))
        return mz_list, intensity_list

    for idx, intensities in tqdm(enumerate(mz_intensity), disable=False, desc="Preprocessing spectra"):
        mz_list, intensity_list = process_line(intensities)
        # Append the precursor mass as an extra m/z entry (it has no matching intensity)
        mz_list.append(float(df.at[idx, 'Total Exact Mass']))
        if augment:
            mz_list, intensity_list = augment_spectrum(mz_list, intensity_list)
        round_mz_list = [round(float(mz), 2) for mz in mz_list]
        round_intensity_list = [round(float(intensity), 2) for intensity in intensity_list]
        valid.append([round_mz_list, round_intensity_list])
    return tf.ragged.constant(valid)
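
# Hedged input-format note (inferred from process_line above): each row of the
# 'Spectrum' column is a whitespace-separated list of "mz:intensity" pairs, e.g.
#   "100.05:23.1 150.10:99.7 210.00:5.4"
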
def prepro_specs_test(df):
    return prepro_specs_train(df, augment=False)
def encoding(rag_tensor, P, dimn):
    """Map each spectrum to a fixed-length vector by averaging positional encodings
    indexed by (scaled, clipped) peak intensities."""
    to_pad = []
    for sample in tqdm(rag_tensor, desc="Encoding spectra"):
        mz_list = sample[0].numpy().tolist()  # parsed but not used by this encoding
        intensity_list = sample[1].numpy().tolist()
        positions = [min(int(round(intensity * 100)), len(P) - 1) for intensity in intensity_list]
        pos_enc = np.array([P[pos] for pos in positions]) if positions else np.zeros((1, dimn))
        averaged_encoding = [np.mean(pos_enc[:, dim]) for dim in range(dimn)]
        to_pad.append(averaged_encoding)
    return np.array(to_pad, dtype=np.float32)
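
# Hedged shape-check sketch (illustrative only): one two-peak spectrum encodes
# to a single dimn-dimensional vector.
def _demo_encoding():
    rag = tf.ragged.constant([[[100.0, 200.0], [0.5, 1.2]]])
    vecs = encoding(rag, P, dimn)
    assert vecs.shape == (1, dimn)
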
# Convert a scaffold SMILES string to a Morgan fingerprint
def scaffold_to_morgan(smiles, radius=2, nBits=2048):
    if pd.isna(smiles) or smiles == '':
        return np.zeros(nBits, dtype=np.float32)
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return np.zeros(nBits, dtype=np.float32)
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    return np.array(fp, dtype=np.float32)
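
# Hedged usage sketch (benzene is an assumed stand-in scaffold): valid SMILES
# give a 2048-bit vector; empty or unparsable input falls back to all zeros.
def _demo_scaffold_to_morgan():
    assert scaffold_to_morgan('c1ccccc1').shape == (2048,)
    assert scaffold_to_morgan('').sum() == 0
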
# Custom Transformer encoder layer
def transformer_encoder_layer(units, num_heads, dropout, name="transformer_encoder_layer"):
    inputs = layers.Input(shape=(None, units), name="inputs")
    # Self-attention block with residual connection and layer norm
    attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=units // num_heads, name="attention"
    )(inputs, inputs)
    attention = layers.Dropout(dropout)(attention)
    attention = layers.Add()([inputs, attention])
    attention = layers.LayerNormalization(epsilon=1e-6)(attention)
    # Position-wise feed-forward block with residual connection and layer norm
    ffn = layers.Dense(units * 2, activation="gelu",
                       kernel_regularizer=tf.keras.regularizers.l2(1e-4))(attention)
    ffn = layers.Dense(units, activation="gelu",
                       kernel_regularizer=tf.keras.regularizers.l2(1e-4))(ffn)
    ffn = layers.Dropout(dropout)(ffn)
    outputs = layers.Add()([attention, ffn])
    outputs = layers.LayerNormalization(epsilon=1e-6)(outputs)
    return Model(inputs=inputs, outputs=outputs, name=name)
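
# Hedged shape-check sketch (illustrative only): the encoder layer preserves
# both sequence length and feature width.
def _demo_transformer_encoder_layer():
    layer = transformer_encoder_layer(units=16, num_heads=2, dropout=0.1, name="demo_layer")
    out = layer(tf.zeros((2, 5, 16)))
    assert out.shape == (2, 5, 16)
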
# Build the CNN + Transformer model
def build_cnn_transformer_encoder(input_dim=dimn, cnn_filters=cnn_feature_dim,
                                  transformer_dim=transformer_dim, num_layers=2,
                                  num_heads=2, dropout=0.3):
    inputs = layers.Input(shape=(input_dim,), name="input_layer")
    x = layers.Reshape((input_dim, 1))(inputs)
    x = layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='gelu',
                      kernel_regularizer=tf.keras.regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout / 2)(x)
    x = layers.Conv1D(filters=cnn_filters, kernel_size=5, padding='same', activation='gelu',
                      kernel_regularizer=tf.keras.regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout / 2)(x)
    # Project to the Transformer width if the CNN output width differs
    if cnn_filters != transformer_dim:
        x = layers.Conv1D(filters=transformer_dim, kernel_size=1, padding='same')(x)
    # Add a sinusoidal positional encoding over the sequence axis
    pos_encoding = positional_encoding(input_dim, transformer_dim)
    pos_encoding = tf.convert_to_tensor(pos_encoding, dtype=tf.float32)
    pos_encoding = tf.expand_dims(pos_encoding, axis=0)
    x = layers.Add()([x, pos_encoding])
    for i in range(num_layers):
        x = transformer_encoder_layer(
            units=transformer_dim,
            num_heads=num_heads,
            dropout=dropout,
            name=f"transformer_layer_{i}"
        )(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(2048, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(2048, activation='relu', name="output_layer")(x)
    return Model(inputs=inputs, outputs=outputs, name="cnn_transformer_encoder")
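
# Hedged usage sketch (illustrative only, never called): the encoder maps a batch
# of dimn-dimensional spectrum vectors to 2048-d embeddings that match the
# Morgan-fingerprint width used by the loss.
def _demo_build_encoder():
    model = build_cnn_transformer_encoder()
    out = model(tf.zeros((2, dimn)))
    assert out.shape == (2, 2048)
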
# NT-Xent contrastive loss
def nt_xent_loss(y_true, y_pred, temperature=0.05):
    """NT-Xent loss over groups of 256 samples.

    generate_batches() places the single 'Pos' sample first in every group, so
    index 0 of each group is the positive pair (the 'Pos' spectrum embedding vs.
    its own Morgan fingerprint) and indices 1..255 are the 'Neg' fingerprints.
    """
    encoder_output = tf.nn.l2_normalize(y_pred, axis=1)
    morgan_fp = tf.nn.l2_normalize(y_true, axis=1)
    samples_per_group = 256
    batch_size = tf.shape(encoder_output)[0]
    num_groups = batch_size // samples_per_group
    encoder_grouped = tf.reshape(encoder_output, (num_groups, samples_per_group, -1))
    morgan_grouped = tf.reshape(morgan_fp, (num_groups, samples_per_group, -1))
    # similarity_matrix[g, i, j] = cos(spectrum_i, fingerprint_j) within group g
    similarity_matrix = tf.matmul(encoder_grouped, morgan_grouped, transpose_b=True)
    # Positive: the 'Pos' spectrum at index 0 against its own fingerprint
    positive_similarity = similarity_matrix[:, 0, 0]
    # Negatives: the 'Pos' spectrum against the 255 'Neg' fingerprints
    negative_similarities = similarity_matrix[:, 0, 1:]
    numerator = tf.exp(positive_similarity / temperature)
    # Per-group sum over the 255 negatives
    denominator = tf.reduce_sum(tf.exp(negative_similarities / temperature), axis=1)
    per_group_loss = -tf.math.log(numerator / (denominator + numerator))
    return tf.reduce_mean(per_group_loss)
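
# Hedged sanity-check sketch (illustrative only): with identical, one-hot rows the
# positive similarity is 1 and all negatives are 0, so the loss is close to zero.
def _demo_nt_xent_loss():
    emb = tf.eye(256, 2048)  # one group of 256 samples, 2048-d each
    loss = nt_xent_loss(emb, emb, temperature=0.05)
    assert 0.0 <= loss.numpy() < 1e-3
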
# Batch generator
def generate_batches(df, encoded_spectra, morgan_fps, groups_per_batch=2, shuffle=True):
    valid_smiles = []
    smiles_groups = df.groupby('SMILES')
    for smiles, group in smiles_groups:
        pos_count = len(group[group['Type'] == 'Pos'])
        neg_count = len(group[group['Type'] == 'Neg'])
        if pos_count == 1 and neg_count == 255:
            valid_smiles.append(smiles)
    if shuffle:
        random.shuffle(valid_smiles)
    for i in range(0, len(valid_smiles), groups_per_batch):
        batch_smiles = valid_smiles[i:i + groups_per_batch]
        if not batch_smiles:
            continue
        all_spectra = []
        all_morgan = []
        all_df = []
        for smiles in batch_smiles:
            group = smiles_groups.get_group(smiles)
            pos_samples = group[group['Type'] == 'Pos']
            neg_samples = group[group['Type'] == 'Neg']
            # Put the single 'Pos' sample first so that index 0 of every group is
            # the positive sample that nt_xent_loss and the Top-1 metric rely on
            ordered_group = pd.concat([pos_samples, neg_samples])
            group_indices = ordered_group.index.tolist()
            all_spectra.append(encoded_spectra[group_indices])
            all_morgan.append(morgan_fps[group_indices])
            all_df.append(ordered_group)
        batch_spectra = np.concatenate(all_spectra, axis=0)
        batch_morgan = np.concatenate(all_morgan, axis=0)
        batch_df = pd.concat(all_df, ignore_index=True)
        yield batch_spectra, batch_morgan, batch_df
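
# Hedged usage sketch (illustrative only): each yielded batch stacks
# groups_per_batch groups of 256 rows, positive-first within every group:
#   gen = generate_batches(df, spectra, fps, groups_per_batch=2)
#   batch_spectra, batch_morgan, batch_df = next(gen)
#   # batch_spectra.shape == (2 * 256, dimn)
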
# Count the number of usable batches
def count_valid_batches(df, groups_per_batch=2):
    valid_count = 0
    for _, group in df.groupby('SMILES'):
        pos_count = len(group[group['Type'] == 'Pos'])
        neg_count = len(group[group['Type'] == 'Neg'])
        if pos_count == 1 and neg_count == 255:
            valid_count += 1
    # Ceiling division: a trailing partial batch still counts
    return (valid_count + groups_per_batch - 1) // groups_per_batch
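
# Hedged arithmetic note: with 10 valid SMILES groups and groups_per_batch=4,
# (10 + 4 - 1) // 4 == 3 batches (two full batches plus one partial batch).
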
# Compute Top-1 accuracy
def calculate_top1_accuracy(model, test_df, test_spectra, test_morgan, groups_per_batch=2):
    correct = 0
    total = 0
    test_generator = generate_batches(
        test_df, test_spectra, test_morgan,
        groups_per_batch=groups_per_batch, shuffle=False
    )
    test_batch_count = count_valid_batches(test_df, groups_per_batch=groups_per_batch)
    for _ in tqdm(range(test_batch_count), desc="Computing Top-1 accuracy"):
        batch_spectra, batch_morgan, _ = next(test_generator)
        # Run in inference mode and convert to NumPy for sklearn's cosine_similarity
        encoder_output = model(batch_spectra, training=False).numpy()
        samples_per_group = 256
        num_groups = len(batch_spectra) // samples_per_group
        for group_idx in range(num_groups):
            start_idx = group_idx * samples_per_group
            end_idx = start_idx + samples_per_group
            group_encoder = encoder_output[start_idx:end_idx]
            group_morgan = batch_morgan[start_idx:end_idx]
            similarities = cosine_similarity(group_encoder, group_morgan)
            # The positive sample is always at index 0 (see generate_batches)
            pos_idx = 0
            pos_similarities = similarities[pos_idx]
            max_sim_idx = np.argmax(pos_similarities)
            if max_sim_idx == pos_idx:
                correct += 1
            total += 1
    if total == 0:
        return 0.0
    return correct / total
# Data loading and preprocessing
def load_and_preprocess_data(csv_path, test_size=0.2, random_state=46):
    print(f"Loading data: {csv_path}")
    df = pd.read_csv(csv_path)
    valid_smiles = []
    smiles_groups = df.groupby('SMILES')
    for smiles, group in smiles_groups:
        pos_count = len(group[group['Type'] == 'Pos'])
        neg_count = len(group[group['Type'] == 'Neg'])
        if pos_count == 1 and neg_count == 255:
            valid_smiles.append(smiles)
    print(f"Valid SMILES groups: {len(valid_smiles)}")
    # Split off an independent test set first
    train_val_smiles, test_smiles = train_test_split(
        valid_smiles, test_size=test_size, random_state=random_state
    )
    train_val_df = df[df['SMILES'].isin(train_val_smiles)].reset_index(drop=True)
    test_df = df[df['SMILES'].isin(test_smiles)].reset_index(drop=True)
    print(f"Train/val set size: {len(train_val_df)}, independent test set size: {len(test_df)}")
    print(f"Train/val SMILES: {len(train_val_smiles)}, test SMILES: {len(test_smiles)}")
    # Preprocess the test set
    print("Preprocessing test-set spectra...")
    test_rag_tensor = prepro_specs_test(test_df)
    test_encoded_spectra = encoding(test_rag_tensor, P, dimn)
    print("Converting test-set scaffolds to Morgan fingerprints...")
    test_df['morgan_fp'] = test_df['Scaffold'].apply(scaffold_to_morgan)
    test_morgan_fps = np.stack(test_df['morgan_fp'].values)
    return train_val_df, train_val_smiles, test_df, test_encoded_spectra, test_morgan_fps
# Cross-validation training
def cross_validate_model(train_val_df, train_val_smiles, hyperparams, n_splits=10, epochs=10):
    """Run k-fold cross-validation for one hyperparameter setting."""
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_results = []
    for fold, (train_idx, val_idx) in enumerate(kf.split(train_val_smiles)):
        print(f"\n{'=' * 50}")
        print(f"Starting fold {fold + 1}/{n_splits}")
        print(f"{'=' * 50}\n")
        # Split this fold's train/validation sets by SMILES
        train_smiles = [train_val_smiles[i] for i in train_idx]
        val_smiles = [train_val_smiles[i] for i in val_idx]
        fold_train_df = train_val_df[train_val_df['SMILES'].isin(train_smiles)].reset_index(drop=True)
        fold_val_df = train_val_df[train_val_df['SMILES'].isin(val_smiles)].reset_index(drop=True)
        # Preprocess spectra
        print("Preprocessing fold training spectra...")
        train_rag_tensor = prepro_specs_train(fold_train_df, augment=True)
        train_encoded_spectra = encoding(train_rag_tensor, P, dimn)
        print("Preprocessing fold validation spectra...")
        val_rag_tensor = prepro_specs_test(fold_val_df)
        val_encoded_spectra = encoding(val_rag_tensor, P, dimn)
        # Morgan fingerprints
        print("Converting fold training scaffolds to Morgan fingerprints...")
        fold_train_df['morgan_fp'] = fold_train_df['Scaffold'].apply(scaffold_to_morgan)
        train_morgan_fps = np.stack(fold_train_df['morgan_fp'].values)
        print("Converting fold validation scaffolds to Morgan fingerprints...")
        fold_val_df['morgan_fp'] = fold_val_df['Scaffold'].apply(scaffold_to_morgan)
        val_morgan_fps = np.stack(fold_val_df['morgan_fp'].values)
        # Build the model
        model = build_cnn_transformer_encoder(
            input_dim=dimn,
            cnn_filters=hyperparams['cnn_filters'],
            transformer_dim=hyperparams['transformer_dim'],
            num_layers=hyperparams['num_layers'],
            num_heads=hyperparams['num_heads'],
            dropout=hyperparams['dropout']
        )
        # Compile
        optimizer = tf.keras.optimizers.Adam(learning_rate=hyperparams['learning_rate'])
        model.compile(optimizer=optimizer, loss=nt_xent_loss)
        # Batch counts
        train_batch_count = count_valid_batches(fold_train_df, groups_per_batch=hyperparams['groups_per_batch'])
        val_batch_count = count_valid_batches(fold_val_df, groups_per_batch=hyperparams['groups_per_batch'])
        print(f"Fold training batches: {train_batch_count}, validation batches: {val_batch_count}")
        # Train
        best_val_acc = 0.0
        for epoch in range(epochs):
            print(f"\nEpoch {epoch + 1}/{epochs}")
            train_total_loss = 0.0
            train_generator = generate_batches(
                fold_train_df, train_encoded_spectra, train_morgan_fps,
                groups_per_batch=hyperparams['groups_per_batch'], shuffle=True
            )
            for _ in tqdm(range(train_batch_count), desc="Training"):
                batch_spectra, batch_morgan, _ = next(train_generator)
                loss = model.train_on_batch(batch_spectra, batch_morgan)
                train_total_loss += loss
            train_avg_loss = train_total_loss / train_batch_count if train_batch_count > 0 else 0.0
            # Validate
            val_generator = generate_batches(
                fold_val_df, val_encoded_spectra, val_morgan_fps,
                groups_per_batch=hyperparams['groups_per_batch'], shuffle=False
            )
            val_total_loss = 0.0
            for _ in range(val_batch_count):
                batch_spectra, batch_morgan, _ = next(val_generator)
                loss = model.test_on_batch(batch_spectra, batch_morgan)
                val_total_loss += loss
            val_avg_loss = val_total_loss / val_batch_count if val_batch_count > 0 else 0.0
            # Validation Top-1 accuracy
            val_acc = calculate_top1_accuracy(
                model, fold_val_df, val_encoded_spectra, val_morgan_fps,
                groups_per_batch=hyperparams['groups_per_batch']
            )
            print(f"Train loss: {train_avg_loss:.6f}, val loss: {val_avg_loss:.6f}, val accuracy: {val_acc:.6f}")
            # Track the best accuracy within this fold
            if val_acc > best_val_acc:
                best_val_acc = val_acc
        fold_results.append(best_val_acc)
        print(f"\nBest validation accuracy for fold {fold + 1}: {best_val_acc:.6f}")
        # Free memory between folds
        K.clear_session()
    # Aggregate cross-validation results
    mean_acc = np.mean(fold_results)
    std_acc = np.std(fold_results)
    print(f"\n{'=' * 50}")
    print(f"{n_splits}-fold cross-validation result: {mean_acc:.6f} ± {std_acc:.6f}")
    print(f"{'=' * 50}\n")
    return mean_acc, std_acc, fold_results
# Train the final model on the full training set
def train_final_model(train_val_df, hyperparams, epochs=10):
    print("\nTraining the final model on the full training set...")
    # Preprocess all training data
    print("Preprocessing full training-set spectra...")
    train_rag_tensor = prepro_specs_train(train_val_df, augment=True)
    train_encoded_spectra = encoding(train_rag_tensor, P, dimn)
    print("Converting full training-set scaffolds to Morgan fingerprints...")
    train_val_df['morgan_fp'] = train_val_df['Scaffold'].apply(scaffold_to_morgan)
    train_morgan_fps = np.stack(train_val_df['morgan_fp'].values)
    # Build the model
    model = build_cnn_transformer_encoder(
        input_dim=dimn,
        cnn_filters=hyperparams['cnn_filters'],
        transformer_dim=hyperparams['transformer_dim'],
        num_layers=hyperparams['num_layers'],
        num_heads=hyperparams['num_heads'],
        dropout=hyperparams['dropout']
    )
    # Compile
    optimizer = tf.keras.optimizers.Adam(learning_rate=hyperparams['learning_rate'])
    model.compile(optimizer=optimizer, loss=nt_xent_loss)
    # Batch count
    train_batch_count = count_valid_batches(train_val_df, groups_per_batch=hyperparams['groups_per_batch'])
    print(f"Full training-set batches: {train_batch_count}")
    # Train
    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")
        train_total_loss = 0.0
        train_generator = generate_batches(
            train_val_df, train_encoded_spectra, train_morgan_fps,
            groups_per_batch=hyperparams['groups_per_batch'], shuffle=True
        )
        for _ in tqdm(range(train_batch_count), desc="Training"):
            batch_spectra, batch_morgan, _ = next(train_generator)
            loss = model.train_on_batch(batch_spectra, batch_morgan)
            train_total_loss += loss
        train_avg_loss = train_total_loss / train_batch_count if train_batch_count > 0 else 0.0
        print(f"Train loss: {train_avg_loss:.6f}")
    return model
# Main entry point
def main(args):
    # Load data and split into train/val and an independent test set
    train_val_df, train_val_smiles, test_df, test_encoded_spectra, test_morgan_fps = load_and_preprocess_data(
        args.data, test_size=args.test_size
    )
    # Hyperparameter search space: the full grid over num_layers, num_heads,
    # dropout, and learning rate; each dict is one complete combination
    # (16 in total, identical to writing them out by hand)
    from itertools import product
    param_grid = [
        {
            'cnn_filters': 64,
            'transformer_dim': 64,
            'num_layers': num_layers,
            'num_heads': num_heads,
            'dropout': dropout,
            'learning_rate': lr,
            'groups_per_batch': args.groups_per_batch
        }
        for num_layers, num_heads, dropout, lr in product([1, 2], [2, 4], [0.3, 0.4], [1e-4, 5e-5])
    ]
    # Hyperparameter search
    best_score = -1
    best_params = None
    results = []
    print("\nStarting hyperparameter search with 10-fold cross-validation...")
    for i, params in enumerate(param_grid):
        print(f"\n{'#' * 50}")
        print(f"Testing hyperparameter set {i + 1}/{len(param_grid)}: {params}")
        print(f"{'#' * 50}\n")
        mean_acc, std_acc, fold_results = cross_validate_model(
            train_val_df, train_val_smiles, params, n_splits=10, epochs=args.epochs
        )
        results.append({
            'params': params,
            'mean_acc': mean_acc,
            'std_acc': std_acc,
            'fold_results': fold_results
        })
        if mean_acc > best_score:
            best_score = mean_acc
            best_params = params
    # Report the best hyperparameters
    print("\n" + "=" * 70)
    print("Hyperparameter search complete!")
    print(f"Best hyperparameters: {best_params}")
    print(f"Best cross-validation accuracy: {best_score:.6f}")
    print("=" * 70 + "\n")
    # Train the final model with the best hyperparameters
    final_model = train_final_model(train_val_df, best_params, epochs=args.epochs)
    # Evaluate on the independent test set
    print("\nEvaluating the final model on the independent test set...")
    test_acc = calculate_top1_accuracy(
        final_model, test_df, test_encoded_spectra, test_morgan_fps,
        groups_per_batch=best_params['groups_per_batch']
    )
    print(f"\n{'=' * 70}")
    print(f"Independent test-set Top-1 accuracy: {test_acc:.6f}")
    print("=" * 70 + "\n")
    # Save the model
    final_model.save(args.output)
    print(f"Final model saved to: {args.output}")
    # Save the hyperparameter search results
    import json
    with open('hyperparameter_results.json', 'w') as f:
        json.dump(results, f, indent=2,
                  default=lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
    print("Hyperparameter search results saved to hyperparameter_results.json")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Contrastive-learning model (CNN + Transformer) for mass-spectrum data with 10-fold cross-validation')
    parser.add_argument('--data', type=str, default='/home/admin123/code/骨架256.csv',
                        help='Path to the CSV data file')
    parser.add_argument('--epochs', type=int, default=1000, help='Number of training epochs')
    parser.add_argument('--test_size', type=float, default=0.2,
                        help='Fraction held out as the independent test set')
    parser.add_argument('--groups_per_batch', type=int, default=8,
                        choices=range(1, 65),
                        help='Number of SMILES groups per batch (1 to 64)')
    parser.add_argument('--output', type=str, default='best_cnn_transformer_encoder.h5',
                        help='Path to save the final model')
    args = parser.parse_args()
    main(args)