# 需要的包
import torch
import numpy as np
def _bin_count(value, resolution):
    """Return how many whole bins of width ``resolution`` fit below ``value``.

    The quotient is rounded to 6 decimals before truncation so that binary
    floating-point round-off cannot push a value into the bin below
    (e.g. ``114.10 / 0.1`` evaluates to ``1140.9999999999998``, which a plain
    ``int()`` would truncate to 1140 instead of 1141).
    """
    return int(round(value / resolution, 6))


def build_intensity_vector(peaks, max_mass, resolution):
    """Bin ``(m/z, intensity)`` peaks into a dense 1-D intensity tensor.

    Args:
        peaks: iterable of ``(mz, intensity)`` pairs.
        max_mass: upper m/z limit in Da; peaks at or above it are dropped.
        resolution: bin width in Da.

    Returns:
        A ``torch`` tensor of length ``max_mass / resolution`` where element
        ``i`` holds the intensity of the peak whose m/z falls in bin ``i``
        (zero where there is no peak).

    When several peaks fall into the same bin the strongest one is kept, so
    the result does not depend on the order of ``peaks``.
    """
    length = _bin_count(max_mass, resolution)
    strongest = {}
    for mz, intensity in peaks:
        index = _bin_count(mz, resolution)
        if 0 <= index < length:
            strongest[index] = max(strongest.get(index, intensity), intensity)

    vector = torch.zeros(length)
    for index, intensity in strongest.items():
        vector[index] = intensity
    return vector


# One example spectrum as a list of (m/z, intensity) pairs.
# In practice these would typically be read from an .mgf file (e.g. with
# pyteomics); a hard-coded example is used here.
spectrum_peaks = [
    (112.05, 45.2),
    (114.10, 80.6),
    (118.12, 12.4),
    (200.50, 150.0),
    (500.25, 70.0),
]

# Binning parameters.
max_mass = 5000  # upper m/z limit: 5000 Da
resolution = 0.1  # bin width: 0.1 Da

# Length of the intensity vector (50000 bins with the settings above).
vector_length = _bin_count(max_mass, resolution)

# Dense intensity vector: zeros everywhere except the bins that hold a peak.
# Per the original author's note, this is the input format DeepNovo expects.
intensity_vector = build_intensity_vector(spectrum_peaks, max_mass, resolution)

nonzero_bins = intensity_vector.nonzero(as_tuple=True)
print("Intensity vector shape:", intensity_vector.shape)
print("Nonzero elements:", nonzero_bins[0])
print("Corresponding intensities:", intensity_vector[nonzero_bins])

# Reshape for a 2-D CNN. PyTorch's Conv2d expects (batch, channel, height,
# width); the spectrum axis must be the LAST (width) dimension so that a
# (1, k) kernel slides along m/z. Here batch_size=1, one channel, height=1.
input_tensor = intensity_vector.view(1, 1, 1, -1)  # (batch, channel, height, width)
print("Input tensor shape (for CNN):", input_tensor.shape)
# 小小模拟一个简单CNN
import torch.nn as nn
class SimpleSpectrumCNN(nn.Module):
    """Tiny demo network: one strided convolution along m/z, then a dense layer.

    Expects input of shape ``(batch, 1, 1, input_length)`` (PyTorch NCHW with
    height 1 and the binned spectrum along the width axis) and returns a
    tensor of shape ``(batch, 512)``.

    Args:
        input_length: number of bins in each input spectrum. When omitted,
            the module-level ``vector_length`` is used, which matches the
            original zero-argument behaviour.

    Raises:
        ValueError: if ``input_length`` is smaller than the kernel width.
    """

    _KERNEL_WIDTH = 4
    _STRIDE = 2
    _OUT_CHANNELS = 4

    def __init__(self, input_length=None):
        super().__init__()
        if input_length is None:
            # Backward-compatible default: size the layer from the script's
            # global binning configuration.
            input_length = vector_length
        if input_length < self._KERNEL_WIDTH:
            raise ValueError(
                f"input_length must be >= {self._KERNEL_WIDTH}, got {input_length}"
            )

        self.conv1 = nn.Conv2d(
            1,
            self._OUT_CHANNELS,
            kernel_size=(1, self._KERNEL_WIDTH),
            stride=(1, self._STRIDE),
        )
        self.relu = nn.ReLU()
        # Width after an unpadded convolution: (L - kernel) // stride + 1.
        conv_width = (input_length - self._KERNEL_WIDTH) // self._STRIDE + 1
        self.fc = nn.Linear(self._OUT_CHANNELS * conv_width, 512)

    def forward(self, x):
        """Run conv -> ReLU -> flatten -> linear on ``x`` and return the result."""
        x = self.relu(self.conv1(x))
        x = x.flatten(1)  # keep the batch axis, merge channel/height/width
        return self.fc(x)
# Build the demo network and push the single-spectrum batch through it once.
model = SimpleSpectrumCNN()
output = model(input_tensor)
# The result should have shape (batch_size, 512).
print("Output shape:", output.size())