# 从 mgf 格式到 intensity vector 的转换 - CSDN博客
#
# 本文链接：https://blog.csdn.net/Wis4e/article/details/147104040

# 需要的包
import torch
import numpy as np

# Example spectrum as a list of (m/z, intensity) pairs.
# In practice these would usually be read from an .mgf file (e.g. with
# pyteomics); they are hard-coded here to keep the example self-contained.
spectrum_peaks = [
    (112.05, 45.2),
    (114.10, 80.6),
    (118.12, 12.4),
    (200.50, 150.0),
    (500.25, 70.0),
]

# Binning parameters.
max_mass = 5000  # upper m/z limit covered by the vector, in Da
resolution = 0.1  # bin width, in Da


def peaks_to_intensity_vector(peaks, max_mass, resolution):
    """Bin (m/z, intensity) peaks into a dense 1-D intensity vector.

    Bin ``i`` covers m/z values in ``[i * resolution, (i + 1) * resolution)``.

    Args:
        peaks: Iterable of ``(mz, intensity)`` pairs.
        max_mass: Upper m/z bound (exclusive) covered by the vector.
        resolution: Bin width, in the same unit as ``max_mass``; must be
            positive.

    Returns:
        A 1-D ``torch.Tensor`` (torch's default dtype) with
        ``round(max_mass / resolution)`` entries. Peaks outside
        ``[0, max_mass)`` are dropped. When several peaks fall into the same
        bin the largest intensity is kept; because bins start at zero, a
        non-positive intensity leaves its bin unchanged.

    Raises:
        ValueError: If ``resolution`` is not positive.
    """
    if resolution <= 0:
        raise ValueError("resolution must be positive")

    # round() instead of plain truncation: a quotient such as 49999.999...
    # caused by float error must not cost us the last bin.
    num_bins = int(round(max_mass / resolution))
    vector = torch.zeros(num_bins)

    # Tolerance, in bins. Float division can land just below an exact bin
    # boundary (114.10 / 0.1 evaluates to 1140.9999999999998), and truncating
    # that would put the peak one bin too low.
    bin_eps = 1e-9

    for mz, intensity in peaks:
        if mz < 0:
            # int() truncates toward zero, so a slightly negative m/z would
            # otherwise be counted in bin 0.
            continue
        index = int(mz / resolution + bin_eps)
        # Keep the strongest peak per bin rather than letting whichever peak
        # comes last silently overwrite the others.
        if index < num_bins and intensity > vector[index]:
            vector[index] = intensity
    return vector


# Length of the intensity vector: 50000 bins for the settings above.
vector_length = int(round(max_mass / resolution))

# Place every peak of the example spectrum into its bin.
intensity_vector = peaks_to_intensity_vector(spectrum_peaks, max_mass, resolution)

# intensity_vector is now the dense, binned form of the spectrum (the original
# note describes this as the input format DeepNovo expects).
nonzero_bins = intensity_vector.nonzero(as_tuple=True)
print("Intensity vector shape:", intensity_vector.shape)
print("Nonzero elements:", nonzero_bins[0])
print("Corresponding intensities:", intensity_vector[nonzero_bins])

# Reshape for nn.Conv2d, which expects (batch, channel, height, width).
# The spectrum is a 1-D signal, so it goes on the width axis with
# batch = channel = height = 1. Putting it on the height axis instead leaves
# a width of 1, which a kernel spanning 4 columns cannot slide over.
input_tensor = intensity_vector.view(1, 1, 1, -1)  # (batch, channel, height, width)
print("Input tensor shape (for CNN):", input_tensor.shape)

# 小小模拟一个简单CNN
import torch.nn as nn

class SimpleSpectrumCNN(nn.Module):
    """Toy CNN that maps a binned spectrum to a fixed-size feature vector.

    Architecture: one ``Conv2d`` (1 -> 4 channels, kernel ``(1, 4)``, stride
    ``(1, 2)``, no padding), a ReLU, then a fully connected layer.

    The convolution slides along the last (width) axis only, and the linear
    layer is sized for exactly one output row, so inputs must be laid out as
    ``(batch, 1, 1, input_length)``.

    Args:
        input_length: Number of bins along the width axis. Defaults to the
            module-level ``vector_length`` when omitted, which keeps
            ``SimpleSpectrumCNN()`` working as before.
        out_features: Size of the output feature vector (default 512).

    Raises:
        ValueError: If ``input_length`` is smaller than the kernel width.
    """

    def __init__(self, input_length=None, out_features=512):
        super().__init__()
        if input_length is None:
            # Backward-compatible default: size the network for the
            # module-level spectrum length.
            input_length = vector_length

        kernel_width = 4
        stride = 2
        conv_channels = 4
        if input_length < kernel_width:
            raise ValueError(
                "input_length must be at least %d, got %d"
                % (kernel_width, input_length)
            )

        self.conv1 = nn.Conv2d(
            1, conv_channels, kernel_size=(1, kernel_width), stride=(1, stride)
        )
        self.relu = nn.ReLU()
        # Width after an unpadded convolution: floor((L - kernel) / stride) + 1.
        conv_width = (input_length - kernel_width) // stride + 1
        self.fc = nn.Linear(conv_channels * conv_width, out_features)

    def forward(self, x):
        """Run the network on ``x`` of shape ``(batch, 1, 1, input_length)``.

        Returns:
            Tensor of shape ``(batch, out_features)``.
        """
        x = self.relu(self.conv1(x))
        # Collapse channel/height/width into one feature axis per sample.
        x = x.flatten(start_dim=1)
        return self.fc(x)

# Build the network with its default arguments and push the prepared
# spectrum tensor through it once.
model = SimpleSpectrumCNN()
output = model(input_tensor)

# The result is expected to have shape (batch_size, 512).
print("Output shape:", output.size())