大模型从训练到使用的完整流程
1. 预训练阶段(Pre-training)
a) 模型架构
class LanguageModel(nn.Module):
    """Skeleton of a Transformer-style language model.

    Holds three sub-modules: a token embedding, a stack of
    ``TransformerBlock`` layers, and a final linear layer whose output
    size equals the vocabulary size. No ``forward`` method is defined in
    this snippet.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output features of the final linear layer.
        d_model: Embedding dimension; also the input feature size of the
            final linear layer and the first argument of each block.
        num_heads: Passed as the second argument to every
            ``TransformerBlock``.
        num_layers: How many ``TransformerBlock`` instances to create.
    """

    def __init__(
        self,
        vocab_size: int,
        d_model: int,
        num_heads: int,
        num_layers: int,
    ) -> None:
        # nn.Module has to be initialised before any sub-module is
        # assigned; otherwise the assignments below raise
        # "AttributeError: cannot assign module before Module.__init__() call".
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, d_model)
        # ModuleList (rather than a plain list) registers every block so
        # its parameters are visible to .parameters() / .to() / state_dict().
        # NOTE(review): TransformerBlock is not defined in this snippet;
        # it is assumed to accept (d_model, num_heads) - confirm.
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(d_model, num_heads) for _ in range(num_layers)]
        )
        self.fc_out = nn.Linear(d_model, vocab_size)
b) 训练目标
# 下一词预测任务
def train_step(input_text):
# 输入:"我喜欢吃苹果"