Knowledge review:
- Checking CPU performance: look at the architecture generation, core count, and thread count
- Checking GPU performance: look at the VRAM size, the product tier, and the architecture generation
- GPU training method: move both the data and the model to the GPU device
- The class __call__ method: why the forward pass can be written directly as self.fc1(x)
Homework
# test_pytorch.py
import torch
# Check that the import succeeded
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
class MyClass:
    def __init__(self, name):
        self.name = name

    def __call__(self, x):
        """Make instances of this class callable like a function"""
        return f"Called {self.name} with input {x}"

# Create an instance
obj = MyClass("demo object")

# Call the instance like a function
result = obj(123)
print(result)  # Output: Called demo object with input 123
# In PyTorch, nn.Module implements __call__, so every subclass instance is callable
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 2)

    def forward(self, x):
        return self.fc(x)

model = MyModel()
# Calling model(x) actually invokes model.__call__(x), which dispatches to forward()
x = torch.randn(5, 10)
output = model(x)  # roughly equivalent to output = model.forward(x), but __call__ also runs hooks
print(f"Model output shape: {output.shape}")
import torch
import torch.nn as nn

# Define a simple model
class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x
# Check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create the model and move it to the GPU
model = SimpleModel().to(device)

# Create sample data and move it to the GPU
x = torch.randn(32, 10).to(device)
y = torch.randint(0, 2, (32,)).to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Simple training loop
model.train()
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
import torch
import psutil
import platform

def get_cpu_info():
    """Print the CPU model, core count, and thread count"""
    print(f"CPU model: {platform.processor()}")
    print(f"Physical cores: {psutil.cpu_count(logical=False)}")
    print(f"Logical cores (threads): {psutil.cpu_count(logical=True)}")

def get_gpu_info():
    """Print basic GPU information"""
    if torch.cuda.is_available():
        print(f"Number of GPUs: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f"VRAM: {torch.cuda.get_device_properties(i).total_memory / 1024 / 1024:.0f} MB")
    else:
        print("No GPU detected")

if __name__ == "__main__":
    get_cpu_info()
    get_gpu_info()
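The review point about the GPU architecture generation is not covered by the script above; one rough proxy available directly from PyTorch is the CUDA compute capability (e.g. 7.0 Volta, 7.5 Turing, 8.0/8.6 Ampere). A minimal sketch:

import torch

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        # (major, minor) compute capability, a rough proxy for the architecture generation
        major, minor = torch.cuda.get_device_capability(i)
        print(f"GPU {i}: compute capability {major}.{minor}")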
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Simulate a large model
class LargeModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(1000, 5000), nn.ReLU(),
            nn.Linear(5000, 5000), nn.ReLU(),
            nn.Linear(5000, 5000), nn.ReLU(),
            nn.Linear(5000, 10)
        )

    def forward(self, x):
        return self.layers(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LargeModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Generate sample data
x = torch.randn(1000, 1000).to(device)
y = torch.randint(0, 10, (1000,)).to(device)
dataset = TensorDataset(x, y)

# Solution 1: reduce the batch size
dataloader = DataLoader(dataset, batch_size=32)  # avoid an overly large batch size
# Solution 2: gradient accumulation (simulates training with a larger effective batch)
accumulation_steps = 4
for epoch in range(5):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader):
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss = loss / accumulation_steps  # scale so the accumulated gradients average out
        loss.backward()
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        running_loss += loss.item() * accumulation_steps
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(dataloader):.4f}")
Review today's content and consolidate the code again. Think about why this problem occurs.