import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Data loading and preparation
# ---------------------------------------------------------------------------
# Each CSV row is one time step: 64 feature columns followed by 3 label
# columns. According to the original author's notes the features are spectral
# values and the labels are [wifi, bt, zb]; the full file is expected to have
# shape (1650000, 67).
data = pd.read_csv('./merge_data/0929_multi_label.csv', header=None).values
X = data[:, :64]  # (n_rows, 64) - features
y_full = data[:, 64:67]  # (n_rows, 3) - per-row labels [wifi, bt, zb]

n_timesteps_per_sample = 500
n_rows = X.shape[0]
# Derive the sample count from the data instead of hard-coding it, and fail
# with a clear message when the rows cannot be split into whole windows.
if n_rows == 0 or n_rows % n_timesteps_per_sample != 0:
    raise ValueError(
        f"Expected a positive multiple of {n_timesteps_per_sample} rows, got {n_rows}"
    )
n_samples = n_rows // n_timesteps_per_sample  # 3300 for the 1650000-row file

print('X.shape =', X.shape)
print('y_full.shape = ', y_full.shape)

# Group consecutive rows into windows of n_timesteps_per_sample time steps.
X_reshaped = X.reshape(n_samples, n_timesteps_per_sample, 64).astype(np.float32)

# Use the labels of the last row of each window as the window's labels.
# NOTE(review): this assumes labels are constant inside a window - confirm;
# otherwise an aggregate (e.g. max over the window) may be more appropriate.
y = y_full[n_timesteps_per_sample - 1::n_timesteps_per_sample]
y = y.astype(np.float32)  # float32 so the targets match the float logits

print(f"Reshaped X shape: {X_reshaped.shape}")  # (n_samples, 500, 64)
print(f"Labels y shape: {y.shape}")  # (n_samples, 3)
print("Sample label:", y[0])  # e.g., [1. 0. 1.]

# Add a channel dimension so each window is a 1-channel image for Conv2d.
X_cnn = X_reshaped.reshape(n_samples, 1, n_timesteps_per_sample, 64)
print(f"Input shape for CNN: {X_cnn.shape}")

X_train, X_test, y_train, y_test = train_test_split(
    X_cnn, y,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

# Convert to tensors. The targets must be floating point: BCEWithLogitsLoss
# rejects integer (long) targets with a dtype error.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")
class SpectralDataset(Dataset):
    """Pair each feature entry with its label entry for use with a DataLoader.

    Args:
        X: Indexable collection of inputs (e.g. a tensor whose first
            dimension is the sample index).
        y: Indexable collection of labels with the same length as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths.
    """

    def __init__(self, X, y):
        # Catch a mismatch here rather than as an IndexError deep inside a
        # DataLoader iteration.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]
train_dataset = SpectralDataset(X_train, y_train)
test_dataset = SpectralDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
class SpectralCNN(nn.Module):
    """Three-stage 2-D CNN that outputs one raw logit per label.

    The network expects input of shape ``(batch, 1, height, width)`` and
    returns a tensor of shape ``(batch, num_classes)``. No sigmoid is applied
    in ``forward``; pair the model with a logit-based loss such as
    ``nn.BCEWithLogitsLoss`` and apply ``torch.sigmoid`` when predicting.

    Args:
        num_classes: Number of output logits.
        input_size: ``(height, width)`` of the input images. The default
            ``(500, 64)`` reproduces the original fixed flatten size of
            ``128 * 62 * 8``.

    Raises:
        ValueError: If ``input_size`` is too small to survive three 2x2
            poolings (a spatial dimension would become 0).
    """

    def __init__(self, num_classes=3, input_size=(500, 64)):
        super().__init__()

        # The 3x3 convolutions with padding=1 keep the spatial size; each
        # MaxPool2d(2, 2) floor-halves it: 500x64 -> 250x32 -> 125x16 -> 62x8.
        height, width = input_size
        for _ in range(3):
            height //= 2
            width //= 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for three 2x2 poolings"
            )

        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Flattened feature size: 128 channels * pooled height * pooled width
        # (128 * 62 * 8 = 63488 for the default input size).
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * height * width, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return raw logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x
# Instantiate the model on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = SpectralCNN(num_classes=3)
model = model.to(device)

# Loss function and optimizer. BCEWithLogitsLoss consumes the raw logits
# produced by the model; Adam updates all model parameters.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    if first_param is None:
        return torch.device("cpu")
    return first_param.device


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; train if ``optimizer`` is given, else evaluate.

    Returns ``(mean per-batch loss, exact-match accuracy)``. Both values are
    NaN when the loader yields no batches.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            # BCEWithLogitsLoss needs floating-point targets; integer (long)
            # labels would raise a dtype error, so cast defensively.
            labels = labels.to(device).float()

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)  # [batch_size, num_labels]
            loss = criterion(outputs, labels)  # multi-label loss
            if is_training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            n_batches += 1

            # A label is predicted present when its sigmoid probability > 0.5.
            preds = (torch.sigmoid(outputs) > 0.5).int()
            labels_int = labels.int()
            # Exact-match accuracy: a sample counts only if every label matches.
            n_correct += (preds == labels_int).all(dim=1).sum().item()
            n_seen += labels.size(0)

    if n_batches == 0 or n_seen == 0:
        return float("nan"), float("nan")
    return loss_sum / n_batches, n_correct / n_seen


def train_model(model, train_loader, test_loader, criterion, optimizer, epochs=50):
    """Train a multi-label classifier and evaluate it after every epoch.

    Batches are moved to the device the model's parameters live on, so the
    model must already be on the intended device.

    Args:
        model: Module mapping a batch of inputs to raw logits of shape
            ``(batch, num_labels)``.
        train_loader: Iterable of ``(inputs, labels)`` batches used for training.
        test_loader: Iterable of ``(inputs, labels)`` batches used for evaluation.
        criterion: Loss taking ``(logits, float_labels)``, e.g.
            ``nn.BCEWithLogitsLoss``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A tuple ``(train_losses, test_losses, train_accs, test_accs)`` of
        lists with one entry per epoch. Losses are means over batches;
        accuracies are the fraction of samples whose labels are all predicted
        correctly. Training accuracy is measured in train mode (dropout
        active) during the same pass that updates the weights.
    """
    device = _model_device(model)
    train_losses, test_losses = [], []
    train_accs, test_accs = [], []

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        # Evaluation phase: no optimizer, so no gradients and no updates.
        test_loss, test_acc = _run_epoch(model, test_loader, criterion, device)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

        if (epoch+1) % 5 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], "
                  f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, "
                  f"Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}")

    return train_losses, test_losses, train_accs, test_accs
# Start training: announce it, then run the full loop and keep the
# per-epoch loss/accuracy histories for later inspection.
epochs = 50
print("\nStarting training...")
history = train_model(
    model,
    train_loader,
    test_loader,
    criterion,
    optimizer,
    epochs=epochs,
)
train_losses, test_losses, train_accs, test_accs = history
以上是我现在的代码，有问题吗？
最新发布