from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from sklearn.utils import shuffle
from torch.utils.data import random_split
# VGG16 convolutional network (classic configuration "D", no batch norm).
class VGG16(nn.Module):
    """VGG16 image classifier producing raw class logits.

    Args:
        num_classes: size of the final output layer (default 10).

    Input is a float tensor of shape (N, 3, H, W). The adaptive pooling
    layer makes the classifier independent of the input resolution
    (adaptive-average-pooling a 7x7 map to 7x7 is the identity, so
    224x224 inputs reproduce the original network exactly).
    """

    # Feature-extractor layout: integers are conv output channels, 'M' is a
    # 2x2/stride-2 max-pool. Building the Sequential from this table yields
    # exactly the same layer order and indices as the original hand-written
    # version, so previously saved state_dicts still load.
    _CFG = (64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
            512, 512, 512, 'M', 512, 512, 512, 'M')

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        layers = []
        in_channels = 3
        for v in self._CFG:
            if v == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.append(nn.Conv2d(in_channels, v, kernel_size=3,
                                        stride=1, padding=1))
                layers.append(nn.ReLU(inplace=True))
                in_channels = v
        self.features = nn.Sequential(*layers)
        # Generalization: fixes the classifier input at 512*7*7 regardless of
        # the spatial size of the input image (identity for 224x224 inputs).
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (N, num_classes)."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)
# ---- Hyperparameters ----
num_classes = 10
batch_size = 16
num_epochs = 100
lr = 0.0001

# ---- Data augmentation / preprocessing ----
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# ---- Dataset ----
# NOTE: ImageFolder requires the root directory to contain one sub-folder per
# class (root/<class_name>/<image>). A flat folder of images raises
# "FileNotFoundError: Couldn't find any class folder in <root>" — exactly the
# traceback appended at the bottom of this file.
# Raw string avoids accidental backslash escape sequences in the Windows path.
dataset = ImageFolder(r'D:\实习\dataset1', transform=transform)

torch.manual_seed(42)
# 70 / 20 / 10 train / val / test split, reproducible via the seeded generator.
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_set, val_set, test_set = random_split(
    dataset, [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# ---- Data loaders (only the training set is shuffled) ----
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# ---- Model, loss, optimizer ----
model = VGG16(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# Run on GPU when available, otherwise CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
# Plotting / metric utilities used after training.
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Per-epoch history collected during training, plotted at the end of the run.
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []
# ---- Training loop ----
for epoch in range(num_epochs):
    # BUG FIX: switch Dropout into training mode each epoch (the original
    # never called train()/eval(), so Dropout behavior was wrong in eval).
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch loss is a true
        # per-sample mean even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_set)
    epoch_accuracy = 100 * correct / total
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%")

    # ---- Validation ----
    model.eval()  # BUG FIX: disable Dropout during evaluation
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            total_loss += criterion(outputs, labels).item()
    val_accuracy = 100 * correct / total
    # NOTE: per-batch mean (divided by number of batches), unlike the
    # per-sample train loss above — kept as-is so logged curves stay comparable
    # with earlier runs.
    val_loss = total_loss / len(val_loader)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")
# ---- Test evaluation ----
model.eval()  # BUG FIX: ensure Dropout is disabled for the test pass
y_pred = []
y_true = []
# BUG FIX: reset the counters. The original reused `correct`/`total` left over
# from the last validation epoch, so the reported test accuracy mixed
# validation and test samples.
correct = 0
total = 0
test_loss = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)  # move inputs to the same device as the model
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        # Collect predictions / ground truth for the metrics below.
        y_pred.extend(predicted.tolist())
        y_true.extend(labels.tolist())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # .item() so we accumulate a plain float, not a tensor
        test_loss += criterion(outputs, labels).item()
test_accuracy = (100 * correct / total)
test_loss = test_loss / len(test_loader)
print(f'Test Loss: {test_loss},Test Accuracy: {test_accuracy:.2f}%')

# ---- Confusion matrix ----
conf_matrix = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=range(num_classes), yticklabels=range(num_classes))
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('E:/Confusion_Matrix.png')

# ---- Aggregate metrics (macro-averaged over classes) ----
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred, average='macro')}")
print(f"F1 Score: {f1_score(y_true, y_pred, average='macro')}")
print(f"Precision: {precision_score(y_true, y_pred, average='macro')}")

# ---- Accuracy / loss curves ----
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
# BUG FIX: the original non-raw string contained '\t' in '\training_...',
# which Python interprets as a TAB character, producing an invalid save path.
plt.savefig(r'D:\实习\train_vali\training_validation_loss_accuracy.png')
# NOTE (review): the runtime traceback below was pasted into the script as
# bare text, which makes the file itself fail to parse. It is preserved here
# as a comment. Root cause: torchvision's ImageFolder requires the dataset
# root to contain one sub-folder per class (root/<class_name>/<image>);
# D:\实习\dataset1 contained no class sub-folders.
#
# The following error occurred:
# ---------------------------------------------------------------------------
# FileNotFoundError                         Traceback (most recent call last)
# Cell In[17], line 84
#      77 transform = transforms.Compose([
#      78     transforms.RandomResizedCrop(224),
#      79     transforms.RandomHorizontalFlip(),
#      80     transforms.ToTensor(),
#      81 ])
#      83 # 载入数据集
# ---> 84 dataset = ImageFolder('D:\实习\dataset1', transform=transform)
#      85 torch.manual_seed(42)
#      86 # 划分训练集、验证集、测试集
# File C:\ProgramData\anaconda3\envs\pytorch-python3.8\lib\site-packages\torchvision\datasets\folder.py:309, in ImageFolder.__init__(self, root, transform, target_transform, loader, is_valid_file)
#     301 def __init__(
#     302     self,
#     303     root: str,
#         (...)
#     307     is_valid_file: Optional[Callable[[str], bool]] = None,
#     308 ):
# --> 309     super().__init__(
#     310         root,
#     311         loader,
#     312         IMG_EXTENSIONS if is_valid_file is None else None,
#     313         transform=transform,
#     314         target_transform=target_transform,
#     315         is_valid_file=is_valid_file,
#     316     )
#     317     self.imgs = self.samples
# File C:\ProgramData\anaconda3\envs\pytorch-python3.8\lib\site-packages\torchvision\datasets\folder.py:144, in DatasetFolder.__init__(self, root, loader, extensions, transform, target_transform, is_valid_file)
#     134 def __init__(
#     135     self,
#     136     root: str,
#         (...)
#     141     is_valid_file: Optional[Callable[[str], bool]] = None,
#     142 ) -> None:
#     143     super().__init__(root, transform=transform, target_transform=target_transform)
# --> 144     classes, class_to_idx = self.find_classes(self.root)
#     145     samples = self.make_dataset(self.root, class_to_idx, extensions, is_valid_file)
#     147     self.loader = loader
# File C:\ProgramData\anaconda3\envs\pytorch-python3.8\lib\site-packages\torchvision\datasets\folder.py:218, in DatasetFolder.find_classes(self, directory)
#     191 def find_classes(self, directory: str) -> Tuple[List[str], Dict[str, int]]:
#     192     """Find the class folders in a dataset structured as follows::
#     193
#     194         directory/
#         (...)
#     216         (Tuple[List[str], Dict[str, int]]): List of all classes and dictionary mapping each class to an index.
#     217     """
# --> 218     return find_classes(directory)
# File C:\ProgramData\anaconda3\envs\pytorch-python3.8\lib\site-packages\torchvision\datasets\folder.py:42, in find_classes(directory)
#      40 classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir())
#      41 if not classes:
# ---> 42     raise FileNotFoundError(f"Couldn't find any class folder in {directory}.")
#      44 class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
#      45 return classes, class_to_idx
# FileNotFoundError: Couldn't find any class folder in D:\实习\dataset1.