import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import random
from tqdm import tqdm
import platform
# ===== 中文字体支持设置 =====
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rcParams
# Configure matplotlib so that Chinese text renders correctly.
def set_chinese_font():
    """Make matplotlib able to render Chinese labels (best effort).

    The function first looks for an installed font whose family name matches
    one of a list of common Chinese fonts. If none is installed it tries to
    download Noto Sans CJK into the working directory and register it.

    The chosen family is placed in front of the existing ``font.sans-serif``
    list (so the previous fallbacks are kept) and ``axes.unicode_minus`` is
    disabled so minus signs render with CJK fonts.

    The function never raises: any failure is reported on stdout and the
    plots simply fall back to the default font.
    """
    candidate_fonts = ['SimHei', 'Microsoft YaHei', 'KaiTi', 'SimSun',
                       'FangSong', 'STSong', 'STKaiti']

    def use_family(family):
        # Prepend instead of overwrite so non-CJK glyph fallbacks survive.
        fallbacks = [name for name in rcParams['font.sans-serif'] if name != family]
        rcParams['font.sans-serif'] = [family] + fallbacks
        rcParams['axes.unicode_minus'] = False  # fixes minus-sign rendering

    try:
        # Exact family-name match: a substring match could select a name
        # (e.g. 'SimSun' because 'SimSun-ExtB' exists) that matplotlib
        # cannot resolve afterwards.
        installed = {font.name for font in fm.fontManager.ttflist}
        chinese_font = next(
            (name for name in candidate_fonts if name in installed), None)

        if chinese_font:
            use_family(chinese_font)
            print(f"Set Chinese font: {chinese_font}")
            return

        # No suitable system font: try to fetch one from the network.
        try:
            from urllib.request import urlretrieve

            font_path = "NotoSansCJK-Regular.ttc"
            if not os.path.exists(font_path):
                print("Downloading Chinese font...")
                # Download to a temporary name first so an interrupted
                # transfer never leaves a truncated file at font_path.
                partial_path = font_path + ".part"
                try:
                    urlretrieve("https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/Chinese/NotoSansCJK-Regular.ttc",
                                partial_path)
                    os.replace(partial_path, font_path)
                finally:
                    if os.path.exists(partial_path):
                        os.remove(partial_path)
            fm.fontManager.addfont(font_path)
            # Ask matplotlib which family name it registered for the file
            # rather than assuming one.
            use_family(fm.FontProperties(fname=font_path).get_name())
            print("Set downloaded Chinese font")
        except Exception as e:  # deliberate best effort: plotting must not abort the run
            print(f"Failed to set Chinese font: {str(e)}")
            print("Chinese display may be incorrect")
    except Exception as e:  # deliberate best effort, see docstring
        print(f"Font setting error: {str(e)}")
# Improved reference-based modality-aware network with extra regularisation
# (dropout, batch norm) to reduce overfitting.
class ImprovedRMAN(nn.Module):
    """Small CNN classifier whose features are enhanced by learned reference vectors.

    Pipeline of ``forward``:

    1. two Conv-BN-ReLU-MaxPool-Dropout2d stages (3 -> 32 -> 64 channels),
    2. a Linear-BN-ReLU-Dropout projection to a 128-d feature,
    3. cosine similarity between that feature and one learnable 128-d
       reference vector per class, softmax over classes, and the resulting
       weighted sum of reference vectors added to the feature,
    4. a linear classifier producing ``num_classes`` logits.

    Args:
        num_classes: number of output classes (and of reference vectors).
        dropout_rate: dropout probability after the fully connected layer;
            half of it is used for the ``Dropout2d`` layers after each
            convolution stage.
        input_size: spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. Defaults to 28,
            the size the network was originally hard-wired for.
    """

    def __init__(self, num_classes=9, dropout_rate=0.3, input_size=28):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        self.input_size = tuple(input_size)

        # Feature extractor: reduced channel counts plus dropout.
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=dropout_rate / 2),  # dropout after the convolution
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=dropout_rate / 2),  # dropout after the convolution
        )

        # Size of the flattened feature map, derived from the layers above.
        self.fc_input_dim = self._calculate_fc_input_dim()

        # Modality-aware projection with dropout.
        self.modality_aware = nn.Sequential(
            nn.Linear(self.fc_input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),  # dropout after the fully connected layer
        )

        # One learnable reference vector per class.
        self.reference_vectors = nn.Parameter(torch.randn(num_classes, 128))

        self.classifier = nn.Linear(128, num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers for stable training."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def _calculate_fc_input_dim(self):
        """Return the flattened size of ``self.features`` output for one image.

        The probe runs in eval mode without gradients so that it neither
        updates the BatchNorm running statistics nor applies dropout; the
        previous training/eval mode of the feature extractor is restored.
        """
        height, width = self.input_size
        was_training = self.features.training
        self.features.eval()
        try:
            with torch.no_grad():
                probe = self.features(torch.zeros(1, 3, height, width))
        finally:
            self.features.train(was_training)
        return probe.flatten(1).size(1)

    def forward(self, x):
        """Compute class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        # Feature extraction.
        x = self.features(x)
        x = x.flatten(1)

        # Modality-aware features, shape (batch, 128).
        features = self.modality_aware(x)

        # Cosine similarity of every feature with every reference vector:
        # (batch, 1, 128) vs (1, num_classes, 128) -> (batch, num_classes).
        similarities = F.cosine_similarity(
            features.unsqueeze(1),
            self.reference_vectors.unsqueeze(0),
            dim=2
        )

        # Similarity-weighted combination of the reference vectors.
        attention_weights = F.softmax(similarities, dim=1)
        weighted_ref = torch.sum(
            attention_weights.unsqueeze(2) * self.reference_vectors.unsqueeze(0),
            dim=1
        )

        # Feature enhancement: original feature + weighted reference vector.
        enhanced_features = features + weighted_ref

        return self.classifier(enhanced_features)
# Visualise the confusion matrix to analyse model performance.
def plot_confusion_matrix(all_labels, all_preds, class_names):
    """Plot, save and show a confusion matrix, then print a classification report.

    Args:
        all_labels: true class indices, one per evaluated sample.
        all_preds: predicted class indices, aligned with ``all_labels``.
        class_names: class names; entry ``i`` names class index ``i``.

    Labels are assumed to be integer indices into ``class_names`` (this is
    what the evaluation loop in this file passes). The full index range is
    given explicitly to scikit-learn so that a class that happens to be
    absent from the evaluated samples still gets a row/column and does not
    make ``classification_report`` fail on a name/label count mismatch.

    The figure is written to ``confusion_matrix.png``.
    """
    from sklearn.metrics import confusion_matrix, classification_report
    import seaborn as sns

    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.show()

    # Print the per-class precision / recall / F1 report.
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, labels=label_ids,
                                target_names=class_names, zero_division=0))
# Visualise predictions for randomly chosen samples.
def visualize_predictions(model, dataset, mean, std, num_samples=5):
    """Show and save model predictions for random samples of ``dataset``.

    Args:
        model: the network to query; it is switched to eval mode.
        dataset: a subset-like object exposing the class names through
            ``dataset.dataset.classes`` and yielding ``(image, label)``
            pairs with channel-first image tensors.
        mean: per-channel mean tensor used for normalisation.
        std: per-channel standard deviation tensor used for normalisation.
        num_samples: how many samples to draw; clamped to the dataset size
            so small datasets do not raise in ``random.sample``.

    The figure is written to ``predictions.png``.
    """
    class_names = dataset.dataset.classes
    sample_count = max(0, min(num_samples, len(dataset)))
    indices = random.sample(range(len(dataset)), sample_count)

    model.eval()
    # Loop-invariant values: model device and numpy copies of the statistics.
    device = next(model.parameters()).device
    mean_np = mean.cpu().numpy()
    std_np = std.cpu().numpy()

    plt.figure(figsize=(15, 10))
    for i, idx in enumerate(indices):
        image, true_label = dataset[idx]
        image_batch = image.unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(image_batch)
        _, pred_label_idx = torch.max(output, 1)
        pred_label = pred_label_idx.item()

        # Undo the normalisation: channel-last layout, then x * std + mean.
        image_np = image.permute(1, 2, 0).cpu().numpy()
        image_np = image_np * std_np + mean_np
        image_np = np.clip(image_np, 0, 1)

        plt.subplot(1, sample_count, i + 1)
        plt.imshow(image_np)
        plt.title(f"真实: {class_names[true_label]}\n预测: {class_names[pred_label]}")
        plt.axis('off')
    plt.tight_layout()
    plt.savefig('predictions.png')
    plt.show()
# Visualise the training process.
def plot_training_history(train_losses, test_losses, train_accs, test_accs):
    """Plot loss and accuracy curves side by side, save and show the figure.

    Args:
        train_losses: per-epoch training loss values.
        test_losses: per-epoch validation loss values.
        train_accs: per-epoch training accuracy values (percent).
        test_accs: per-epoch validation accuracy values (percent).

    Every third point and the last point of each curve are annotated with
    their value. The figure is written to ``training_history.png``.
    """
    epoch_axis = range(1, len(train_losses) + 1)

    def draw_panel(slot, train_series, val_series, texts, number_format, lift):
        """Draw one subplot (curves, labels, grid, legend and value annotations)."""
        train_label, val_label, title, y_label = texts
        plt.subplot(1, 2, slot)
        plt.plot(epoch_axis, train_series, 'b-', linewidth=2, label=train_label)
        plt.plot(epoch_axis, val_series, 'r-', linewidth=2, label=val_label)
        plt.title(title, fontsize=16)
        plt.xlabel('轮次', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.legend(fontsize=12)

        # Label every third point and the final one, training value first.
        final_index = len(train_series) - 1
        for position, pair in enumerate(zip(train_series, val_series)):
            if position % 3 != 0 and position != final_index:
                continue
            for value in pair:
                plt.annotate(number_format.format(value),
                             xy=(position + 1, value),
                             xytext=(position + 1, value + lift),
                             fontsize=8, ha='center')

    # One row, two columns.
    plt.figure(figsize=(18, 6))

    # Left panel: loss curves.
    draw_panel(1, train_losses, test_losses,
               ('训练损失', '验证损失', '训练与验证损失', '损失'),
               '{:.4f}', 0.01)

    # Right panel: accuracy curves.
    draw_panel(2, train_accs, test_accs,
               ('训练准确率', '验证准确率', '训练与验证准确率', '准确率 (%)'),
               '{:.2f}%', 1)

    plt.tight_layout()
    plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
    plt.show()
# Per-channel statistics helper used by main().
def _compute_channel_stats(dataset, num_workers, batch_size=256):
    """Return per-channel ``(mean, std)`` over all pixels of ``dataset``.

    The dataset is streamed in mini-batches and sums are accumulated in
    float64, so memory use does not grow with the dataset size. ``std`` is
    the unbiased estimate, matching ``torch.Tensor.std``.

    Raises:
        ValueError: if the dataset yields no images.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                        num_workers=num_workers)
    channel_sum = 0.0
    channel_sq_sum = 0.0
    pixel_count = 0
    for images, _ in loader:
        images = images.to(torch.float64)
        channel_sum = channel_sum + images.sum(dim=(0, 2, 3))
        channel_sq_sum = channel_sq_sum + (images ** 2).sum(dim=(0, 2, 3))
        pixel_count += images.size(0) * images.size(2) * images.size(3)
    if pixel_count == 0:
        raise ValueError("cannot compute channel statistics of an empty dataset")
    mean = channel_sum / pixel_count
    variance = (channel_sq_sum - pixel_count * mean ** 2) / max(pixel_count - 1, 1)
    std = variance.clamp_min(0).sqrt()
    return mean.float(), std.float()


# Evaluation helper shared by the per-epoch validation and the final test pass.
def _evaluate(model, loader, device, criterion=None, **pbar_kwargs):
    """Run ``model`` over ``loader`` in eval mode without gradients.

    Args:
        model: network to evaluate.
        loader: data loader yielding ``(images, labels)`` batches.
        device: device the batches are moved to.
        criterion: optional loss function; when omitted the loss is 0.0.
        **pbar_kwargs: forwarded to ``tqdm`` (description, position, ...).

    Returns:
        ``(average_loss, accuracy_percent, all_labels, all_preds)`` where the
        loss is averaged per sample and the two lists hold one entry per
        evaluated sample.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    progress = tqdm(loader, **pbar_kwargs)
    with torch.no_grad():
        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            batch_count = labels.size(0)
            batch_correct = (predicted == labels).sum().item()
            if criterion is not None:
                # Weight by batch size so a short last batch is not over-counted.
                loss_sum += criterion(outputs, labels).item() * batch_count
            total += batch_count
            correct += batch_correct
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
            progress.set_postfix(acc=f"{100 * batch_correct / batch_count:.2f}%")
    progress.close()
    denominator = max(total, 1)  # avoid ZeroDivisionError on an empty loader
    return loss_sum / denominator, 100 * correct / denominator, all_labels, all_preds


# Main entry point.
def main():
    """Train, evaluate and visualise ``ImprovedRMAN`` on an image-folder dataset.

    Steps: configure fonts, split the data 80/20 once, compute normalisation
    statistics on the training part only, train with augmentation, early
    stopping and learning-rate reduction on plateau, reload the best
    checkpoint, report the final accuracy and produce the plots.

    Files written: ``best_improved_rman_model.pth``,
    ``final_improved_rman_model.pth`` and the figures saved by the plotting
    helpers.
    """
    # Local import: Subset is not among the file-level imports.
    from torch.utils.data import Subset

    set_chinese_font()

    # Data directory.
    data_dir = r'D:\Codes\新'

    # Worker processes: 0 on Windows to avoid multiprocessing problems.
    num_workers = 0 if platform.system() == 'Windows' else 2

    # Split ONCE and reuse the indices everywhere, so that the statistics,
    # the training set and the test set all refer to the same partition.
    # Images are resized here so they can be batched even if the source
    # files differ in size.
    stats_transform = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
    ])
    stats_dataset = datasets.ImageFolder(root=data_dir, transform=stats_transform)
    class_names = stats_dataset.classes
    train_size = int(0.8 * len(stats_dataset))
    test_size = len(stats_dataset) - train_size
    train_split, test_split = random_split(stats_dataset, [train_size, test_size])

    # Normalisation statistics from the training images only (no test leakage).
    mean, std = _compute_channel_stats(train_split, num_workers)

    # Strong augmentation to reduce overfitting.
    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),  # slightly larger so the crop has room
        transforms.RandomCrop(28, padding=4),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.15),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15), scale=(0.85, 1.15)),
        transforms.RandomGrayscale(p=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
        transforms.RandomErasing(p=0.2, scale=(0.02, 0.25))
    ])
    test_transform = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

    # Two independent ImageFolder instances: subsets produced by random_split
    # share one underlying dataset, so assigning the test transform to it
    # would also replace the training transform and disable augmentation.
    train_dataset = Subset(
        datasets.ImageFolder(root=data_dir, transform=train_transform),
        train_split.indices)
    test_dataset = Subset(
        datasets.ImageFolder(root=data_dir, transform=test_transform),
        test_split.indices)

    # Data loaders.
    batch_size = 64
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=num_workers,
        # A trailing batch with a single sample makes BatchNorm1d raise in
        # training mode; drop it only in that exact situation.
        drop_last=len(train_dataset) % batch_size == 1)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers)

    # Model and device; the output size follows the dataset's class count.
    model = ImprovedRMAN(num_classes=len(class_names), dropout_rate=0.4)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(f"Using device: {device}")

    # Loss, optimiser (weight decay acts as L2 regularisation) and scheduler
    # that halves the learning rate when validation accuracy stalls.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

    # Training history.
    train_losses = []
    train_accuracies = []
    test_losses = []
    test_accuracies = []

    # Training loop: at most 30 epochs, early stopping after 6 stale epochs.
    num_epochs = 30
    # -inf guarantees the first epoch writes a checkpoint, so the reload
    # below never picks up a missing or stale file.
    best_accuracy = float('-inf')
    patience = 6
    early_stopping_counter = 0
    best_model_path = 'best_improved_rman_model.pth'

    main_pbar = tqdm(range(num_epochs), desc="Overall Training", position=0, leave=True)
    for epoch in main_pbar:
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]",
                          leave=False, position=1)
        for images, labels in train_pbar:
            images, labels = images.to(device), labels.to(device)

            # Forward pass.
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass with gradient clipping against exploding gradients.
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Batch metrics (loss weighted by batch size for a per-sample mean).
            batch_count = labels.size(0)
            current_loss = loss.item()
            _, predicted = torch.max(outputs.data, 1)
            batch_correct = (predicted == labels).sum().item()
            running_loss += current_loss * batch_count
            total_train += batch_count
            correct_train += batch_correct

            current_acc = 100 * batch_correct / batch_count
            train_pbar.set_postfix(loss=f"{current_loss:.4f}", acc=f"{current_acc:.2f}%")
        train_pbar.close()

        # Epoch-level training metrics.
        seen = max(total_train, 1)  # avoid ZeroDivisionError on an empty loader
        avg_train_loss = running_loss / seen
        train_accuracy = 100 * correct_train / seen
        train_losses.append(avg_train_loss)
        train_accuracies.append(train_accuracy)

        main_pbar.set_postfix(
            train_loss=f"{avg_train_loss:.4f}",
            train_acc=f"{train_accuracy:.2f}%"
        )

        # Validation.
        avg_test_loss, test_accuracy, _, _ = _evaluate(
            model, test_loader, device, criterion,
            desc=f"Epoch {epoch+1}/{num_epochs} [Validation]", leave=False, position=1)
        test_losses.append(avg_test_loss)
        test_accuracies.append(test_accuracy)

        main_pbar.set_postfix(
            train_loss=f"{avg_train_loss:.4f}",
            train_acc=f"{train_accuracy:.2f}%",
            test_loss=f"{avg_test_loss:.4f}",
            test_acc=f"{test_accuracy:.2f}%"
        )

        # Learning-rate schedule driven by validation accuracy.
        scheduler.step(test_accuracy)

        # Checkpointing and early stopping.
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            early_stopping_counter = 0
            torch.save(model.state_dict(), best_model_path)
            tqdm.write(f'Epoch [{epoch+1}/{num_epochs}]: New best model saved with accuracy: {best_accuracy:.2f}%')
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= patience:
                tqdm.write(f'Epoch [{epoch+1}/{num_epochs}]: Early stopping after {patience} epochs without improvement')
                break
    main_pbar.close()

    # Reload the best checkpoint onto the device in use.
    model.load_state_dict(torch.load(best_model_path, map_location=device))

    # Final evaluation.
    _, accuracy, all_labels, all_preds = _evaluate(
        model, test_loader, device,
        desc="Final Evaluation", position=0, leave=True)
    print(f'Final Test Accuracy: {accuracy:.2f}%')

    # Visualisations and final model export.
    visualize_predictions(model, test_dataset, mean, std, num_samples=5)
    plot_training_history(train_losses, test_losses, train_accuracies, test_accuracies)
    plot_confusion_matrix(all_labels, all_preds, class_names)
    torch.save(model.state_dict(), 'final_improved_rman_model.pth')
    print("模型已保存为 'final_improved_rman_model.pth'")
if __name__ == '__main__':
    # Protective measure needed when multiprocessing is used on Windows.
    from multiprocessing import freeze_support

    freeze_support()
    main()
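# Task note: optimise this model so that the test accuracy reaches roughly 97.5%.
# (Original note: "优化此模型,使得测试准确率达到97.5%左右", followed by the page label "最新发布", i.e. "latest release".)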