Preface:
Project overview:
This project uses deep learning to classify images. I combine two models, DDPM (Denoising Diffusion Probabilistic Models) and InceptionNeXt, to improve classification accuracy and performance, and I use data augmentation to increase the diversity of the training data and strengthen the model's generalization.
For the DDPM part, see: Image Classification in PyTorch Based on DDPM + InceptionNeXt + Data Augmentation (Part 1)
Image classification with InceptionNeXt:
Paper: https://wanghao.blog.youkuaiyun.com/article/details/131347001?spm=1001.2014.3001.5502
Official source code: https://github.com/sail-sg/inceptionnext
The paper starts from ConvNeXt and redesigns its depthwise-convolution module, tackling the problem of speeding up large-kernel CNNs while keeping their performance. Part of the channels are left untouched, and depthwise convolution is applied only to the remaining channels; the large depthwise kernel is further decomposed into several groups of small kernels in an Inception style [56, 57, 55]. Concretely, of the processed channels, 1/3 are convolved with a 3 × 3 kernel, 1/3 with a 1 × k kernel, and the remaining 1/3 with a k × 1 kernel. With this new, simple and cheap operator, dubbed "Inception depthwise convolution", the resulting InceptionNeXt models achieve a better trade-off between accuracy and speed: InceptionNeXt-T attains higher accuracy than ConvNeXt-T while enjoying a 1.6× training-throughput speedup that brings it close to ResNet-50's speed.
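To make the channel split concrete, here is a minimal sketch of an Inception depthwise convolution in the spirit of the official InceptionDWConv2d module; the default branch_ratio=0.125 and band kernel size 11 are assumptions taken from the official implementation and can be tuned:

import torch
import torch.nn as nn

class InceptionDWConv2d(nn.Module):
    """Inception depthwise conv: identity + 3x3 + 1xk + kx1 branches."""
    def __init__(self, dim, square_kernel_size=3, band_kernel_size=11, branch_ratio=0.125):
        super().__init__()
        gc = int(dim * branch_ratio)  # channels per convolution branch
        self.dwconv_hw = nn.Conv2d(gc, gc, square_kernel_size,
                                   padding=square_kernel_size // 2, groups=gc)
        self.dwconv_w = nn.Conv2d(gc, gc, (1, band_kernel_size),
                                  padding=(0, band_kernel_size // 2), groups=gc)
        self.dwconv_h = nn.Conv2d(gc, gc, (band_kernel_size, 1),
                                  padding=(band_kernel_size // 2, 0), groups=gc)
        self.split_indexes = (dim - 3 * gc, gc, gc, gc)  # the rest stays identity

    def forward(self, x):
        x_id, x_hw, x_w, x_h = torch.split(x, self.split_indexes, dim=1)
        return torch.cat(
            (x_id, self.dwconv_hw(x_hw), self.dwconv_w(x_w), self.dwconv_h(x_h)),
            dim=1,
        )

With branch_ratio=0.125 the three convolution branches together cover 3/8 of the channels, i.e. each branch gets exactly 1/3 of the processed channels as described above, while the remaining 5/8 pass through unchanged.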
Training set:
My training set uses the following class-to-index mapping:
{
"0": "Huanglong_disease",
"1": "Magnesium_deficiency",
"2": "Normal"
}
makeDataset.py converts the raw images into the standard dataset layout:
import glob
import os
import shutil

from sklearn.model_selection import train_test_split

# collect all images, assuming the layout data1/<class_name>/<file>
image_list = glob.glob('data1/*/*.*')
print(image_list)

# rebuild the output directory from scratch
file_dir = 'data'
if os.path.exists(file_dir):
    shutil.rmtree(file_dir)
os.makedirs(file_dir)

# 80/20 train/validation split
trainval_files, val_files = train_test_split(image_list, test_size=0.2, random_state=42)
train_root = os.path.join(file_dir, 'train')
val_root = os.path.join(file_dir, 'val')

# copy each image into <split>/<class_name>/<file_name>
for file in trainval_files:
    file_class = file.replace("\\", "/").split('/')[-2]
    file_name = file.replace("\\", "/").split('/')[-1]
    file_class = os.path.join(train_root, file_class)
    if not os.path.isdir(file_class):
        os.makedirs(file_class)
    shutil.copy(file, file_class + '/' + file_name)

for file in val_files:
    file_class = file.replace("\\", "/").split('/')[-2]
    file_name = file.replace("\\", "/").split('/')[-1]
    file_class = os.path.join(val_root, file_class)
    if not os.path.isdir(file_class):
        os.makedirs(file_class)
    shutil.copy(file, file_class + '/' + file_name)
The script produces the standard dataset structure below:
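data
├── train
│   ├── Huanglong_disease
│   ├── Magnesium_deficiency
│   └── Normal
└── val
    ├── Huanglong_disease
    ├── Magnesium_deficiency
    └── Normal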
Model code provided by the official repository (the ConvNeXt baselines, including the PartialConv2d variants added by the InceptionNeXt repo):
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from functools import partial
class PartialConv2d(nn.Module):
r"""
Conduct convolution on partial channels.
"""
def __init__(self, in_channels, out_channels, kernel_size,
conv_ratio=1.0,
stride=1, padding=0, dilation=1, groups=1, bias=True, **kwargs,
):
super().__init__()
in_chs = int(in_channels * conv_ratio)
out_chs = int(out_channels * conv_ratio)
gps = int(groups * conv_ratio) or 1 # groups should be at least 1
self.conv = nn.Conv2d(in_chs, out_chs,
kernel_size=kernel_size,
stride=stride, padding=padding, dilation=dilation,
groups=gps, bias=bias,
**kwargs,
)
self.split_indices = (in_channels - in_chs, in_chs)
def forward(self, x):
identity, conv = torch.split(x, self.split_indices, dim=1)
return torch.cat(
(identity, self.conv(conv)),
dim=1,
)
class Block(nn.Module):
r""" ConvNeXt Block. There are two equivalent implementations:
(1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
(2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
We use (2) as we find it slightly faster in PyTorch
Args:
dim (int): Number of input channels.
drop_path (float): Stochastic depth rate. Default: 0.0
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
"""
def __init__(self, dim, kernel_size=7,
drop_path=0., layer_scale_init_value=1e-6,
conv_fn=nn.Conv2d,
):
super().__init__()
self.dwconv = conv_fn(dim, dim, kernel_size=kernel_size, padding=kernel_size//2, groups=dim) # depthwise conv
self.norm = LayerNorm(dim, eps=1e-6)
self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
self.act = nn.GELU()
self.pwconv2 = nn.Linear(4 * dim, dim)
self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
requires_grad=True) if layer_scale_init_value > 0 else None
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x):
input = x
x = self.dwconv(x)
x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
x = self.norm(x)
x = self.pwconv1(x)
x = self.act(x)
x = self.pwconv2(x)
if self.gamma is not None:
x = self.gamma * x
x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
x = input + self.drop_path(x)
return x
class ConvNeXt(nn.Module):
r""" ConvNeXt
A PyTorch impl of : `A ConvNet for the 2020s` -
https://arxiv.org/pdf/2201.03545.pdf
Args:
in_chans (int): Number of input image channels. Default: 3
num_classes (int): Number of classes for classification head. Default: 1000
depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
drop_path_rate (float): Stochastic depth rate. Default: 0.
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
"""
def __init__(self, in_chans=3, num_classes=1000,
depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
layer_scale_init_value=1e-6, head_init_scale=1.,
kernel_sizes=7, conv_fns=nn.Conv2d,
**kwargs,
):
super().__init__()
num_stages = len(depths)
self.num_stages = num_stages
if not isinstance(kernel_sizes, (list, tuple)):
kernel_sizes = [kernel_sizes] * num_stages
if not isinstance(conv_fns, (list, tuple)):
conv_fns = [conv_fns] * num_stages
self.num_classes = num_classes
self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
stem = nn.Sequential(
nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
)
self.downsample_layers.append(stem)
for i in range(self.num_stages - 1):
downsample_layer = nn.Sequential(
LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
)
self.downsample_layers.append(downsample_layer)
self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
cur = 0
for i in range(self.num_stages):
stage = nn.Sequential(
*[Block(dim=dims[i], drop_path=dp_rates[cur + j],
kernel_size=kernel_sizes[i],
layer_scale_init_value=layer_scale_init_value,
conv_fn=conv_fns[i],
) for j in range(depths[i])]
)
self.stages.append(stage)
cur += depths[i]
self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
self.head = nn.Linear(dims[-1], num_classes)
self.apply(self._init_weights)
self.head.weight.data.mul_(head_init_scale)
self.head.bias.data.mul_(head_init_scale)
def _init_weights(self, m):
if isinstance(m, (nn.Conv2d, nn.Linear)):
trunc_normal_(m.weight, std=.02)
nn.init.constant_(m.bias, 0)
def forward_features(self, x):
for i in range(self.num_stages):
x = self.downsample_layers[i](x)
x = self.stages[i](x)
return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
def forward(self, x):
x = self.forward_features(x)
x = self.head(x)
return x
class LayerNorm(nn.Module):
r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
shape (batch_size, height, width, channels) while channels_first corresponds to inputs
with shape (batch_size, channels, height, width).
"""
def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
super().__init__()
self.weight = nn.Parameter(torch.ones(normalized_shape))
self.bias = nn.Parameter(torch.zeros(normalized_shape))
self.eps = eps
self.data_format = data_format
if self.data_format not in ["channels_last", "channels_first"]:
raise NotImplementedError
self.normalized_shape = (normalized_shape, )
def forward(self, x):
if self.data_format == "channels_last":
return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
elif self.data_format == "channels_first":
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
x = self.weight[:, None, None] * x + self.bias[:, None, None]
return x
def _cfg(url='', **kwargs):
return {
'url': url,
'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
'crop_pct': 0.875, 'interpolation': 'bicubic',
'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
'first_conv': 'stem.0', 'classifier': 'head.fc',
**kwargs
}
model_urls = {
"convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
"convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
"convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
"convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
"convnext_tiny_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_224.pth",
"convnext_small_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth",
"convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth",
"convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth",
"convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth",
# add by this InceptionNeXt repo
"convnext_tiny_k5_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k5_1k_224_ema.pth",
"convnext_tiny_k3_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_1k_224_ema.pth",
"convnext_tiny_k3_par1_2_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_par1_2_1k_224_ema.pth",
"convnext_tiny_k3_par3_8_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_par3_8_1k_224_ema.pth",
"convnext_tiny_k3_par1_4_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_par1_4_1k_224_ema.pth",
"convnext_tiny_k3_par1_8_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_par1_8_1k_224_ema.pth",
"convnext_tiny_k3_par1_16_1k": "https://github.com/sail-sg/inceptionnext/releases/download/model/convnext_tiny_k3_par1_16_1k_224_ema.pth",
}
@register_model
def convnext_tiny(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_22k'] if in_22k else model_urls['convnext_tiny_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint["model"])
return model
@register_model
def convnext_tiny_k5(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=5,
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k5_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3_par1_2(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
conv_fns=partial(PartialConv2d, conv_ratio=0.5),
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_par1_2_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3_par3_8(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
conv_fns=partial(PartialConv2d, conv_ratio=3/8),
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_par3_8_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3_par1_4(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
conv_fns=partial(PartialConv2d, conv_ratio=0.25),
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_par1_4_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3_par1_8(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
conv_fns=partial(PartialConv2d, conv_ratio=0.125),
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_par1_8_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_tiny_k3_par1_16(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
kernel_sizes=3,
conv_fns=partial(PartialConv2d, conv_ratio=1/16),
**kwargs)
assert not in_22k, "22k pre-trained model not available"
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_tiny_k3_par1_16_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
model.load_state_dict(checkpoint)
return model
@register_model
def convnext_small(pretrained=False,in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_small_22k'] if in_22k else model_urls['convnext_small_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
model.load_state_dict(checkpoint["model"])
return model
@register_model
def convnext_base(pretrained=False, in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_base_22k'] if in_22k else model_urls['convnext_base_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
model.load_state_dict(checkpoint["model"])
return model
@register_model
def convnext_large(pretrained=False, in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
model.default_cfg = _cfg()
if pretrained:
url = model_urls['convnext_large_22k'] if in_22k else model_urls['convnext_large_1k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
model.load_state_dict(checkpoint["model"])
return model
@register_model
def convnext_xlarge(pretrained=False, in_22k=False, **kwargs):
model = ConvNeXt(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
model.default_cfg = _cfg()
if pretrained:
assert in_22k, "only ImageNet-22K pre-trained ConvNeXt-XL is available; please set in_22k=True"
url = model_urls['convnext_xlarge_22k']
checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
model.load_state_dict(checkpoint["model"])
return model
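As a quick sanity check, any of the registered models can be instantiated and run on a dummy batch; a minimal sketch with random weights and a hypothetical 3-class head:

import torch

# conv_ratio=0.5 variant: depthwise conv on half the channels, identity on the rest
model = convnext_tiny_k3_par1_2(num_classes=3)
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # torch.Size([1, 3])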
The training function in train.py. It relies on globals set in the __main__ block below (mixup_fn, criterion_train, use_amp, scaler, CLIP_GRAD) and on the AverageMeter and accuracy helpers (a sketch of these follows the validation function):
def train(model, device, train_loader, optimizer, epoch,model_ema):
model.train()
loss_meter = AverageMeter()
acc1_meter = AverageMeter()
acc5_meter = AverageMeter()
total_num = len(train_loader.dataset)
print(total_num, len(train_loader))
for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device, non_blocking=True), target.to(device, non_blocking=True)
samples, targets = mixup_fn(data, target)
output = model(samples)
optimizer.zero_grad()
if use_amp:
with torch.cuda.amp.autocast():
loss = torch.nan_to_num(criterion_train(output, targets))
            scaler.scale(loss).backward()
            # unscale before clipping so the norm is measured on the true gradients
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD)
            # scaler.step() skips optimizer.step() if the gradients contain inf/NaN
            scaler.step(optimizer)
            # updates the scale for the next iteration
            scaler.update()
else:
loss = criterion_train(output, targets)
loss.backward()
# torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD)
optimizer.step()
if model_ema is not None:
model_ema.update(model)
torch.cuda.synchronize()
        lr = optimizer.state_dict()['param_groups'][0]['lr']
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        loss_meter.update(loss.item(), target.size(0))
        acc1_meter.update(acc1.item(), target.size(0))
        acc5_meter.update(acc5.item(), target.size(0))
if (batch_idx + 1) % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLR:{:.9f}'.format(
epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
100. * (batch_idx + 1) / len(train_loader), loss.item(), lr))
ave_loss =loss_meter.avg
acc = acc1_meter.avg
print('epoch:{}\tloss:{:.2f}\tacc:{:.2f}'.format(epoch, ave_loss, acc))
return ave_loss, acc
# Validation loop
@torch.no_grad()
def val(model, device, test_loader):
global Best_ACC
model.eval()
loss_meter = AverageMeter()
acc1_meter = AverageMeter()
acc5_meter = AverageMeter()
total_num = len(test_loader.dataset)
print(total_num, len(test_loader))
val_list = []
pred_list = []
for data, target in test_loader:
for t in target:
val_list.append(t.data.item())
data, target = data.to(device,non_blocking=True), target.to(device,non_blocking=True)
output = model(data)
loss = criterion_val(output, target)
_, pred = torch.max(output.data, 1)
for p in pred:
pred_list.append(p.data.item())
acc1, acc5 = accuracy(output, target, topk=(1, 5))
loss_meter.update(loss.item(), target.size(0))
acc1_meter.update(acc1.item(), target.size(0))
acc5_meter.update(acc5.item(), target.size(0))
acc = acc1_meter.avg
print('\nVal set: Average loss: {:.4f}\tAcc1:{:.3f}%\tAcc5:{:.3f}%\n'.format(
loss_meter.avg, acc, acc5_meter.avg))
if acc > Best_ACC:
if isinstance(model, torch.nn.DataParallel):
torch.save(model.module, file_dir + '/' + 'best.pth')
else:
torch.save(model, file_dir + '/' + 'best.pth')
Best_ACC = acc
if isinstance(model, torch.nn.DataParallel):
state = {
'epoch': epoch,
'state_dict': model.module.state_dict(),
'Best_ACC':Best_ACC
}
if use_ema:
state['state_dict_ema']=model.module.state_dict()
torch.save(state, file_dir + "/" + 'model_' + str(epoch) + '_' + str(round(acc, 3)) + '.pth')
else:
state = {
'epoch': epoch,
'state_dict': model.state_dict(),
'Best_ACC': Best_ACC
}
if use_ema:
state['state_dict_ema']=model.state_dict()
torch.save(state, file_dir + "/" + 'model_' + str(epoch) + '_' + str(round(acc, 3)) + '.pth')
return val_list, pred_list, loss_meter.avg, acc
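The AverageMeter and accuracy helpers used above are not part of the listing; a minimal sketch consistent with how they are called is given below (timm ships equivalent utilities in timm.utils; the top-k clamp mirrors timm's behavior so that topk=(1, 5) still works with only 3 classes):

import torch

class AverageMeter:
    """Keeps a running average of a scalar metric."""
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Top-k accuracy in percent for a batch of logits."""
    maxk = min(max(topk), output.size(1))  # clamp k to the number of classes
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.reshape(1, -1).expand_as(pred))
    return [correct[:min(k, maxk)].reshape(-1).float().sum(0) * 100. / batch_size
            for k in topk]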
def seed_everything(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
if __name__ == '__main__':
    file_dir = 'checkpoints/inceptionnext/'
    os.makedirs(file_dir, exist_ok=True)
    # global settings
    model_lr = 1e-3
    BATCH_SIZE = 16
    EPOCHS = 100
    DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    use_amp = True  # whether to use mixed precision
    use_dp = True   # whether to use DataParallel on multiple GPUs
    classes = 3
    resume = None
    # to resume training, point resume at a saved checkpoint, e.g.:
    # resume = "checkpoints/inceptionnext/model_46_52.632.pth"
    CLIP_GRAD = 5.0
    Best_ACC = 0
    use_ema = False
    model_ema_decay = 0.9998
    start_epoch = 1
    seed = 1
seed_everything(seed)
    # data preprocessing
transform = transforms.Compose([
transforms.RandomRotation(10),
transforms.GaussianBlur(kernel_size=(5,5),sigma=(0.1, 3.0)),
transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.4574203, 0.51029885, 0.2715279], std= [0.19837953, 0.16945386, 0.16967356])
])
transform_test = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.4574203, 0.51029885, 0.2715279], std= [0.19837953, 0.16945386, 0.16967356])
])
mixup_fn = Mixup(
mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None,
prob=0.1, switch_prob=0.5, mode='batch',
label_smoothing=0.1, num_classes=classes)
    # load the datasets
dataset_train = datasets.ImageFolder('data/train', transform=transform)
dataset_test = datasets.ImageFolder("data/val", transform=transform_test)
with open('class.txt', 'w') as file:
file.write(str(dataset_train.class_to_idx))
with open('class.json', 'w', encoding='utf-8') as file:
file.write(json.dumps(dataset_train.class_to_idx))
    # build the data loaders
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True,drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
criterion_train = SoftTargetCrossEntropy()
criterion_val = torch.nn.CrossEntropyLoss()
    # build the model: inceptionnext_small comes from the official repo
    # (models/inceptionnext.py); its MLP head ends in head.fc2, which we
    # replace to match our 3 classes
    model_ft = inceptionnext_small(pretrained=True)
    # print(model_ft)
    num_fr = model_ft.head.fc2.in_features
    model_ft.head.fc2 = nn.Linear(num_fr, classes)
    # print(model_ft)
    if resume:
        checkpoint = torch.load(resume)
        print(checkpoint['state_dict'].keys())
        model_ft.load_state_dict(checkpoint['state_dict'])
        Best_ACC = checkpoint['Best_ACC']
        start_epoch = checkpoint['epoch'] + 1
model_ft.to(DEVICE)
    # use the plain AdamW optimizer with a lowered learning rate
optimizer = optim.AdamW(model_ft.parameters(),lr=model_lr)
cosine_schedule = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=20, eta_min=1e-6)
if use_amp:
scaler = torch.cuda.amp.GradScaler()
if torch.cuda.device_count() > 1 and use_dp:
print("Let's use", torch.cuda.device_count(), "GPUs!")
model_ft = torch.nn.DataParallel(model_ft)
if use_ema:
model_ema = ModelEma(
model_ft,
decay=model_ema_decay,
device=DEVICE,
resume=resume)
else:
model_ema=None
    # training and validation
is_set_lr = False
log_dir = {}
train_loss_list, val_loss_list, train_acc_list, val_acc_list, epoch_list = [], [], [], [], []
if resume and os.path.isfile(file_dir+"result.json"):
with open(file_dir+'result.json', 'r', encoding='utf-8') as file:
logs = json.load(file)
train_acc_list = logs['train_acc']
train_loss_list = logs['train_loss']
val_acc_list = logs['val_acc']
val_loss_list = logs['val_loss']
epoch_list = logs['epoch_list']
for epoch in range(start_epoch, EPOCHS + 1):
epoch_list.append(epoch)
log_dir['epoch_list'] = epoch_list
train_loss, train_acc = train(model_ft, DEVICE, train_loader, optimizer, epoch,model_ema)
train_loss_list.append(train_loss)
train_acc_list.append(train_acc)
log_dir['train_acc'] = train_acc_list
log_dir['train_loss'] = train_loss_list
if use_ema:
val_list, pred_list, val_loss, val_acc = val(model_ema.ema, DEVICE, test_loader)
else:
val_list, pred_list, val_loss, val_acc = val(model_ft, DEVICE, test_loader)
val_loss_list.append(val_loss)
val_acc_list.append(val_acc)
log_dir['val_acc'] = val_acc_list
log_dir['val_loss'] = val_loss_list
log_dir['best_acc'] = Best_ACC
with open(file_dir + '/result.json', 'w', encoding='utf-8') as file:
file.write(json.dumps(log_dir))
        print(classification_report(val_list, pred_list, target_names=list(dataset_train.class_to_idx)))
if epoch < 600:
cosine_schedule.step()
else:
if not is_set_lr:
for param_group in optimizer.param_groups:
param_group["lr"] = 1e-6
is_set_lr = True
fig = plt.figure(1)
        plt.plot(epoch_list, train_loss_list, 'r-', label='Train Loss')
        plt.plot(epoch_list, val_loss_list, 'b-', label='Val Loss')
        plt.legend(["Train Loss", "Val Loss"], loc="upper right")  # show the legend
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Model Loss')
plt.savefig(file_dir + "/loss.png")
plt.close(1)
fig2 = plt.figure(2)
        plt.plot(epoch_list, train_acc_list, 'r-', label='Train Acc')
        plt.plot(epoch_list, val_acc_list, 'b-', label='Val Acc')
plt.legend(["Train Acc", "Val Acc"], loc="lower right")
plt.title("Model Acc")
plt.ylabel("acc")
plt.xlabel("epoch")
plt.savefig(file_dir + "/acc.png")
plt.close(2)
Data augmentation:
# random rotation, Gaussian blur, color jitter
transform = transforms.Compose([
transforms.RandomRotation(10),
transforms.GaussianBlur(kernel_size=(5,5),sigma=(0.1, 3.0)),
transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.4574203, 0.51029885, 0.2715279], std= [0.19837953, 0.16945386, 0.16967356])
])
# mixup
mixup_fn = Mixup(
mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None,
prob=0.1, switch_prob=0.5, mode='batch',
label_smoothing=0.1, num_classes=classes)
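To see what Mixup feeds the loss, here is a small sketch (the dummy batch and classes=3 are illustrative); it returns soft label vectors, which is why training uses SoftTargetCrossEntropy rather than plain CrossEntropyLoss:

import torch
from timm.data import Mixup

mixup_fn = Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, cutmix_minmax=None,
                 prob=0.1, switch_prob=0.5, mode='batch',
                 label_smoothing=0.1, num_classes=3)
images = torch.randn(8, 3, 224, 224)  # dummy batch
labels = torch.randint(0, 3, (8,))    # integer class labels
mixed_images, soft_targets = mixup_fn(images, labels)
print(mixed_images.shape, soft_targets.shape)  # (8, 3, 224, 224) and (8, 3)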
EMA (exponential moving average):
import torch

class EMA():
    def __init__(self, alpha):
        self.alpha = alpha   # smoothing factor
        self.average = None  # running average, lazily initialized
        self.count = 0       # number of updates seen

    def update(self, x):
        if self.average is None:  # initialize as a zero tensor shaped like x
            self.average = torch.zeros_like(x)
        self.average = self.alpha * x + (1 - self.alpha) * self.average  # update the running average
        self.count += 1  # increment the update counter

    def get(self):
        # bias-corrected EMA: with this update rule the weights on past values
        # sum to 1 - (1 - alpha)^count, so divide by that factor
        return self.average / (1 - (1 - self.alpha) ** self.count)
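A quick usage sketch of this helper with made-up values (note that the training script itself uses timm's ModelEma, not this class):

ema = EMA(alpha=0.1)
for v in [1.0, 2.0, 3.0]:
    ema.update(torch.tensor(v))
print(ema.get())  # bias-corrected running average of the three values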
Results:
The final validation loss is 0.095 and the accuracy reaches 99.62%!