Reference: https://huaweicloud.youkuaiyun.com/63806da8dacf622b8df88246.html
Study notes:
nn.Sequential():
When building a network, this container holds modules and runs them in order. You can add modules with add_module('name', nn.Conv2d()) (similar to a list's append); adding a module under an existing name overwrites the old one. Alternatively, build a Python list, append modules to it, and create the container with nn.Sequential(*listname). A minimal sketch of these styles follows.
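A minimal sketch of the three construction styles described above (the layer shapes and the names 'conv1'/'relu1' are arbitrary placeholders):

import torch.nn as nn

# Style 1: pass modules positionally
seq1 = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU())

# Style 2: add_module, like a list's append; reusing a name overwrites the old module
seq2 = nn.Sequential()
seq2.add_module('conv1', nn.Conv2d(3, 16, 3, padding=1))
seq2.add_module('relu1', nn.ReLU())

# Style 3: build a Python list, then unpack it into the container
layers = []
layers.append(nn.Conv2d(3, 16, 3, padding=1))
layers.append(nn.ReLU())
seq3 = nn.Sequential(*layers)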
Residual blocks:
In simple terms, the output of the convolutions is added back to the block's input: add first, then ReLU. Be careful that the stride and channel counts of the two paths match; a 1x1 convolution on the shortcut can change its channel count so the shapes agree (a quick shape check follows below).
When building the network, if every block is identical except for the channel count, you can construct the layers in a loop driven by a cfg list, or write a dedicated make_layer function, as the code below does.
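A quick shape check of the channel-matching idea (toy sizes; the Block class in the full code below wraps the same idea in a module and adds BatchNorm):

import torch
import torch.nn as nn

x = torch.randn(1, 64, 8, 8)                         # input: 64 channels
main = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # main path changes channels to 128
short = nn.Conv2d(64, 128, kernel_size=1)            # 1x1 conv so the shortcut also has 128 channels
out = torch.relu(main(x) + short(x))                 # add first, then ReLU
print(out.shape)                                     # torch.Size([1, 128, 8, 8])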
Dynamically adjusting the learning rate:
scheduler=torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,milestones=[int(epochs*0.5),int(epochs*0.75)],gamma=0.1)
milestones is a list; e.g. [10,15,20] means lr = lr * gamma is applied at those epochs. Call scheduler.step() at the end of each epoch to update the lr, and use scheduler.get_last_lr() to inspect it.
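A minimal runnable sketch of where step() goes (the Linear model is only a stand-in to give the optimizer parameters):

import torch
import torch.nn as nn

model = nn.Linear(10, 10)  # stand-in model, just for illustration
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
epochs = 20
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer=optimizer, milestones=[int(epochs * 0.5), int(epochs * 0.75)], gamma=0.1)
for epoch in range(epochs):
    # ...forward / backward would go here...
    optimizer.step()   # PyTorch warns if scheduler.step() is called before optimizer.step()
    scheduler.step()   # applies lr = lr * gamma at epochs 10 and 15
    print(epoch + 1, scheduler.get_last_lr())  # get_last_lr() returns a list, e.g. [0.01]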
Saving the model:
torch.save(model,'./cifar10model/model_{}.pt'.format(epoch+1))
The save statement can be placed inside the best-accuracy check, as in the training loop below.
import os
import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
class Block(nn.Module):
    """Basic residual block: two 3x3 convs; the input is added back before the final ReLU."""
    def __init__(self, inchannel, outchannel, res=True):
        super(Block, self).__init__()
        self.res = res  # if False, the shortcut is skipped (plain conv block)
        self.block = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        if inchannel != outchannel:
            # 1x1 conv so the shortcut's channel count matches the main path
            self.shortx = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, bias=False),
                nn.BatchNorm2d(outchannel),
            )
        else:
            self.shortx = nn.Sequential()  # empty Sequential acts as identity

        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.block(x)
        if self.res:
            out += self.shortx(x)
        return self.relu(out)
class Model(nn.Module):
    """VGG-style stack of residual Blocks; 'M' in cfg means a 2x2 max-pool."""
    def __init__(self, cfg=[64, 'M', 128, 'M', 256, 'M', 512, 'M'], res=True):
        super(Model, self).__init__()
        self.res = res
        self.cfg = cfg
        self.inchannel = 3  # CIFAR-10 images are 3x32x32
        self.conv = self.make_layer()
        # four 2x2 pools: 32 -> 16 -> 8 -> 4 -> 2, so the flattened size is 2*2*512
        self.layer = nn.Sequential(nn.Dropout(0.4), nn.Linear(2 * 2 * 512, 10))

    def make_layer(self):
        layer = []
        for item in self.cfg:
            if item == 'M':
                layer.append(nn.MaxPool2d(2))
            else:
                layer.append(Block(self.inchannel, item, res=self.res))
                self.inchannel = item
        return nn.Sequential(*layer)

    def forward(self, x):
        out = self.conv(x)
        out = out.view(-1, 2 * 2 * 512)  # flatten for the classifier
        out = self.layer(out)
        return out
batchsize = 512
lr = 0.01
allepoch = 20
starttime = time.time()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
traindata = datasets.CIFAR10(root='./cifar10', train=True, download=False, transform=transform)
testdata = datasets.CIFAR10(root='./cifar10', train=False, download=False, transform=transform)
trainloader = DataLoader(traindata, batch_size=batchsize, shuffle=True, drop_last=False)
testloader = DataLoader(testdata, batch_size=batchsize, shuffle=False, drop_last=False)
os.makedirs('./cifar10model', exist_ok=True)  # torch.save fails if the directory is missing
model = Model()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# drop lr by 10x at 50% and 75% of training
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer=optimizer, milestones=[int(allepoch * 0.5), int(allepoch * 0.75)], gamma=0.1)
def train(epochs):
    bestacc = 0.0
    bestepoch = 0
    epochl = []
    lossl = []
    accl = []
    for epoch in range(epochs):
        startepoch = time.time()
        lost = 0.0   # running sum of loss over all samples
        acount = 0   # number of samples seen this epoch
        model.train()
        for num, (x, y) in enumerate(trainloader):
            x, y = x.to(device), y.to(device)
            y_h = model(x)
            optimizer.zero_grad()
            loss = criterion(y_h, y)
            loss.backward()
            optimizer.step()
            lost += loss.item() * x.size(0)  # loss.item() is the per-sample mean
            acount += x.size(0)
        scheduler.step()  # update the learning rate once per epoch
        acc = test()
        if acc > bestacc:
            bestacc, bestepoch = acc, epoch + 1
            torch.save(model, './cifar10model/model_{}.pt'.format(epoch + 1))
        accl.append(acc)
        epochl.append(epoch + 1)
        lossl.append(lost / acount)
        print('Epoch{}:\n\tloss:{:.3f} acc:{:.3f} lr:{} '
              'bestepoch:{} bestacc:{:.3f} time:{:.3f}'.format(
                  epoch + 1, lost / acount, acc, scheduler.get_last_lr(),
                  bestepoch, bestacc, time.time() - startepoch))
    alltime = round(time.time() - starttime)
    print('bestepoch:{} bestacc:{:.3f} time:{}m {}s'.format(
        bestepoch, bestacc, alltime // 60, alltime % 60))
    plt.plot(epochl, lossl)
    plt.plot(epochl, accl)
    plt.legend(['loss', 'acc'])
    plt.savefig('./loss_and_acc.png')
def test():
    model.eval()
    with torch.no_grad():
        acc = 0
        acount = 0
        for num, (x, y) in enumerate(testloader):
            x, y = x.to(device), y.to(device)
            y_h = model(x)
            _, pre = torch.max(y_h, dim=1)  # predicted class = argmax of the logits
            acc += (pre == y).sum().item()
            acount += x.size(0)
    return acc / acount

train(allepoch)
One problem I ran into: if CrossEntropyLoss is created with reduction='sum', training misbehaves and the accuracy and loss barely move.
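My guess at the cause (my own reading, not verified against the original post): with reduction='sum' the loss, and therefore the gradients, scale with the batch size (512 here), so with an unchanged lr the updates are roughly 512x too large. Dividing the summed loss by the batch size recovers the default 'mean' behaviour:

import torch

criterion_sum = torch.nn.CrossEntropyLoss(reduction='sum')
criterion_mean = torch.nn.CrossEntropyLoss()    # default: reduction='mean'
y_h = torch.randn(512, 10)                      # fake logits for a batch of 512
y = torch.randint(0, 10, (512,))                # fake labels
print(criterion_sum(y_h, y).item())             # ~512x the mean loss
print(criterion_mean(y_h, y).item() * 512)      # same value
loss = criterion_sum(y_h, y) / y_h.size(0)      # manual averaging fixes the gradient scale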
Also, the residual-block code in the original blog looks slightly off: it has a stride check, but passing a stride other than 1 breaks it.
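I haven't rerun the blog's version, but a stride-aware variant of the Block above would pass the stride to the first 3x3 conv and give the 1x1 shortcut the same stride, so both paths downsample identically (a sketch following the standard ResNet downsample pattern, not the blog's exact code):

import torch.nn as nn

class StridedBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1, res=True):
        super().__init__()
        self.res = res
        self.block = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        if stride != 1 or inchannel != outchannel:
            # the shortcut must downsample with the same stride, or the shapes won't match
            self.shortx = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel),
            )
        else:
            self.shortx = nn.Sequential()
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.block(x)
        if self.res:
            out = out + self.shortx(x)
        return self.relu(out)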