实验准备
基础网络搭建
为了实现神经网络的 Deep Compression,首先需要训练一个深度神经网络。为方便起见,这里搭建一个由两层卷积加两层全连接(MLP)组成的神经网络。
class net(pt.nn.Module):
    """Small image classifier: two 3x3 convolutions followed by two linear layers.

    Each convolution is followed by ReLU and 2x2 max pooling. With the
    padding used here a 1x28x28 input becomes 256 feature maps of size
    7x7, which are flattened and mapped to 512 hidden units and then to
    10 output scores.
    """

    def __init__(self):
        super().__init__()
        # Registration order is the order shown by print(model).
        self.conv1 = pt.nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = pt.nn.Conv2d(64, 256, kernel_size=3, padding=1)
        self.fc1 = pt.nn.Linear(7 * 7 * 256, 512)
        self.fc2 = pt.nn.Linear(512, 10)
        self.pool = pt.nn.MaxPool2d(2)

    def forward(self, x):
        """Return 10 unnormalised scores per sample for input batch ``x``."""
        relu = pt.nn.functional.relu
        # Both convolution stages share the same conv -> ReLU -> pool pattern.
        for conv in (self.conv1, self.conv2):
            x = self.pool(relu(conv(x)))
        # Flatten the 256 x 7 x 7 feature maps into one vector per sample.
        flat = x.view((-1, 7 * 7 * 256))
        hidden = relu(self.fc1(flat))
        return self.fc2(hidden)
# Build the network, move it to the GPU, and smoke-test it on one random
# 1x28x28 input.
model = net()
model.cuda()  # Module.cuda() moves the parameters in place and returns the module
print(model)
print(model(pt.rand(1, 1, 28, 28).cuda()))
net(
(conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fc1): Linear(in_features=12544, out_features=512, bias=True)
(fc2): Linear(in_features=512, out_features=10, bias=True)
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
tensor(1.00000e-02 *
[[-7.7157, 3.0435, -6.5732, 6.5343, -4.2159, -2.8651, -0.6792,
3.9223, -3.7523, 2.4532]], device='cuda:0')
基础网络训练
准备数据集
# MNIST train and test splits stored under "./"; only the training split
# requests a download. Both loaders shuffle and use batches of 128.
train_dataset = ptv.datasets.MNIST(
    root="./",
    train=True,
    download=True,
    transform=ptv.transforms.ToTensor(),
)
test_dataset = ptv.datasets.MNIST(
    root="./",
    train=False,
    transform=ptv.transforms.ToTensor(),
)
trainloader = pt.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
)
testloader = pt.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=True,
)
代价函数与优化器
# Cross-entropy loss on the network outputs, optimised with Adam at a
# learning rate of 1e-4.
lossfunc = pt.nn.CrossEntropyLoss()
lossfunc.cuda()  # in-place move; returns the same module
optimizer = pt.optim.Adam(params=model.parameters(), lr=1e-4)
def acc(outputs, label):
    """Return the fraction of rows whose highest-scoring column equals ``label``.

    ``outputs`` holds one row of scores per sample and ``label`` the
    matching class indices; the result is a plain Python float.
    """
    # Index of the largest score along dim 1 is the predicted class.
    predicted = outputs.max(dim=1)[1]
    hits = predicted.float().eq(label.float())
    return hits.float().mean().item()
网络训练
# One pass over the training set; the accuracy of the current batch is
# printed every 100 steps.
for _ in range(1):
    for i, (data, label) in enumerate(trainloader):
        data = data.cuda()
        label = label.cuda()
        model.zero_grad()  # clear gradients left over from the previous step
        outputs = model(data)
        loss = lossfunc(outputs, label)
        loss.backward()
        optimizer.step()
        if i % 100:
            continue
        print(i, acc(outputs, label))
0 0.1171875
100 0.8984375
200 0.953125
300 0.984375
400 0.96875
测试网络
def test_model(model,testloader):
result = []
for data,label in testloader:
data,label = data.cuda(),label