Since grouped convolutions exist, there should naturally be a grouped dense (fully connected) layer as well.
Like grouped convolution, a grouped dense layer can cut the parameter count substantially, although I haven't seen it used in papers, so I can't say how well it works in practice.
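To see where the saving comes from, here is a back-of-the-envelope sketch (my own illustration, not part of the experiment below): splitting a dense layer into g groups divides its weight count by g, because each group only connects 1/g of the inputs to 1/g of the outputs.

# Rough illustration with hypothetical layer sizes: weight count of a plain dense layer vs. a grouped one.
in_feat, out_feat, groups = 512, 384, 16
plain_params = in_feat * out_feat                                      # 196608 weights
grouped_params = groups * (in_feat // groups) * (out_feat // groups)  # 12288 weights
print(plain_params / grouped_params)                                   # 16.0, i.e. a `groups`-fold reduction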
Below are the results of a simple experiment on MNIST, comparing group_dense with dense:
group_dense runs a bit slower;
its validation accuracy improves a bit more slowly;
both reach a final validation accuracy of about 97%;
the model with plain dense layers is about 2 MB;
the model with group_dense is about 0.2 MB.
Honestly, this kind of experiment is a bit tedious... you can predict the outcome in advance: the relationship is the same as between ordinary and grouped convolutions.
Code for the simple experiment:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torchvision.datasets.mnist import MNIST
from collections.abc import Callable as _Callable
# Model definition
class Dense(torch.jit.ScriptModule):
    def __init__(self, in_feat, out_feat, act=None, bias=True, *, norm_layer_kwargs={}):
        super().__init__()
        layers = []
        # `bias` may be True/False or a norm-layer constructor (e.g. nn.BatchNorm1d);
        # when a norm layer is passed, the Linear layer's own bias is disabled.
        den = nn.Linear(in_feat, out_feat, bias=bias is True)
        layers.append(den)
        if isinstance(bias, _Callable):
            layers.append(bias(out_feat, **norm_layer_kwargs))
        if act:
            layers.append(act)
        self.layers = nn.Sequential(*layers)

    @torch.jit.script_method
    def forward(self, inputs):
        outputs = self.layers(inputs)
        return outputs

class Dense_Group(torch.jit.ScriptModule):
    __constants__ = ['groups']

    def __init__(self, in_feat, out_feat, act=None, bias=True, groups=16, *, norm_layer_kwargs={}):
        super().__init__()
        self.groups = groups
        in_feat_g = in_feat // groups
        out_feat_g = out_feat // groups
        assert in_feat_g * groups == in_feat, 'Found in_feat_g * groups != in_feat'
        assert out_feat_g * groups == out_feat, 'Found out_feat_g * groups != out_feat'
        # One small Linear layer per group; each one sees only 1/groups of the input features.
        dense_group = []
        for i in range(groups):
            den = nn.Linear(in_feat_g, out_feat_g, bias=bias is True)
            dense_group.append(den)
        self.dense_group = nn.ModuleList(dense_group)
        layers = []
        if isinstance(bias, _Callable):
            layers.append(bias(out_feat, **norm_layer_kwargs))
        if act:
            layers.append(act)
        if len(layers) > 0:
            self.layers = nn.Sequential(*layers)
        else:
            self.layers = nn.Identity()

    # @torch.jit.script_method  # left unscripted, presumably because iterating the ModuleList fails under scripting
    def forward(self, inputs):
        # Split the features into `groups` chunks, run each through its own Linear, then concatenate.
        inputs_groups = torch.chunk(inputs, self.groups, 1)
        outputs_groups = []
        for i, m in enumerate(self.dense_group):
            outputs_groups.append(m(inputs_groups[i]))
        outputs = torch.cat(outputs_groups, 1)
        outputs = self.layers(outputs)
        return outputs

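# Usage sketch (my addition, not in the original script): Dense_Group(512, 384, groups=16)
# applies 16 independent 32 -> 24 Linear layers to 16 chunks of the input and concatenates
# the results, which is equivalent to one Linear layer with a block-diagonal weight matrix.
# Example (assumed shapes):
#   layer = Dense_Group(512, 384, groups=16)
#   out = layer(torch.randn(4, 512))   # out.shape == (4, 384)
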
class Net(nn.Module):
    def __init__(self, use_group_dense=True):
        super().__init__()
        act = nn.LeakyReLU(0.01)
        norm1d = nn.BatchNorm1d
        # The third positional argument is the activation, the fourth is the norm layer
        # (passed through the `bias` parameter), and the fifth is the group count.
        if use_group_dense:
            self.dense1 = Dense_Group(28 * 28 * 1, 512, act, norm1d, 16)
            self.dense2 = Dense_Group(512, 384, act, norm1d, 16)
            self.dense3 = Dense_Group(384, 256, act, norm1d, 8)
            self.dense4 = Dense_Group(256, 128, act, norm1d, 8)
        else:
            self.dense1 = Dense(28 * 28 * 1, 512, act, norm1d)
            self.dense2 = Dense(512, 384, act, norm1d)
            self.dense3 = Dense(384, 256, act, norm1d)
            self.dense4 = Dense(256, 128, act, norm1d)
        self.dense5 = Dense(128, 10, None, True)

    def forward(self, x):
        y = torch.flatten(x, 1)
        y = self.dense1(y)
        y = self.dense2(y)
        y = self.dense3(y)
        y = self.dense4(y)
        y = self.dense5(y)
        return y

use_train_data_for_test = False
use_group_dense = True

# Load MNIST and convert it to float32 arrays in [0, 1] with a trailing channel axis (NHWC)
train_dataset = MNIST('datasets', True, download=True)
x_train, y_train = train_dataset.data, train_dataset.targets
val_dataset = MNIST('datasets', False)
x_val, y_val = val_dataset.data, val_dataset.targets

y_train = np.asarray(y_train)
y_val = np.asarray(y_val)
x_train = np.float32(x_train[..., None]) / 255
x_val = np.float32(x_val[..., None]) / 255

del train_dataset
del val_dataset

if use_train_data_for_test:
    x_val, y_val = x_train, y_train

def train(model, device, optimizer, epoch, batch_size=100):
    model.train()
    batch_count = int(np.round(len(x_train) / batch_size))
    for batch_idx in range(batch_count):
        # Slice out a mini-batch and move it to NCHW layout
        data = np.transpose(x_train[batch_idx*batch_size : (batch_idx+1)*batch_size], [0, 3, 1, 2])
        target = y_train[batch_idx*batch_size : (batch_idx+1)*batch_size]
        data = torch.Tensor(data)
        target = torch.Tensor(target).long()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx:  # log the running loss for every batch after the first
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(x_train),
                100. * batch_idx / batch_count, loss.item()))

def test(model, device):
    model.eval()
    test_loss = 0
    correct = 0
    batch_size = 100
    batch_count = int(np.round(len(x_val) / batch_size))
    with torch.no_grad():
        for batch_idx in range(batch_count):
            data = np.transpose(x_val[batch_idx * batch_size: (batch_idx + 1) * batch_size], [0, 3, 1, 2])
            target = y_val[batch_idx * batch_size: (batch_idx + 1) * batch_size]
            data = torch.Tensor(data)
            target = torch.Tensor(target).long()
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(x_val)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(x_val),
        100. * correct / len(x_val)))

# Training settings
use_cuda = torch.cuda.is_available()
torch.manual_seed(int(time.time()))
device = torch.device("cuda" if use_cuda else "cpu")

model = Net(use_group_dense).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    train(model, device, optimizer, epoch, 1000)
    test(model, device)

torch.save(model.state_dict(), "mnist_cnn.pt")
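As a quick sanity check (my addition, not part of the original post), comparing the parameter counts of the two variants roughly accounts for the tenfold difference in saved model size:

# Hypothetical check: count the trainable parameters of both model variants.
def count_params(m):
    return sum(p.numel() for p in m.parameters())

print('dense:      ', count_params(Net(use_group_dense=False)))  # on the order of 7e5 parameters
print('group_dense:', count_params(Net(use_group_dense=True)))   # on the order of 6e4 parameters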