Since grouped convolutions exist, there should naturally be a grouped dense (fully connected) layer as well.
Like grouped convolution, a grouped dense layer can cut the parameter count substantially, although I haven't seen it used in papers, so I can't say how well it works in practice.
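To see where the saving comes from, here is a back-of-the-envelope sketch (my own illustration, not part of the experiment below): splitting a dense layer into g groups divides its weight count by g, because each group only connects 1/g of the inputs to 1/g of the outputs.

# Rough illustration with hypothetical layer sizes: weight count of a plain dense layer vs. a grouped one.
in_feat, out_feat, groups = 512, 384, 16
plain_params = in_feat * out_feat                                      # 196608 weights
grouped_params = groups * (in_feat // groups) * (out_feat // groups)  # 12288 weights
print(plain_params / grouped_params)                                   # 16.0, i.e. a `groups`-fold reduction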
Below are the results of a simple experiment on MNIST, comparing group_dense with dense:
group_dense runs a bit slower;
its validation accuracy improves a bit more slowly;
both reach a final validation accuracy of about 97%;
the model with plain dense layers is about 2 MB;
the model with group_dense is about 0.2 MB.
Honestly, this kind of experiment is a bit tedious... you can predict the outcome in advance: the relationship is the same as between ordinary and grouped convolutions.
Code for the simple experiment:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torchvision.datasets.mnist import MNIST
from collections.abc import Callable as _Callable
# Model definition
class Dense(torch.jit.ScriptModule):
    def __init__(self, in_feat, out_feat, act=None, bias=True, *, norm_layer_kwargs={}):
        super().__init__()
        layers = []
        # `bias` may be True/False or a norm-layer constructor (e.g. nn.BatchNorm1d);
        # when a norm layer is passed, the Linear layer's own bias is disabled.
        den = nn.Linear(in_feat, out_feat, bias=bias is True)
        layers.append(den)
        if isinstance(bias, _Callable):
            layers.append(bias(out_feat, **norm_layer_kwargs))
        if act:
            layers.append(act)
        self.layers = nn.Sequential(*layers)

    @torch.jit.script_method
    def forward(self, inputs):
        outputs = self.layers(inputs)
        return outputs

class Dense_Group(torch.jit.ScriptModule):
    __constants__ = ['groups']

    def __init__(self, in_feat, out_feat, act=None, bias=True, groups=16, *, norm_layer_kwargs={}):
        super().__init__()
        self.groups = groups
        in_feat_g = in_feat // groups
        out_feat_g = out_feat // groups
        assert in_feat_g * groups == in_feat, 'Found in_feat_g * groups != in_feat'
        assert out_feat_g * groups == out_feat, 'Found out_feat_g * groups != out_feat'
        # One small Linear layer per group; each one sees only 1/groups of the input features.
        dense_group = []
        for i in range(groups):
            den = nn.Linear(in_feat_g, out_feat_g, bias=bias is True)
            dense_group.append(den)
        self.dense_group = nn.ModuleList(dense_group)
        layers = []
        if isinstance(bias, _Callable):
            layers.append(bias(out_feat, **norm_layer_kwargs))
        if act:
            layers.append(act)
        if len(layers) > 0:
            self.layers = nn.Sequential(*layers)
        else:
            self.layers = nn.Identity()

    # @torch.jit.script_method  # left unscripted, presumably because iterating the ModuleList fails under scripting
    def forward(self, inputs):
        # Split the features into `groups` chunks, run each through its own Linear, then concatenate.
        inputs_groups = torch.chunk(inputs, self.groups, 1)
        outputs_groups = []
        for i, m in enumerate(self.dense_group):
            outputs_groups.append(m(inputs_groups[i]))
        outputs = torch.cat(outputs_groups, 1)
        outputs = self.layers(outputs)
        return outputs

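# Usage sketch (my addition, not in the original script): Dense_Group(512, 384, groups=16)
# applies 16 independent 32 -> 24 Linear layers to 16 chunks of the input and concatenates
# the results, which is equivalent to one Linear layer with a block-diagonal weight matrix.
# Example (assumed shapes):
#   layer = Dense_Group(512, 384, groups=16)
#   out = layer(torch.randn(4, 512))   # out.shape == (4, 384)
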
class Net(nn.Module):
    def __init__(self, use_group_dense=True):
        super().__init__()
        act = nn.LeakyReLU(0.01)
        norm1d = nn.BatchNorm1d
        # The third positional argument is the activation, the fourth is the norm layer
        # (passed through the `bias` parameter), and the fifth is the group count.
        if use_group_dense:
            self.dense1 = Dense_Group(28 * 28 * 1, 512, act, norm1d, 16)
            self.dense2 = Dense_Group(512, 384, act, norm1d, 16)
            self.dense3 = Dense_Group(384, 256, act, norm1d, 8)
            self.dense4 = Dense_Group(256, 128, act, norm1d, 8)
        else:
            self.dense1 = Dense(28 * 28 * 1, 512, act, norm1d)
            self.dense2 = Dense(512, 384, act, norm1d)
            self.dense3 = Dense(384, 256, act, norm1d)
            self.dense4 = Dense(256, 128, act, norm1d)
        self.dense5 = Dense(128, 10, None, True)

    def forward(self, x):
        y = torch.flatten(x, 1)
        y = self.dense1(y)
        y = self.dense2(y)
        y = self.dense3(y)
        y = self.dense4(y)
        y = self.dense5(y)
        return y

use_train_data_for_test = False
use_group_dense = True

# Load MNIST and convert it to float32 arrays in [0, 1] with a trailing channel axis (NHWC)
train_dataset = MNIST('datasets', True, download=True)
x_train, y_train = train_dataset.data, train_dataset.targets
val_dataset = MNIST('datasets', False)
x_val, y_val = val_dataset.data, val_dataset.targets

y_train = np.asarray(y_train)
y_val = np.asarray(y_val)
x_train = np.float32(x_train[..., None]) / 255
x_val = np.float32(x_val[..., None]) / 255

del train_dataset
del val_dataset

if use_train_data_for_test:
    x_val, y_val = x_train, y_train

def train(model, device, optimizer, epoch, batch_size=100):
    model.train()
    batch_count = int(np.round(len(x_train) / batch_size))
    for batch_idx in range(batch_count):
        # Slice out a mini-batch and move it to NCHW layout
        data = np.transpose(x_train[batch_idx*batch_size : (batch_idx+1)*batch_size], [0, 3, 1, 2])
        target = y_train[batch_idx*batch_size : (batch_idx+1)*batch_size]
        data = torch.Tensor(data)
        target = torch.Tensor(target).long()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx:  # log the running loss for every batch after the first
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(x_train),
                100. * batch_idx / batch_count, loss.item()))

def test(model, device):
    model.eval()
    test_loss = 0
    correct = 0
    batch_size = 100
    batch_count = int(np.round(len(x_val) / batch_size))
    with torch.no_grad():
        for batch_idx in range(batch_count):
            data = np.transpose(x_val[batch_idx * batch_size: (batch_idx + 1) * batch_size], [0, 3, 1, 2])
            target = y_val[batch_idx * batch_size: (batch_idx + 1) * batch_size]
            data = torch.Tensor(data)
            target = torch.Tensor(target).long()
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(x_val)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(x_val),
        100. * correct / len(x_val)))

# Training settings
use_cuda = torch.cuda.is_available()
torch.manual_seed(int(time.time()))
device = torch.device("cuda" if use_cuda else "cpu")

model = Net(use_group_dense).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    train(model, device, optimizer, epoch, 1000)
    test(model, device)

torch.save(model.state_dict(), "mnist_cnn.pt")
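As a quick sanity check (my addition, not part of the original post), comparing the parameter counts of the two variants roughly accounts for the tenfold difference in saved model size:

# Hypothetical check: count the trainable parameters of both model variants.
def count_params(m):
    return sum(p.numel() for p in m.parameters())

print('dense:      ', count_params(Net(use_group_dense=False)))  # on the order of 7e5 parameters
print('group_dense:', count_params(Net(use_group_dense=True)))   # on the order of 6e4 parameters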