Neural networks: an integer-to-one-hot experiment finds that the Adam optimizer is not a cure-all, and RMSprop sometimes works better

This post uses a simple experiment, converting sets of integers of different sizes into one-hot vectors, to compare how different data preprocessing methods and optimizers affect training.

Experiment: use a 2-layer MLP to map the 16 integers 0-15 to one-hot vectors of length 16, requiring 100% accuracy.
To increase the difficulty, extend this to 100 integers with a one-hot length of 100.
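A one-hot vector of length 16 has a single 1 at the position of the integer it encodes. Below is a minimal illustration, assuming PyTorch's F.one_hot helper (the training scripts further down achieve the same effect by passing integer class indices straight to F.cross_entropy instead of building explicit one-hot targets):

import torch
import torch.nn.functional as F

indices = torch.arange(16)                   # the integers 0..15
onehot = F.one_hot(indices, num_classes=16)  # shape (16, 16): row i has its 1 at column i
print(onehot[5])
# tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])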

With 16 integers and no preprocessing of the input:
Using the Adam optimizer, it usually takes more than 10,000 iterations to reach 100% accuracy.
Using the RMSprop optimizer, 100% accuracy is reached within about 2,000 iterations in most runs.

With 100 integers:
Using the RMSprop optimizer:
Without preprocessing, accuracy is only 0.45 after 40,000 iterations.
With min-max normalization, accuracy reaches 1.0 after roughly 5,100 iterations.
With zero-centered normalization (scaled to [-1, 1]), accuracy reaches 1.0 after roughly 3,600 iterations.
With standardization (zero mean, unit variance), accuracy reaches 1.0 after roughly 2,300 iterations.
Using the Adam optimizer, training does not converge with any of these preprocessing methods.
(The exact transforms are spelled out in the sketch right after this summary.)
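A minimal sketch of the three preprocessing variants as they appear in the scripts below, applied to np.arange(100); the names follow the article, where "standardization" denotes the (x - mean) / std transform:

import numpy as np

x = np.arange(100).astype(np.float64)

x_minmax = (x - x.min()) / (x.max() - x.min())  # min-max normalization: values in [0, 1]
x_centered = x_minmax * 2 - 1                   # zero-centered normalization: values in [-1, 1]
x_standard = (x - x.mean()) / x.std()           # standardization: mean ~ 0, std ~ 1

print(x_minmax.min(), x_minmax.max())           # 0.0 1.0
print(x_centered.min(), x_centered.max())       # -1.0 1.0
print(round(x_standard.mean(), 6), round(x_standard.std(), 6))  # ~0.0 ~1.0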

This is the 16-integer experiment; the 100-integer experiment follows further below.

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y  # raw scores; the sigmoid is unnecessary since F.cross_entropy is applied below


torch.set_grad_enabled(False)  # gradients off by default; mini_train re-enables them for training

in_dim = 1
out_dim = 4*4
inter_dim = int(out_dim*1.3)
n_layer = 2

net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'keep raw'

print('input preprocessing:', norm_type)

if norm_type == 'min-max normalization':
    # min-max normalization; with length 100, accuracy reaches 1.0 after roughly 5100 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-centered normalization':
    # zero-centered normalization (scale to [-1, 1]); with length 100, accuracy reaches 1.0 after roughly 3600 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'standardization':
    # standardization (zero mean, unit variance); with length 100, accuracy reaches 1.0 after roughly 2300 iterations
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'keep raw':
    # no preprocessing; by far the slowest: with length 100, accuracy was still only 0.45 after 50000 iterations, so the run was stopped
    x_train = x_train
else:
    print('unknown norm_type, keeping the raw data by default')

y_train = []

mask = np.ones(len(x_train))
# greedy "first come, first served" assignment: each input takes the untrained
# network's highest-scoring output slot that is still free, so the targets form
# a permutation of 0..out_dim-1
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min() - 1)  # shift all scores to be positive
    a *= mask           # zero out slots that are already taken
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0

# y_train = x_train.copy()
print(y_train)

def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # train until the accuracy reaches 1.0
    optim = torch.optim.RMSprop(net.parameters(), lr=0.001)
    # optim = torch.optim.Adam(net.parameters(), lr=0.001)
    x_train = torch.Tensor(x_train)
    y_train = torch.Tensor(y_train)

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(torch.Tensor(xs)).numpy()
        return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            batch_x = x_train.type(torch.float32)
            batch_y = y_train.type(torch.long)
            optim.zero_grad()  # clear the gradients from the previous step
            out = net(batch_x)
            loss = F.cross_entropy(out, batch_y)
            loss.backward()
            optim.step()

            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc == 1:
                break


mini_train(net, x_train[:, None], y_train)

print('over')

This is the 100-integer experiment:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y  # raw scores; the sigmoid is unnecessary since F.cross_entropy is applied below


torch.set_grad_enabled(False)  # gradients off by default; mini_train re-enables them for training

in_dim = 1
out_dim = 10*10
inter_dim = int(out_dim*1.3)
n_layer = 2

net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'standardization'
optim_type = torch.optim.RMSprop
# optim_type = torch.optim.Adam


print('input preprocessing:', norm_type)

if norm_type == 'min-max normalization':
    # min-max normalization; with length 100, accuracy reaches 1.0 after roughly 5100 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-centered normalization':
    # zero-centered normalization (scale to [-1, 1]); with length 100, accuracy reaches 1.0 after roughly 3600 iterations
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'standardization':
    # standardization (zero mean, unit variance); with length 100, accuracy reaches 1.0 after roughly 2300 iterations
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'keep raw':
    # no preprocessing; by far the slowest: with length 100, accuracy was still only 0.45 after 50000 iterations, so the run was stopped
    x_train = x_train
else:
    print('unknown norm_type, keeping the raw data by default')

y_train = []

mask = np.ones(len(x_train))
# greedy "first come, first served" assignment: each input takes the untrained
# network's highest-scoring output slot that is still free, so the targets form
# a permutation of 0..out_dim-1
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min() - 1)  # shift all scores to be positive
    a *= mask           # zero out slots that are already taken
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0

# y_train = x_train.copy()
print(y_train)

def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # train until the accuracy reaches 1.0
    optim = optim_type(net.parameters(), lr=0.001)
    x_train = torch.Tensor(x_train)
    y_train = torch.Tensor(y_train)

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(torch.Tensor(xs)).numpy()
        return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            batch_x = x_train.type(torch.float32)
            batch_y = y_train.type(torch.long)
            optim.zero_grad()  # clear the gradients from the previous step
            out = net(batch_x)
            loss = F.cross_entropy(out, batch_y)
            loss.backward()
            optim.step()

            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc == 1:
                break


mini_train(net, x_train[:, None], y_train)

print('over')
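To make the optimizer comparison easy to reproduce, here is a minimal, self-contained sketch (not the author's original harness) that trains the same kind of 2-layer MLP on the 100-integer task with standardized inputs, once with RMSprop and once with Adam, starting from identical initial weights. For simplicity the target is the identity permutation rather than the greedy "first come, first served" assignment used above, so the exact iteration counts will differ from run to run and from the numbers reported in this post.

import copy
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.set_grad_enabled(True)  # make sure gradients are on (the scripts above turn them off globally)


def run(optim_cls, net, x, y, lr=0.001, max_steps=50000):
    # train until every input is classified correctly, or give up after max_steps
    optim = optim_cls(net.parameters(), lr=lr)
    for step in range(1, max_steps + 1):
        optim.zero_grad()
        loss = F.cross_entropy(net(x), y)
        loss.backward()
        optim.step()
        with torch.no_grad():
            if (net(x).argmax(1) == y).all():
                return step  # iterations needed to reach 100% accuracy
    return None  # did not converge within max_steps


n = 100
x = torch.arange(n, dtype=torch.float32)[:, None]
x = (x - x.mean()) / x.std()  # standardization, the fastest variant above
y = torch.arange(n)           # identity mapping as a simple stand-in target

# same layout as SelectNet: Linear + LeakyReLU, twice, with inter_dim = int(100 * 1.3)
base = nn.Sequential(nn.Linear(1, 130), nn.LeakyReLU(0.1),
                     nn.Linear(130, n), nn.LeakyReLU(0.1))

for cls in (torch.optim.RMSprop, torch.optim.Adam):
    steps = run(cls, copy.deepcopy(base), x, y)  # both runs start from identical weights
    print(cls.__name__, steps)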