Experiment: use a 2-layer MLP to map the 16 integers 0-15 to one-hot vectors of length 16, and require 100% accuracy.
Then increase the difficulty: use 100 integers, with a one-hot length of 100.
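For reference, a minimal sketch (not part of the experiment scripts below) of what "integer to one-hot" means here. The scripts train with cross-entropy on class indices, which is equivalent to fitting one-hot targets; note that in the scripts the class assigned to each integer is a permutation chosen from the untrained network's outputs, not necessarily the identity mapping.
import torch
import torch.nn.functional as F

ints = torch.arange(16)                    # the 16 integers 0..15
onehot = F.one_hot(ints, num_classes=16)   # shape (16, 16), a single 1 per row
print(onehot[3])                           # one-hot vector with a 1 at index 3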
With 16 integers and no preprocessing of the raw inputs:
With the Adam optimizer, it usually takes more than 10,000 iterations to reach 100% accuracy.
With the RMSprop optimizer, 100% accuracy is reached within 2,000 iterations in most cases.
With 100 integers:
Using the RMSprop optimizer:
Without preprocessing, accuracy is only 0.45 after 40,000 iterations (likely because the raw inputs span 0-99, leaving the first-layer activations and gradients poorly scaled).
With min-max normalization, accuracy reaches 1.0 after roughly 5,100 iterations.
With zero-centered min-max normalization (scaled to [-1, 1]), accuracy reaches 1.0 after roughly 3,600 iterations.
With z-score standardization, accuracy reaches 1.0 after roughly 2,300 iterations.
Using the Adam optimizer, training fails to converge regardless of the preprocessing.
The three preprocessing transforms are written out in the sketch below.
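For clarity, the three preprocessing variants compared above, applied to the raw integers 0..99 (the same formulas appear in the scripts below):
import numpy as np

x = np.arange(100).astype(np.float64)

x_minmax = (x - x.min()) / (x.max() - x.min())                  # min-max normalization -> [0, 1]
x_zero_centered = (x - x.min()) / (x.max() - x.min()) * 2 - 1   # zero-centered min-max -> [-1, 1]
x_zscore = (x - x.mean()) / x.std()                             # z-score standardization: mean 0, std 1

print(x_minmax.min(), x_minmax.max())                        # 0.0 1.0
print(x_zero_centered.min(), x_zero_centered.max())          # -1.0 1.0
print(round(x_zscore.mean(), 6), round(x_zscore.std(), 6))   # ~0.0 1.0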
Below is the 16-integer experiment; the 100-integer experiment follows it.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            # Note: the activation is appended after every Linear layer, including the last one.
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y  # no sigmoid here: F.cross_entropy expects unnormalized scores
torch.set_grad_enabled(False)

in_dim = 1
out_dim = 4 * 4
inter_dim = int(out_dim * 1.3)
n_layer = 2
net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'raw'
print('input preprocessing:', norm_type)
if norm_type == 'min-max':
    # Min-max normalization to [0, 1]; with 100 integers, accuracy reaches 1.0 at roughly 5,100 iterations.
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-centered':
    # Zero-centered min-max normalization to [-1, 1]; with 100 integers, accuracy reaches 1.0 at roughly 3,600 iterations.
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'z-score':
    # Z-score standardization; with 100 integers, accuracy reaches 1.0 at roughly 2,300 iterations.
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'raw':
    # No preprocessing; by far the slowest. With 100 integers, accuracy was still only 0.45 at 50,000 iterations when the run was stopped.
    pass
else:
    print('unknown norm_type, keeping raw inputs')
# Assign targets "first come, first served": each input takes the not-yet-used output
# position that the untrained network currently scores highest, so y_train ends up
# being a permutation of 0..out_dim-1.
y_train = []
mask = np.ones(len(x_train))
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min() - 1)  # shift so every score is >= 1 (strictly positive)
    a *= mask           # zero out positions that are already taken
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0
# y_train = x_train.copy()  # alternative: identity targets (only meaningful for raw 0..N-1 inputs)
print(y_train)
def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # Full-batch training; stops once accuracy reaches target_accuracy (default 1.0),
    # or after max_count iterations if max_count is given. batch_size is accepted but unused.
    optim = torch.optim.RMSprop(net.parameters(), lr=0.001)
    # optim = torch.optim.Adam(net.parameters(), lr=0.001)
    x_train = torch.Tensor(x_train)
    y_train = torch.Tensor(y_train)
    if target_accuracy is None:
        target_accuracy = 1.0

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(torch.Tensor(xs)).numpy()
            return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            batch_x = x_train.type(torch.float32)
            batch_y = y_train.type(torch.long)
            optim.zero_grad()  # clear accumulated gradients before this step
            out = net(batch_x)
            loss = F.cross_entropy(out, batch_y)
            loss.backward()
            optim.step()
            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc >= target_accuracy:
                break
            if max_count is not None and b + 1 >= max_count:
                break


mini_train(net, x_train[:, None], y_train)
print('over')
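As a small optional check (not in the original script), one can verify after mini_train returns that the trained network reproduces the assigned mapping exactly:
# Optional sanity check: confirm the trained net predicts exactly the assigned targets.
with torch.no_grad():
    pred = net(torch.Tensor(x_train)[:, None]).argmax(dim=1).numpy()
print('all predictions correct:', bool((pred == np.array(y_train)).all()))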
This is the 100-integer experiment.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class SelectNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=4*4, inter_dim=32, n_layer=2, act=nn.LeakyReLU(0.1)):
        super().__init__()
        self.mod_list = nn.ModuleList()
        for i in range(n_layer):
            d_in = in_dim if i == 0 else inter_dim
            d_out = out_dim if i == n_layer - 1 else inter_dim
            self.mod_list.append(nn.Linear(d_in, d_out))
            # Note: the activation is appended after every Linear layer, including the last one.
            self.mod_list.append(act)

    def forward(self, x):
        y = x
        for m in self.mod_list:
            y = m(y)
        return y  # no sigmoid here: F.cross_entropy expects unnormalized scores
torch.set_grad_enabled(False)

in_dim = 1
out_dim = 10 * 10
inter_dim = int(out_dim * 1.3)
n_layer = 2
net = SelectNet(in_dim, out_dim, inter_dim, n_layer, nn.LeakyReLU(0.1))

x_train = np.arange(out_dim)

norm_type = 'z-score'
optim_type = torch.optim.RMSprop
# optim_type = torch.optim.Adam
print('input preprocessing:', norm_type)
if norm_type == 'min-max':
    # Min-max normalization to [0, 1]; with 100 integers, accuracy reaches 1.0 at roughly 5,100 iterations.
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min())
elif norm_type == 'zero-centered':
    # Zero-centered min-max normalization to [-1, 1]; with 100 integers, accuracy reaches 1.0 at roughly 3,600 iterations.
    x_train = (x_train - x_train.min()) / (x_train.max() - x_train.min()) * 2 - 1
elif norm_type == 'z-score':
    # Z-score standardization; with 100 integers, accuracy reaches 1.0 at roughly 2,300 iterations.
    x_train = (x_train - x_train.mean()) / np.std(x_train)
elif norm_type == 'raw':
    # No preprocessing; by far the slowest. With 100 integers, accuracy was still only 0.45 at 50,000 iterations when the run was stopped.
    pass
else:
    print('unknown norm_type, keeping raw inputs')
# Assign targets "first come, first served": each input takes the not-yet-used output
# position that the untrained network currently scores highest, so y_train ends up
# being a permutation of 0..out_dim-1.
y_train = []
mask = np.ones(len(x_train))
for i in x_train:
    x = torch.Tensor([i])[None, ...]
    a = net(x).cpu().detach().numpy()[0]
    a -= (a.min() - 1)  # shift so every score is >= 1 (strictly positive)
    a *= mask           # zero out positions that are already taken
    pos = np.argmax(a)
    y_train.append(pos)
    mask[pos] = 0
# y_train = x_train.copy()  # alternative: identity targets (only meaningful for raw 0..N-1 inputs)
print(y_train)
def mini_train(net, x_train, y_train, batch_size=None, target_accuracy=None, max_count=None):
    # Full-batch training; stops once accuracy reaches target_accuracy (default 1.0),
    # or after max_count iterations if max_count is given. batch_size is accepted but unused.
    optim = optim_type(net.parameters(), lr=0.001)
    x_train = torch.Tensor(x_train)
    y_train = torch.Tensor(y_train)
    if target_accuracy is None:
        target_accuracy = 1.0

    def check_accuracy(xs, ys):
        with torch.no_grad():
            out_ys = net(torch.Tensor(xs)).numpy()
            return np.sum(np.argmax(out_ys, 1) == ys.numpy()) / len(ys)

    with torch.enable_grad():
        for b in range(99999999999999999):
            batch_x = x_train.type(torch.float32)
            batch_y = y_train.type(torch.long)
            optim.zero_grad()  # clear accumulated gradients before this step
            out = net(batch_x)
            loss = F.cross_entropy(out, batch_y)
            loss.backward()
            optim.step()
            acc = check_accuracy(x_train, y_train)
            print(b, acc, loss.item())
            if acc >= target_accuracy:
                break
            if max_count is not None and b + 1 >= max_count:
                break


mini_train(net, x_train[:, None], y_train)
print('over')