可能是全网用《最复杂繁琐的代码》一步步实现《由几个全连接层构成的最最最基础的BP神经网络》的版本之一。
硬核打工人能手搓就绝不可能调包!!!
数据集:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
import torch.nn as nn
import pandas as pd
import os
import glob
from torch.utils.tensorboard import SummaryWriter
import sys
import matplotlib.pyplot as plt
class ExcelDataset(Dataset):
    """Training split of the survey spreadsheet, exposed as torch tensors.

    The workbook is read with five float32 columns. Column 0 is used as the
    regression label and the remaining four columns as the features. Only the
    first ``train_rows`` data rows are served by this dataset.

    Args:
        filepath: Path of the Excel workbook to read.
        train_rows: Number of leading data rows to use for training.
            Defaults to 130, the value that used to be hard-coded.
    """

    # Column names imposed on the sheet (the sheet's own header row is replaced).
    COLUMNS = ['Q1-幸福度', 'Q2-经济水平', 'Q3-情感支持水平', 'Q4-身体健康水平', 'Q5-受教育程度']

    def __init__(self, filepath=r"C:\原始数据_幸福度.xlsx", train_rows=130):
        self.df = pd.read_excel(
            filepath,
            header=0,
            names=self.COLUMNS,
            dtype={name: np.float32 for name in self.COLUMNS},
        )
        # Features: every column except the first.
        self.feature = self.df.iloc[:train_rows, 1:].values
        # Label: the first column only.
        self.label = self.df.iloc[:train_rows, 0].values
        self.x = torch.from_numpy(self.feature)
        self.y = torch.from_numpy(self.label)

    def __len__(self):
        """Return the number of samples in the training split."""
        return len(self.y)

    def __getitem__(self, index):
        """Return sample ``index`` as ``{'x': features, 'y': label}``."""
        return {'x': self.x[index], 'y': self.y[index]}
# Training data: mini-batches of 5 samples, reshuffled every epoch.
dataset = ExcelDataset()
dataloader = DataLoader(dataset=dataset, batch_size=5, shuffle=True, num_workers=0)

# Hold-out data: read with the same five float32 columns as the training set.
_TEST_COLUMNS = ['Q1-幸福度', 'Q2-经济水平', 'Q3-情感支持水平', 'Q4-身体健康水平', 'Q5-受教育程度']
test_df = pd.read_excel(
    r"C:\Users\zhang\Downloads\原始数据_幸福度.xlsx",
    header=0,
    names=_TEST_COLUMNS,
    dtype={name: np.float32 for name in _TEST_COLUMNS},
)
# The training split is iloc[:130] (rows 0..129), so the hold-out split must
# begin at row 130. Starting at 131 left row 130 in neither split.
test_feature = test_df.iloc[130:, 1:].values
test_x = torch.from_numpy(test_feature)
test_label = test_df.iloc[130:, 0].values
class BP(nn.Module):
    """Small fully connected regressor: 4 input features -> 1 scalar output.

    Layer widths run 4 -> 8 -> 12 -> 16 -> 12 -> 8 -> 1, and every linear
    layer (the last one included) is followed by a LeakyReLU.
    """

    def __init__(self):
        super().__init__()
        widths = (4, 8, 12, 16, 12, 8, 1)
        layers = []
        # Pair each width with its successor to get (in_features, out_features).
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.LeakyReLU())
        self.queue = layers
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Run the stack and drop dimension 1 of the result via ``squeeze(1)``."""
        return self.model(input).squeeze(1)
# Number of full passes over the training data.
n_epochs = 1000
bp = BP()
# Use the GPU when one is present; otherwise stay on the CPU instead of
# crashing on an unconditional .cuda() call.
bp = bp.cuda() if torch.cuda.is_available() else bp
optimizer = torch.optim.Adam(bp.parameters(), lr=0.01, betas=(0.5, 0.999))
MSEloss = nn.MSELoss()
# if not os.path.exists("logger"):  # directory for the TensorBoard logs
#     os.makedirs("logger", exist_ok=True)
def train():
    """Train the module-level ``bp`` network and save its weights.

    Runs ``n_epochs`` passes over ``dataloader``, minimising ``MSEloss`` with
    ``optimizer``. The loss of every batch is printed and also logged to
    TensorBoard under the tag ``loss``. After the last epoch the state dict is
    written to ``bp_<n_epochs>.pth`` in the working directory.
    """
    # Follow whatever device the model already lives on rather than
    # hard-coding CUDA.
    device = next(bp.parameters()).device
    n_batches = len(dataloader)
    bp.train()
    # NOTE(review): 'lgger' looks like a typo for "logger"; kept as-is because
    # renaming it changes where the event files are written -- confirm.
    # The context manager closes the writer even if training raises.
    with SummaryWriter('lgger') as writer:
        for epoch in range(n_epochs):
            for i, value in enumerate(dataloader):
                x = value['x'].to(device)
                y = value['y'].to(device)
                y_train = bp(x)
                # nn.MSELoss is called as (input, target): prediction first.
                loss = MSEloss(y_train, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                print(
                    "\r[Epoch %d/%d] [Batch %d/%d] [loss: %f]"
                    % (epoch, n_epochs, i, n_batches, loss.item())
                )
                # Global batch counter, used as the x-axis of the loss curve.
                batches_done = epoch * n_batches + i
                writer.add_scalar(
                    tag="loss",
                    scalar_value=loss.item(),
                    global_step=batches_done,
                )
    torch.save(bp.state_dict(), "bp_%d.pth" % n_epochs)
def test():
    """Plot the observed labels against the predictions on the hold-out rows.

    Loads ``bp_<n_epochs>.pth`` into the module-level ``bp``, predicts every
    row of ``test_x`` and shows one matplotlib figure with two curves:
    ``test_label`` (solid) and the predictions (dashed).
    """
    device = next(bp.parameters()).device
    # map_location remaps the checkpoint tensors onto the device bp is on.
    bp.load_state_dict(torch.load("bp_%d.pth" % n_epochs, map_location=device))
    bp.eval()
    # Inference only: no autograd graph needed, and the whole hold-out set
    # goes through in a single batch instead of one row at a time.
    with torch.no_grad():
        predict_list = bp(test_x.to(device)).cpu().numpy()

    plt.figure(figsize=(10, 6))
    # NOTE(review): 'Kaitt' is possibly a misspelling of 'KaiTi' -- confirm.
    plt.rcParams['font.sans-serif'] = ['Kaitt', 'SimHei']
    plt.plot(test_label, label="观测值")
    # Plot the predictions here; the labels were previously drawn twice and
    # the predictions never appeared in the figure.
    plt.plot(predict_list, linestyle='--', label="预测值")
    plt.grid()
    plt.legend()
    plt.show()
# Script entry point. test() loads the checkpoint file that train() writes,
# so uncomment the train() line below when that file does not exist yet.
#train()
test()
训练过程中损失函数的变化(n_epochs=1000):
测试集上的拟合结果: