基本知识
Softmax 回归是一种多分类模型,也称作多类 Logistic 回归。
模型假设
代价函数
梯度计算
代码实现
import numpy as np
import math
import matplotlib.pyplot as plt
import random
#获取数据
# Load the training data (features + labels) from disk.
def getData(x_path="./ex4Data/ex4x.dat", y_path="./ex4Data/ex4y.dat"):
    """Load exam-score features and binary labels for softmax regression.

    Parameters
    ----------
    x_path : str
        Whitespace-separated feature file, two numeric columns per line.
    y_path : str
        Label file, one numeric value per line.

    Returns
    -------
    xMat : np.matrix, shape (m, 3)
        Design matrix with a leading column of ones (bias term).
    yMat : np.matrix, shape (m, 1)
        Column vector of labels.
    x0Mat : np.matrix, shape (1, m)
        Row of bias-term ones (returned for compatibility with callers).

    Note: the original parser split on literal runs of spaces and picked
    tokens at hard-coded indices 3 and 6, which breaks if the data files'
    spacing changes; ``str.split()`` with no argument is robust to any
    amount of whitespace. It also reads every line instead of assuming
    exactly 80 rows.
    """
    X0, X1, X2, Y = [], [], [], []
    with open(x_path) as f:
        for line in f:
            parts = line.split()  # tolerant of arbitrary whitespace
            if len(parts) >= 2:
                X0.append(1.0)  # bias term
                X1.append(float(parts[0]))
                X2.append(float(parts[1]))
    with open(y_path) as f2:
        for line in f2:
            parts = line.split()
            if parts:
                Y.append(float(parts[0]))
    xMat = np.mat([X0, X1, X2]).T
    yMat = np.mat(Y).T
    x0Mat = np.mat(X0)
    return xMat, yMat, x0Mat
# Load the data set once at import time (reads ./ex4Data/*.dat from cwd).
xMat,yMat,x0Mat=getData()
#softmax函数
# softmax function
def softMax(x):
    """Column-wise softmax of x.

    Subtracting the per-column maximum before exponentiating prevents
    ``np.exp`` overflow for large logits; the result is mathematically
    identical to exp(x) / sum(exp(x)).
    """
    shifted = x - np.max(x, axis=0)
    e = np.exp(shifted)
    return e / np.sum(e, axis=0)
#损失函数
# loss function (average cross-entropy)
def cost(x, y, weights):
    """Average cross-entropy loss of softmax regression.

    Parameters
    ----------
    x : np.matrix, shape (m, n)
        Design matrix.
    y : np.matrix, shape (m, k)
        One-hot label matrix.
    weights : np.matrix or np.ndarray, shape (k, n)
        Current weight matrix.

    Returns
    -------
    float
        Mean negative log-likelihood over the m samples.
    """
    m = x.shape[0]
    hypothesis = softMax(np.dot(weights, x.T))  # (k, m) class probabilities
    # Element-wise product is equivalent to diagonal(y @ log(h)) but avoids
    # materializing an m*m intermediate matrix (O(m^2) memory in the original).
    return (-1.0 / m) * np.sum(np.multiply(y.T, np.log(hypothesis)))
#one_hot编码
# one-hot encoding
def one_hot(y):
    """Return an (m, 2) one-hot label matrix for the ordered data set.

    The data set is ordered: the first 40 samples are encoded [0, 1] and
    the remainder [1, 0] (only the row count of ``y`` is used, matching
    the original hard-coded split).

    Bug fixed: the original mutated ``y`` in place with ``y[i] = [0, 1]``,
    which raises ValueError when ``y`` is the (m, 1) column returned by
    getData — a 2-element row cannot be broadcast into a 1-column matrix.
    A fresh (m, 2) matrix is built instead.
    """
    m = y.shape[0]
    encoded = np.zeros((m, 2))
    for i in range(m):
        if i < 40:
            encoded[i] = [0, 1]
        else:
            encoded[i] = [1, 0]
    return np.mat(encoded)
#批处理梯度下降
# batch gradient descent
def Training(x, y, alpha=0.001, maxepoch=3000):
    """Train softmax-regression weights with batch gradient descent.

    Parameters
    ----------
    x : np.matrix, shape (m, n)
        Design matrix (bias column included).
    y : np.matrix, shape (m, 1)
        Raw label column; it is one-hot encoded internally.
    alpha : float
        Learning rate.
    maxepoch : int
        Number of full-batch update steps.

    Returns
    -------
    np.matrix, shape (k, n)
        Learned weight matrix. Also plots the loss curve as a side effect.

    Bug fixed: the original read ``y.shape`` BEFORE one-hot encoding, so
    ``y_n`` was 1 (labels arrive as an (m, 1) column) and ``weights`` got
    the wrong shape. Encoding must happen first.
    """
    x_m, x_n = x.shape            # (m, n)
    y = one_hot(y)                # encode before reading the label width
    y_m, y_n = y.shape            # (m, k)
    weights = np.zeros((y_n, x_n))  # (k, n)
    loss_list = []
    for epoch in range(1, maxepoch + 1):
        loss = cost(x, y, weights)
        hypothesis = softMax(np.dot(weights, x.T))  # (k, m)
        grad = (-y.T + hypothesis) * x              # (k, n) batch gradient
        weights = weights - alpha * grad
        loss_list.append(loss)
        print("这是第" + str(epoch) + "次迭代")
    plt.plot(loss_list)
    plt.show()
    return weights
#随机梯度下降
# stochastic gradient descent
def SGD_Training(x, y, alpha=0.001, maxepoch=10000):
    """Train softmax-regression weights with stochastic gradient descent.

    Parameters
    ----------
    x : np.matrix, shape (m, n)
        Design matrix (bias column included).
    y : np.matrix, shape (m, 1)
        Raw label column; it is one-hot encoded internally.
    alpha : float
        Learning rate.
    maxepoch : int
        Maximum number of single-sample update steps.

    Returns
    -------
    np.matrix, shape (k, n)
        Learned weight matrix. Also plots the loss curve as a side effect.

    Bug fixed (same as Training): ``y.shape`` was read before one-hot
    encoding, giving weights the wrong shape. Encoding now happens first.
    """
    x_m, x_n = x.shape            # (m, n)
    y = one_hot(y)                # encode before reading the label width
    y_m, y_n = y.shape            # (m, k)
    weights = np.zeros((y_n, x_n))  # (k, n)
    # Hand-picked starting point (kept from the original) to speed convergence.
    weights[0] = [0.2, -1.13, 0.5]
    weights[1] = [-0.2, 1.13, 0.5]
    epoch = 0
    loss_list = []
    while epoch < maxepoch:
        i = random.randint(0, x_m - 1)  # pick one sample uniformly at random
        data_x = x[i]                   # (1, n)
        data_y = y[i]                   # (1, k)
        loss = cost(x, y, weights)
        hypothesis = softMax(np.dot(weights, data_x.T))  # (k, 1)
        grad = (-data_y.T + hypothesis) * data_x         # (k, n)
        weights = weights - alpha * grad
        loss_new = cost(x, y, weights)
        # Stop when the full-batch loss change drops below tolerance.
        if abs(loss_new - loss) < 0.000001:
            break
        loss_list.append(loss)
        epoch += 1
        print("这是第" + str(epoch) + "次迭代")
    plt.plot(loss_list)
    plt.show()
    return weights
# Train with SGD and print the learned (k, n) weight matrix.
weights=SGD_Training(xMat,yMat)
print(weights)