Logistic Regression with Gradient Descent
1. Problem Description
Given two groups of data points, each group assigned a different class label (e.g. 0 and 1), classify them with logistic regression.
2. Approach
Algorithm:
1) The binomial logistic regression model:

$$P(Y=0\mid x)=\frac{1}{1+e^{\omega\cdot x+b}},\qquad P(Y=1\mid x)=\frac{e^{\omega\cdot x+b}}{1+e^{\omega\cdot x+b}}$$

where $x\in\mathbb{R}^n$ is the input, $Y\in\{0,1\}$ is the output, and $\omega\in\mathbb{R}^n$, $b\in\mathbb{R}$ are the parameters: $\omega$ is called the weight vector, $b$ the bias, and $\omega\cdot x$ is the inner product of $\omega$ and $x$. For convenience the weight and input vectors are often augmented, still written as $\omega$ and $x$, with $\omega=(\omega_1,\omega_2,\dots,\omega_n,b)^T$ and $x=(x_1,x_2,\dots,x_n,1)^T$. The model then becomes

$$P(Y=0\mid x)=\frac{1}{1+e^{\omega\cdot x}},\qquad P(Y=1\mid x)=\frac{e^{\omega\cdot x}}{1+e^{\omega\cdot x}}$$

Define the sigmoid function as $\mathrm{sigmoid}(z)=\frac{1}{1+e^{-z}}$, so that $P(Y=1\mid x)=\mathrm{sigmoid}(\omega\cdot x)$.
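As a quick numeric check of the model above, here is a minimal sketch (the weights and input are hypothetical values, not from the experiment) that evaluates both class probabilities for one augmented input:

import numpy as np

w = np.array([0.5, -1.0, 2.0])  # hypothetical augmented weights (w1, w2, b)
x = np.array([1.5, 0.2, 1.0])   # input (x1, x2) augmented with a trailing 1
z = w @ x                       # inner product w·x
p1 = 1/(1 + np.exp(-z))         # sigmoid(z) = P(Y=1|x), about 0.928 here
print(p1, 1 - p1)               # the two class probabilities sum to 1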
2) Estimating the model parameter $\omega$ by maximum likelihood

Let $P(Y=1\mid x)=\pi(x)$ and $P(Y=0\mid x)=1-\pi(x)$. The likelihood function is

$$\prod_{i=1}^{N}[\pi(x_i)]^{y_i}[1-\pi(x_i)]^{1-y_i}$$

and the log-likelihood is

$$L(\omega)=\sum_{i=1}^{N}\left[y_i\log\pi(x_i)+(1-y_i)\log(1-\pi(x_i))\right]=\sum_{i=1}^{N}\left[y_i(\omega\cdot x_i)-\log\left(1+e^{\omega\cdot x_i}\right)\right]$$

The loss function without a regularization term is the negative log-likelihood, $J(\omega)=-L(\omega)$. With an L2 regularization term the loss becomes

$$J_\lambda(\omega)=-L(\omega)+\frac{\lambda}{2}\lVert\omega\rVert^2$$

Maximizing $L(\omega)$ (equivalently, minimizing the loss) yields $\omega$. Without the regularization term the gradient is

$$\nabla J(\omega)=\sum_{i=1}^{N}\bigl(\pi(x_i)-y_i\bigr)x_i=X^T\bigl(\pi(X)-y\bigr)$$

where $X$ stacks the augmented $x_i^T$ as rows and $\pi(X)$ applies $\pi$ row-wise; $\omega$ can then be solved for by stochastic gradient descent. Likewise, with the regularization term the gradient is

$$\nabla J_\lambda(\omega)=X^T\bigl(\pi(X)-y\bigr)+\lambda\omega$$
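For concreteness, here is a minimal numpy sketch (the variable names and dummy data are my own, not from the experiment) of one batch update under the gradients above. Note that the regularized update rearranges to $\omega\leftarrow(1-\alpha\lambda)\,\omega-\alpha X^T(\pi(X)-y)$, the weight-decay form that tidu2 below implements with the factor (1-1e-4):

import numpy as np

N, alpha, lam = 200, 0.001, 0.1   # sample count, step size, penalty weight (hypothetical)
X = np.hstack([np.ones((N,1)), np.random.randn(N,2)])  # rows [1, x1, x2]
y = (np.random.rand(N,1) > 0.5).astype(float)          # dummy 0/1 labels
w = np.random.randn(3,1)

pi = 1/(1 + np.exp(-X @ w))       # pi(x_i) for every sample, N*1
grad = X.T @ (pi - y)             # X^T(pi(X) - y), 3*1
w_plain = w - alpha*grad                  # update without regularization
w_reg = (1 - alpha*lam)*w - alpha*grad    # with the L2 term; here alpha*lam = 1e-4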
3. Experimental Setup
def disgaosi(): # generate two Gaussian clusters that do not satisfy the naive-Bayes assumption, for classification
def gaosi(): # generate two Gaussian clusters that satisfy the naive-Bayes assumption, for classification
def dataguifan(hmx,hmy,hmx1,hmy1): # pack the data into the matrix and label vector used for logistic regression
def loaddata(filename): # optionally, read the classification data from a file with this function
def tidu(x1,y1,alpha,max): # gradient descent
def tidu2(x1,y1,alpha,max): # gradient descent + regularization term
def zqlv(chmx,chmy,chmx1,chmy1,chy,w0,w1,w2,data): # accuracy on the randomly generated data
def zqlv2(h01,hy,w0,w1,w2): # accuracy on the data read from a file
The implementation code follows:
#h01,hy = loaddata('data.txt')
'''hmx,hmy,hmx1,hmy1 = gaosi() # training set
h01,hy = dataguifan(hmx,hmy,hmx1,hmy1) # training set
chmx,chmy,chmx1,chmy1 = gaosi() # test set
ch01,chy = dataguifan(chmx,chmy,chmx1,chmy1) # test set'''
hmx,hmy,hmx1,hmy1 = disgaosi() # training set
h01,hy = dataguifan(hmx,hmy,hmx1,hmy1) # training set
chmx,chmy,chmx1,chmy1 = disgaosi() # test set
ch01,chy = dataguifan(chmx,chmy,chmx1,chmy1) # test set
W = tidu(h01,hy,alpha,max)
WC = tidu2(h01,hy,alpha,max)
print('W',W)
print('WC',WC)
W0 = W[0,0]
W1 = W[1,0]
W2 = W[2,0]
WC0 = WC[0,0]
WC1 = WC[1,0]
WC2 = WC[2,0]
#lv2 = zqlv2(h01,hy,W0,W1,W2) # accuracy on the data read from data.txt
#print('Accuracy:',lv2*100,'%')
#lv2C = zqlv2(h01,hy,WC0,WC1,WC2) # accuracy on the data read from data.txt
#print('Accuracy+C:',lv2C*100,'%')
lv = zqlv(chmx,chmy,chmx1,chmy1,chy,W0,W1,W2,data) # test-set accuracy
print('Accuracy:',lv*100,'%')
lvC = zqlv(chmx,chmy,chmx1,chmy1,chy,WC0,WC1,WC2,data) # test-set accuracy with penalty term
print('Accuracy+C:',lvC*100,'%')
plotx1 = np.arange(-5,7.5,0.01)
plotx2 = -W0/W2 - W1/W2*plotx1
plotxWC2 = -WC0/WC2 - WC1/WC2*plotx1
plt.plot(plotx1,plotx2,c='r',label='fenjiexian')
plt.plot(plotx1,plotxWC2,c='b',label='fenjiexian+C')
#plt.scatter(hmx,hmy,c = 'g') # training points, y=0
#plt.scatter(hmx1,hmy1,c = 'r',marker='^') # training points, y=1
plt.scatter(chmx,chmy,c = 'r',label='y=0') # test points, y=0
plt.scatter(chmx1,chmy1,c = 'g',marker='^',label='y=1') # test points, y=1
#plt.scatter(h01[:,1][hy==0].A,h01[:,2][hy==0].A,marker='^',c = 'r',label='y=0') # points read from the file, y=0
#plt.scatter(h01[:,1][hy==1].A,h01[:,2][hy==1].A,c = 'g',label='y=1') # y=1
plt.grid()
plt.legend()
plt.show()
4. Analysis of Results and Conclusions
GS,    data = 200: Accuracy 0.94,  Accuracy+C 0.94
DisGS, data = 200: Accuracy 0.855, Accuracy+C 0.855
UCI,   data = 100: Accuracy 0.99,  Accuracy+C 0.99
Conclusions: 1) On the penalty term: for logistic regression, the gap between models trained with and without a penalty term is much smaller than it is for polynomial curve fitting. With the gradient-descent method used here, under otherwise identical conditions there is essentially no difference (roughly one run in ten differs, and only when the data set is large). I therefore consider the regularization term unnecessary for logistic regression, as long as enough iterations are run.
2) This experiment uses 2-dimensional data, but from my understanding of logistic regression, data of any dimension can be classified the same way provided the method is applied correctly; the only limitation is that beyond 3 dimensions the result can no longer be plotted. The implementation hardly changes either, amounting to a few extra vector components; see the sketch after these conclusions.
3) Data that violates the naive-Bayes assumption and data that satisfies it behave much the same here. The figures I show differ by about 10%, but averaged over many more runs the gap is small. In other words, logistic regression can be applied flexibly to both kinds of data.
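In support of conclusion 2), here is a minimal sketch (my own illustration, not one of the experiment files) of the same batch gradient-descent training for data of arbitrary dimension d; only the shapes change, and the update rule is identical to tidu/tidu2:

import numpy as np

def train(X, y, alpha=0.001, iters=10000, decay=0.0):
    # X: N*(d+1) with a leading column of ones; y: N*1 labels in {0,1}
    W = np.random.randn(X.shape[1], 1)
    for _ in range(iters):
        W = W*(1 - decay)                # optional weight decay (0 disables it)
        H = 1/(1 + np.exp(-X @ W))       # predicted probabilities, N*1
        W = W - alpha*(X.T @ (H - y))    # same gradient step as tidu, for any d
    return W

# e.g. d = 5: two Gaussian clusters, class 1 shifted by +3 in every coordinate
d, n = 5, 100
X0 = np.random.normal(0, 2, (n, d))
X1 = np.random.normal(3, 2, (n, d))
X = np.hstack([np.ones((2*n, 1)), np.vstack([X0, X1])])
y = np.vstack([np.zeros((n, 1)), np.ones((n, 1))])
W = train(X, y)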
#Logical disgs.py
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import random
data = 100
alpha = 0.001 # step size
max = 100000 # number of iterations
rank = 3 # dimension
def disgaosi():
    x = np.random.normal(0,2,data).reshape(data,1) # data Gaussian samples, data*1
    y = np.random.normal(0,2,data).reshape(data,1) # data Gaussian samples, data*1
    hmx3 = np.ones((data,1))*3 # data*1 matrix of all 3s
    x1 = x+hmx3
    y1 = y+hmx3
    return x,y,x1,y1
def dataguifan(hmx,hmy,hmx1,hmy1):
    hmx0 = np.ones((data,1))*0 # data*1 matrix of all 0s
    hm = np.hstack((hmx,hmy)) # join x and y
    hmx11 = np.ones((data,1))*1 # data*1 matrix of all 1s
    hm1 = np.hstack((hmx1,hmy1)) # join x1 and y1
    hmx2 = np.ones((2*data,1))*1 # 2*data*1 matrix of all 1s
    h01 = np.vstack((hm,hm1)) # stack hm and hm1
    h01 = np.hstack((hmx2,h01)) # build the [1,hm,hm1] matrix
    hy = np.vstack((hmx0,hmx11)) # build the 2*data*1 label vector
    return h01,hy
def tidu(x1,y1,alpha,max): # gradient descent
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def tidu2(x1,y1,alpha,max): # gradient descent + regularization term
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        W = W*(1-1e-4) # regularization (weight decay)
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def zqlv(chmx,chmy,chmx1,chmy1,chy,w0,w1,w2,data):
    x = 0
    y = 0
    for i in range(0,data):
        if (w0+w1*chmx[i]+w2*chmy[i]>=0.0): # class-0 point on the wrong side
            x = x+1
        if (w0+w1*chmx1[i]+w2*chmy1[i]<=0.0): # class-1 point on the wrong side
            y = y+1
    return 1-(x+y)/(2*data)
hmx,hmy,hmx1,hmy1 = disgaosi() # training set
h01,hy = dataguifan(hmx,hmy,hmx1,hmy1) # training set
chmx,chmy,chmx1,chmy1 = disgaosi() # test set
ch01,chy = dataguifan(chmx,chmy,chmx1,chmy1) # test set
W = tidu(h01,hy,alpha,max)
WC = tidu2(h01,hy,alpha,max)
print('W',W)
print('WC',WC)
W0 = W[0,0]
W1 = W[1,0]
W2 = W[2,0]
WC0 = WC[0,0]
WC1 = WC[1,0]
WC2 = WC[2,0]
lv = zqlv(chmx,chmy,chmx1,chmy1,chy,W0,W1,W2,data) # test-set accuracy
print('Accuracy:',lv*100,'%')
lvC = zqlv(chmx,chmy,chmx1,chmy1,chy,WC0,WC1,WC2,data) # test-set accuracy with penalty term
print('Accuracy+C:',lvC*100,'%')
plotx1 = np.arange(-5,7.5,0.01)
plotx2 = -W0/W2 - W1/W2*plotx1
plotxWC2 = -WC0/WC2 - WC1/WC2*plotx1
plt.plot(plotx1,plotx2,c='r',label='fenjiexian')
plt.plot(plotx1,plotxWC2,c='b',label='fenjiexian+C')
#plt.scatter(hmx,hmy,c = 'g') # training points, y=0
#plt.scatter(hmx1,hmy1,c = 'r',marker='^') # training points, y=1
plt.scatter(chmx,chmy,c = 'r',label='y=0') # test points, y=0
plt.scatter(chmx1,chmy1,c = 'g',marker='^',label='y=1') # test points, y=1
plt.grid()
plt.legend()
plt.show()
#Logical gs.py
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import random
data = 100
alpha = 0.001 # step size
max = 100000 # number of iterations
rank = 3 # dimension
def gaosi():
    x = np.random.normal(0,2,data).reshape(data,1) # data Gaussian samples, data*1
    y = np.random.normal(0,2,data).reshape(data,1) # data Gaussian samples, data*1
    x1 = np.random.normal(5,2,data).reshape(data,1) # data Gaussian samples, data*1
    y1 = np.random.normal(5,2,data).reshape(data,1) # data Gaussian samples, data*1
    return x,y,x1,y1
def dataguifan(hmx,hmy,hmx1,hmy1):
    hmx0 = np.ones((data,1))*0 # data*1 matrix of all 0s
    hm = np.hstack((hmx,hmy)) # join x and y
    hmx11 = np.ones((data,1))*1 # data*1 matrix of all 1s
    hm1 = np.hstack((hmx1,hmy1)) # join x1 and y1
    hmx2 = np.ones((2*data,1))*1 # 2*data*1 matrix of all 1s
    h01 = np.vstack((hm,hm1)) # stack hm and hm1
    h01 = np.hstack((hmx2,h01)) # build the [1,hm,hm1] matrix
    hy = np.vstack((hmx0,hmx11)) # build the 2*data*1 label vector
    return h01,hy
def tidu(x1,y1,alpha,max): # gradient descent
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def tidu2(x1,y1,alpha,max): # gradient descent + regularization term
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        W = W*(1-1e-4) # regularization (weight decay)
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def zqlv(chmx,chmy,chmx1,chmy1,chy,w0,w1,w2,data):
    x = 0
    y = 0
    for i in range(0,data):
        if (w0+w1*chmx[i]+w2*chmy[i]>=0.0): # class-0 point on the wrong side
            x = x+1
        if (w0+w1*chmx1[i]+w2*chmy1[i]<=0.0): # class-1 point on the wrong side
            y = y+1
    return 1-(x+y)/(2*data)
hmx,hmy,hmx1,hmy1 = gaosi() # training set
h01,hy = dataguifan(hmx,hmy,hmx1,hmy1) # training set
chmx,chmy,chmx1,chmy1 = gaosi() # test set
ch01,chy = dataguifan(chmx,chmy,chmx1,chmy1) # test set
W = tidu(h01,hy,alpha,max)
WC = tidu2(h01,hy,alpha,max)
print('W',W)
print('WC',WC)
W0 = W[0,0]
W1 = W[1,0]
W2 = W[2,0]
WC0 = WC[0,0]
WC1 = WC[1,0]
WC2 = WC[2,0]
lv = zqlv(chmx,chmy,chmx1,chmy1,chy,W0,W1,W2,data) # test-set accuracy
print('Accuracy:',lv*100,'%')
lvC = zqlv(chmx,chmy,chmx1,chmy1,chy,WC0,WC1,WC2,data) # test-set accuracy with penalty term
print('Accuracy+C:',lvC*100,'%')
plotx1 = np.arange(-5,7.5,0.01)
plotx2 = -W0/W2 - W1/W2*plotx1
plotxWC2 = -WC0/WC2 - WC1/WC2*plotx1
plt.plot(plotx1,plotx2,c='r',label='fenjiexian')
plt.plot(plotx1,plotxWC2,c='b',label='fenjiexian+C')
#plt.scatter(hmx,hmy,c = 'g') # training points, y=0
#plt.scatter(hmx1,hmy1,c = 'r',marker='^') # training points, y=1
plt.scatter(chmx,chmy,c = 'r',label='y=0') # test points, y=0
plt.scatter(chmx1,chmy1,c = 'g',marker='^',label='y=1') # test points, y=1
plt.grid()
plt.legend()
plt.show()
#Logical UCI.py
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import random
data = 100
alpha = 0.001 # step size
max = 100000 # number of iterations
rank = 3 # dimension
def dataguifan(hmx,hmy,hmx1,hmy1):
    hmx0 = np.ones((data,1))*0 # data*1 matrix of all 0s
    hm = np.hstack((hmx,hmy)) # join x and y
    hmx11 = np.ones((data,1))*1 # data*1 matrix of all 1s
    hm1 = np.hstack((hmx1,hmy1)) # join x1 and y1
    hmx2 = np.ones((2*data,1))*1 # 2*data*1 matrix of all 1s
    h01 = np.vstack((hm,hm1)) # stack hm and hm1
    h01 = np.hstack((hmx2,h01)) # build the [1,hm,hm1] matrix
    hy = np.vstack((hmx0,hmx11)) # build the 2*data*1 label vector
    return h01,hy
def loaddata(filename): # optionally, read the classification data from a file
    # expects whitespace-separated lines; the first two columns are features, the last is the 0/1 label
    file = open(filename)
    x = []
    y = []
    for line in file.readlines():
        line = line.strip().split()
        x.append([1,float(line[0]),float(line[1])])
        y.append(float(line[-1]))
    x1 = np.mat(x)
    y1 = np.mat(y).T
    file.close()
    return x1,y1
def tidu(x1,y1,alpha,max): # gradient descent
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def tidu2(x1,y1,alpha,max): # gradient descent + regularization term
    W = np.mat(np.random.randn(rank,1))
    for i in range(0,max):
        W = W*(1-1e-4) # regularization (weight decay)
        H = 1/(1+np.exp(-1*x1*W))
        dw = x1.T*(H-y1) # 3*1
        W = W - alpha*dw
    return W
def zqlv2(h01,hy,w0,w1,w2):
    data1 = np.size(hy,0)
    print('data1',data1)
    x = 0.0
    y = 0.0
    for i in range(0,data1):
        if((w0+w1*h01[:,1][i]+w2*h01[:,2][i]>=0.0) and (hy[i]==0)): # class-0 point predicted as 1
            x = x+1
        if((w0+w1*h01[:,1][i]+w2*h01[:,2][i]<=0.0) and (hy[i]==1)): # class-1 point predicted as 0
            y = y+1
    return 1-(x+y)/data1
h01,hy = loaddata('data.txt')
W = tidu(h01,hy,alpha,max)
WC = tidu2(h01,hy,alpha,max)
print('W',W)
print('WC',WC)
W0 = W[0,0]
W1 = W[1,0]
W2 = W[2,0]
WC0 = WC[0,0]
WC1 = WC[1,0]
WC2 = WC[2,0]
lv2 = zqlv2(h01,hy,W0,W1,W2) # accuracy on the data read from data.txt
print('Accuracy:',lv2*100,'%')
lv2C = zqlv2(h01,hy,WC0,WC1,WC2) # accuracy on the data read from data.txt
print('Accuracy+C:',lv2C*100,'%')
plotx1 = np.arange(4,8,0.01)
plotx2 = -W0/W2 - W1/W2*plotx1
plotxWC2 = -WC0/WC2 - WC1/WC2*plotx1
plt.plot(plotx1,plotx2,c='r',label='fenjiexian')
plt.plot(plotx1,plotxWC2,c='b',label='fenjiexian+C')
plt.scatter(h01[:,1][hy==0].A,h01[:,2][hy==0].A,marker='^',c = 'r',label='y=0') # points read from the file, y=0
plt.scatter(h01[:,1][hy==1].A,h01[:,2][hy==1].A,c = 'g',label='y=1') # y=1
plt.grid()
plt.legend()
plt.show()