用 Python 实现 Logistic Regression(逻辑回归)
1、实验使用的数据来自http://sci2s.ugr.es/keel/category.php?cat=clas
2、此处并没有考虑正则项;如果需要加入正则项,只需要修改梯度的计算
3、有关具体的推导,请参考 Andrew Ng 的课程:http://cs229.stanford.edu/
#-*- coding=utf-8 -*-
import numpy as np
"""
time 2016.1.31
"""
class LogisticRegression(object):
    """Binary logistic-regression classifier trained by stochastic gradient ascent.

    No regularisation term is used. The model is a plain dot product of the
    weights and the features passed through a sigmoid; the class adds no
    intercept itself, so include a constant column in the features if one is
    wanted. Data rows given to ``fit`` and ``accuracy`` hold the feature
    values followed by the label (0 or 1) in the last column.
    """

    def __init__(self, n, epo=1000, rate=0.001):
        """Create a model with ``n`` randomly initialised weights.

        Args:
            n: Number of weights, i.e. number of feature values per sample.
            epo: Number of full passes ``fit`` makes over the training data.
            rate: Learning rate (step size) of every weight update.
        """
        self.rate = rate
        self.epo = epo
        # Start from standard-normal noise, one weight per feature.
        self.weights = np.random.normal(size=n)

    def fit(self, data):
        """Train the weights in place, updating them after every sample.

        Args:
            data: 2-D NumPy array; each row is the features followed by the
                label in the last column.
        """
        for _ in range(self.epo):
            for row in data:
                x, y = row[:-1], row[-1]
                h = self.predict(x)
                # (y - h) * x is the per-sample gradient of the log-likelihood.
                self.weights = self.weights + self.rate * (y - h) * x

    def sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise without overflowing.

        Args:
            x: Scalar or array of real values.

        Returns:
            A NumPy scalar for scalar input, otherwise an array shaped like
            ``x``, with values in ``[0, 1]``.
        """
        x = np.asarray(x, dtype=float)
        # exp(-|x|) is always <= 1, so it cannot overflow; the two branches
        # are algebraically the same sigmoid for x >= 0 and x < 0.
        e = np.exp(-np.abs(x))
        out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # Indexing with () turns a 0-d result back into a NumPy scalar and
        # leaves real arrays unchanged.
        return out[()]

    def predict(self, x):
        """Return the predicted probability of label 1.

        Args:
            x: One sample as a 1-D feature vector, or a 2-D array with one
                sample per row.

        Returns:
            A single probability for a 1-D input, or a 1-D array with one
            probability per row for a 2-D input.
        """
        return self.sigmoid(np.dot(x, self.weights))

    def accuracy(self, data):
        """Print and return the fraction of rows classified correctly.

        A row counts as correct when the predicted probability is >= 0.5 and
        the label equals 1, or the probability is < 0.5 and the label equals 0.

        Args:
            data: 2-D array; each row is the features followed by the label
                in the last column.

        Returns:
            The accuracy as a float in ``[0, 1]``.

        Raises:
            ZeroDivisionError: If ``data`` has no rows.
        """
        data = np.asarray(data)
        labels = data[:, -1]
        probs = self.predict(data[:, :-1])
        hits = ((probs >= 0.5) & (labels == 1)) | ((probs < 0.5) & (labels == 0))
        acc = int(np.count_nonzero(hits)) * 1.0 / data.shape[0]
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("predict accuracy is %lf" % acc)
        return acc
def loadData(path, skiprows=10):
    """Load a comma-separated integer data file and prepend a bias column.

    Args:
        path: File name (or any other source ``np.loadtxt`` accepts) of the
            data file.
        skiprows: Number of leading header lines to skip before the data
            rows start. Defaults to 10.

    Returns:
        A 2-D float array with one more column than the file: column 0 is
        all ones (the constant x0 = 1 term) and the remaining columns are
        the file's values in their original order.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single data row; otherwise shape[1] below would not exist.
    raw = np.loadtxt(path, skiprows=skiprows, dtype="int32", delimiter=",",
                     ndmin=2)
    # Enlarge the data set with the constant feature x0 = 1 in column 0.
    new_data = np.ones((raw.shape[0], raw.shape[1] + 1))
    new_data[:, 1:] = raw
    return new_data
# Demo run: load the mammographic data file, train, and report accuracy.
data = loadData("D:\\SelfLearning\\Machine Learning\\ClassifyDataSet\\mammographic\\mammographic.dat")
# The first 600 rows train the model; the remaining rows are held out.
train_data = data[:600]
test_data = data[600:]
# One weight per feature column; the last column of each row is the label.
lr = LogisticRegression(train_data.shape[1] - 1, epo=1000)
lr.fit(train_data)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(lr.weights)
lr.accuracy(test_data)
lr.accuracy(train_data)