from numpy import *
def loadDataSet(filename='testSet.txt'):
    """Load a whitespace-delimited, two-feature data set from a text file.

    Every non-blank line must contain at least three fields: two feature
    values followed by an integer class label.

    Args:
        filename: Path of the file to read. Defaults to 'testSet.txt',
            which keeps existing zero-argument calls working.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows and ``labelMat`` is the list of integer
        labels, both in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a line fails to parse.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. an extra newline at end of file).
                continue
            # X0 is fixed at 1.0 so the model can be written
            # w0*X0 + w1*x1 + w2*x2 with w0 acting as the intercept.
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic sigmoid 1 / (1 + exp(-inX)), element-wise.

    Args:
        inX: A scalar, NumPy array, or NumPy matrix.

    Returns:
        The sigmoid of ``inX``. Array and matrix inputs keep their shape.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
    # logaddexp evaluates log(exp(0) + exp(-x)) without forming exp(-x)
    # directly, so large negative inputs no longer overflow and trigger a
    # RuntimeWarning as the naive formula does.
    return exp(-logaddexp(0, -inX))
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: 2-D sequence of samples, one row per sample and one
            column per feature.
        classLabels: Sequence of class labels, one per sample.
        alpha: Step size applied to each gradient update.
        maxCycles: Number of full-batch update iterations.

    Returns:
        An ``n x 1`` NumPy matrix of weights, where ``n`` is the number of
        columns in ``dataMatIn``.
    """
    # asmatrix is used instead of the `mat` alias, which NumPy 2.0 removed.
    dataMatrix = asmatrix(dataMatIn)
    # Labels arrive as a flat sequence (1 x m); transpose to a column vector.
    labelMat = asmatrix(classLabels).transpose()
    n = shape(dataMatrix)[1]
    # Start from all-ones weights. Wrapping in a matrix keeps the return
    # type a matrix even when maxCycles is 0.
    weights = asmatrix(ones((n, 1)))
    for _ in range(maxCycles):
        # Matrix product: h is a column vector with one prediction per sample.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        # dataMatrix.transpose() * error is the gradient of the objective
        # with respect to the weights.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def plotBestFit(weights):
    """Plot the data set returned by loadDataSet() with a fitted line.

    Samples labelled 1 are drawn as red squares and all other samples as
    green markers. The line drawn is w0 + w1*x1 + w2*x2 = 0, solved for x2.

    Args:
        weights: The three coefficients (w0, w1, w2). May be a flat
            sequence, a (3, 1) array, or a (3, 1) NumPy matrix; it is
            flattened before use.

    Returns:
        None. Opens a matplotlib window via ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)

    # Flatten to a 1-D ndarray. Indexing a NumPy matrix yields 1 x 1
    # matrices, which would make `y` below two-dimensional and break
    # ax.plot(x, y).
    coeffs = asarray(weights).ravel()

    # Split the samples by class so each class gets its own marker style.
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for row, label in zip(dataArr, labelMat):
        if int(label) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')

    # Draw the line over x1 in [-3.0, 3.0) with a step of 0.1.
    x = arange(-3.0, 3.0, 0.1)
    y = (-coeffs[0] - coeffs[1] * x) / coeffs[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def stocGradAscent0(dataMatrix, classLabels): #随机梯度上升算法
m,n = shape(dataMatrix)
alpha =