Data Visualization and Gradient Ascent
import numpy as np
import matplotlib.pyplot as plt
Define the data-loading function
def loadDataSet():
    """Read testSet.txt: each line holds two feature values and a class label."""
    dataMat = []
    labelMat = []
    with open('testSet.txt') as f:
        for line in f.readlines():
            line_list = line.strip().split()
            # prepend 1.0 as the x0 (intercept) term before the two features
            dataMat.append([1.0, float(line_list[0]), float(line_list[1])])
            labelMat.append(int(line_list[2]))
    return dataMat, labelMat
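
The function assumes testSet.txt sits in the working directory with one sample per line: two whitespace-separated feature values followed by a 0/1 class label. A minimal sketch of how the loaded data looks (the file contents shown in the comments are illustrative, not guaranteed to match the real file):

# testSet.txt, illustrative layout:
#   -0.017612   14.053064   0
#    1.395634    4.662541   1
dataMat, labelMat = loadDataSet()
print(dataMat[0])   # e.g. [1.0, -0.017612, 14.053064] -- the leading 1.0 is the intercept term
print(labelMat[0])  # e.g. 0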
Define the sigmoid activation function
def sigmoid(inX):
    # logistic (sigmoid) function: maps any real input into (0, 1)
    return 1.0 / (1 + np.exp(-inX))
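
As a quick, illustrative sanity check: sigmoid(0) is exactly 0.5, and large negative or positive inputs saturate toward 0 and 1. Because np.exp is vectorized, the function also works elementwise on arrays and matrices, which gradAscent relies on below.

print(sigmoid(0))                          # 0.5
print(sigmoid(np.array([-10., 0., 10.])))  # roughly [4.5e-05, 0.5, 0.99995]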
dataMat, labelMat = loadDataSet()
dataMatArr = np.array(dataMat)
n = np.shape(dataMatArr)[0]   # number of samples
x_cord_1 = []; y_cord_1 = []
x_cord_2 = []; y_cord_2 = []
# split the samples by class label so each class gets its own color
for i in range(n):
    if int(labelMat[i]) == 1:
        x_cord_1.append(dataMatArr[i, 1])
        y_cord_1.append(dataMatArr[i, 2])
    else:
        x_cord_2.append(dataMatArr[i, 1])
        y_cord_2.append(dataMatArr[i, 2])
Plot the data
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(x_cord_1,y_cord_1,s=30,c='red')
ax.scatter(x_cord_2,y_cord_2,s=30,c='green')
(Output: scatter plot with class-1 points in red and class-0 points in green.)


Implement the gradient ascent function
def gradAscent(dataMatIn, classLabels):
    dataMatrix = np.mat(dataMatIn)                  # m x n data matrix
    labelMatrix = np.mat(classLabels).transpose()   # m x 1 column vector of labels
    m, n = np.shape(dataMatrix)
    alpha = 0.001       # learning rate (step size)
    maxCycles = 500     # number of iterations
    weights = np.ones((n, 1))
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)           # predicted probabilities, m x 1
        error = labelMatrix - h                     # prediction error
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
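
The update inside the loop is batch gradient ascent on the log-likelihood of logistic regression. Writing the data matrix as X, the label vector as y, and the predictions as h = sigmoid(X w), the gradient of the log-likelihood simplifies to X^T (y - h), so each iteration takes a small step of size alpha in that direction: w <- w + alpha * X^T (y - h). That is exactly what weights = weights + alpha * dataMatrix.transpose() * error computes.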
weights = gradAscent(dataMatArr, labelMat)
weights
matrix([[ 4.12414349],
[ 0.48007329],
[-0.6168482 ]])
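
These weights define the decision boundary: a sample is classified as 1 when sigmoid(w0 + w1*x1 + w2*x2) >= 0.5, that is, when w0 + w1*x1 + w2*x2 >= 0. Setting this expression to zero and solving for x2 gives x2 = (-w0 - w1*x1) / w2, which is the straight line drawn by plotBestFit below.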
Analyze the data and plot the decision boundary
def plotBestFit(weights):
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]
    x_cord_1 = []; y_cord_1 = []
    x_cord_2 = []; y_cord_2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            x_cord_1.append(dataArr[i, 1])
            y_cord_1.append(dataArr[i, 2])
        else:
            x_cord_2.append(dataArr[i, 1])
            y_cord_2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_cord_1, y_cord_1, s=30, c='red')    # class 1
    ax.scatter(x_cord_2, y_cord_2, s=30, c='green')  # class 0
    # decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
plotBestFit(weights.getA())
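
Note that gradAscent returns a NumPy matrix, so weights.getA() converts it to a plain ndarray before plotting; without the conversion, the expression (-weights[0] - weights[1]*x) / weights[2] would yield a 1xN matrix rather than a 1-D array, and ax.plot would not pair it with x as intended.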
