Logistic Regression
- Import the modules we need
- Configure matplotlib fonts so Chinese labels and the minus sign display correctly
In [1]:
import numpy as np
from matplotlib import pyplot as plt
# import matplotlib.pyplot as plt
import matplotlib

matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['axes.unicode_minus'] = False
- Load the data
In [2]:
def loadDataSet(filename):
    X = []
    Y = []
    with open(filename, 'rb') as f:
        for idx, line in enumerate(f):
            line = line.decode('utf-8').strip()
            if not line:
                continue
            eles = line.split()
            if idx == 0:
                numFea = len(eles)
            eles = list(map(float, eles))
            X.append(eles[:-1])   # every column except the last is a feature
            Y.append([eles[-1]])  # the last column is the label
    return np.array(X), np.array(Y)
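- A quick way to check what loadDataSet expects: a plain-text file of whitespace-separated numeric columns with the label in the last column (that is what the parser above reads). The file name demo.txt below is hypothetical, used only as a sanity check.

# write a tiny two-sample file and load it back (demo.txt is a made-up name)
with open('demo.txt', 'w') as f:
    f.write('0.5 1.2 1\n-0.3 0.8 0\n')
X_demo, y_demo = loadDataSet('demo.txt')
print(X_demo.shape, y_demo.shape)  # (2, 2) (2, 1): two samples, two features, one label column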
In [3]:
def sigmoid(z):
    # the sigmoid (logistic) function
    return 1.0 / (1.0 + np.exp(-z))  # use floats throughout to avoid integer division
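- A couple of spot checks (not from the original notebook): sigmoid maps any real input into (0, 1), with sigmoid(0) = 0.5.

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10.0, 10.0])))  # roughly [4.54e-05, 0.99995]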
- Cost function
In [4]:
def J(theta, X, y, theLambda=0):  # the regularization term defaults to 0
    m, n = X.shape
    h = sigmoid(np.dot(X, theta))
    J = (-1.0 / m) * (np.log(h).T.dot(y) + np.log(1 - h).T.dot(1 - y)) \
        + (theLambda / (2.0 * m)) * np.sum(np.square(theta[1:]))
    if np.isnan(J[0]):
        return np.inf
    # J is an ndarray, so flatten it and return the single scalar value
    return J.flatten()[0]
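- For reference, the expression above is the regularized cross-entropy cost; the bias term $\theta_0$ is excluded from the regularization sum (the code sums over theta[1:]):

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j \ge 1}\theta_j^2$$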
- Gradient descent
In [20]:
def gradient(X, y, options):
    """
    Gradient descent for logistic regression.

    X        the samples
    y        the labels (0 or 1, or whatever labels we provide)
    options  a dict of hyper-parameters:
        options.alpha      learning rate
        options.theLambda  regularization parameter lambda
        options.maxLoop    maximum number of iterations
        options.epsilon    convergence threshold
        options.method
            - 'sgd' stochastic gradient descent
            - 'bgd' batch gradient descent
    """
    m, n = X.shape                 # number of samples and number of features
    # initialize the model parameters: n features correspond to n parameters
    theta = np.zeros((n, 1))
    error = J(theta, X, y)         # current cost
    errors = [error, ]             # cost recorded at every step
    thetas = [theta, ]
    alpha = options.get('alpha', 0.01)        # use the passed-in value, or the default after it
    epsilon = options.get('epsilon', 0.00001)
    maxLoop = options.get('maxLoop', 1000)
    # convert to float: theLambda/m is computed later, and without the cast it would be
    # integer division and always come out 0
    theLambda = float(options.get('theLambda', 0))
    method = options.get('method', 'bgd')

    # stochastic gradient descent
    def _sgd(theta):
        count = 0                  # iteration counter
        converged = False
        while count < maxLoop:
            if converged:
                break
            # SGD updates theta once per sample; the for loop trains on each sample separately
            for i in range(m):
                # reshape X[i] into a 1 x n matrix and multiply it with the n x 1 theta;
                # h is the predicted probability for the current sample
                h = sigmoid(np.dot(X[i].reshape((1, n)), theta))
                theta = theta - alpha * ((1.0 / m) * X[i].reshape((n, 1)) * (h - y[i])
                                         + (theLambda / m) * np.r_[[[0]], theta[1:]])
                thetas.append(theta)
                error = J(theta, X, y, theLambda)
                errors.append(error)
                if abs(errors[-1] - errors[-2]) < epsilon:
                    converged = True
                    break
            count += 1
        return thetas, errors, count

    # batch gradient descent
    def _bgd(theta):
        count = 0
        converged = False
        while count < maxLoop:
            if converged:
                break
            h = sigmoid(np.dot(X, theta))
            theta = theta - alpha * ((1.0 / m) * np.dot(X.T, (h - y))
                                     + (theLambda / m) * np.r_[[[0]], theta[1:]])
            thetas.append(theta)
            error = J(theta, X, y, theLambda)
            errors.append(error)
            count += 1
            if abs(errors[-1] - errors[-2]) < epsilon:
                converged = True
                break
        return thetas, errors, count

    # pick the matching function from this dict based on the 'sgd' or 'bgd' the caller passed in
    methods = {'sgd': _sgd, 'bgd': _bgd}
    return methods[method](theta)
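- Both _sgd and _bgd apply the same regularized update; written out for the batch version it is

$$\theta := \theta - \alpha\left(\frac{1}{m}X^T\big(\sigma(X\theta) - y\big) + \frac{\lambda}{m}\begin{bmatrix}0\\ \theta_{1:}\end{bmatrix}\right)$$

where the leading zero (np.r_[[[0]], theta[1:]] in the code) keeps the bias term $\theta_0$ out of the regularization.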
- Load the data
In [6]:
ori_X, y = loadDataSet('./data/linear.txt')
m, n = ori_X.shape
X = np.concatenate((np.ones((m, 1)), ori_X), axis=1)  # prepend a column of ones as the bias feature
In [7]:
options = {
    'alpha': 0.1,
    'epsilon': 0.00000000001,
    'maxLoop': 10000,
    'method': 'bgd'  # or 'sgd'
}
- Train the model
In [8]:
thetas, errors, iterationCount = gradient(X, y, options)
In [9]:
errors[-1], errors[-2], iterationCount
Out[9]:
(0.09737831553078126, 0.0973790563744117, 10000)
In [10]:
%matplotlib inline
- Plot the decision boundary
In [29]:
X[i]  # each row of X is the bias term (always 1) followed by the two features
Out[29]:
array([ 1. , 0.63265 , -0.030612])
In [25]:
xi = X[i]  # take one sample row for inspection, without shadowing the sample count m
In [23]:
xi[1]
Out[23]:
0.63265
In [28]:
xi[2]
Out[28]:
-0.030612
In [11]:
for i in range(m):
    x = X[i]
    if y[i] == 1:
        # x[1] and x[2] are the first and second features, i.e. the coordinates of one point
        plt.scatter(x[1], x[2], marker='*', color='blue', s=50)
    else:
        plt.scatter(x[1], x[2], marker='o', color='green', s=50)

hSpots = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)  # horizontal axis: x1
theta0, theta1, theta2 = thetas[-1]
vSpots = -(theta0 + theta1 * hSpots) / theta2            # vertical axis: x2
plt.plot(hSpots, vSpots, color='red', linewidth=.5)      # draw the decision boundary
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.show()
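- The boundary line above comes from solving $h_\theta(x) = 0.5$, i.e. $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$, which gives $x_2 = -\dfrac{\theta_0 + \theta_1 x_1}{\theta_2}$ (the vSpots expression in the cell).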
- Plot the cost curve
In [12]:
plt.plot(range(len(errors)), errors)
plt.xlabel(u'Number of iterations')
plt.ylabel(u'Cost J')
plt.show()
- Plot how each parameter theta changes as the iterations progress
In [13]:
# thetasFig is the figure and ax its sub-plots; plt.subplots(len(thetas[0]))
# creates one sub-plot per model parameter
thetasFig, ax = plt.subplots(len(thetas[0]))
thetas = np.asarray(thetas)  # convert the list of parameter vectors to an array
for idx, sp in enumerate(ax):   # walk over the sub-plots; idx is the parameter index
    thetaList = thetas[:, idx]  # each column is one parameter, drawn in its own sub-plot
    sp.plot(range(len(thetaList)), thetaList)
    sp.set_xlabel('Number of iteration')
    sp.set_ylabel(r'$\theta_%d$' % idx)
Non-linear Decision Boundary
In [14]:
from sklearn.preprocessing import PolynomialFeatures

# non-linear decision boundary
ori_X, y = loadDataSet('./data/non_linear.txt')
m, n = ori_X.shape
X = np.concatenate((np.ones((m, 1)), ori_X), axis=1)
# add polynomial terms with sklearn's PolynomialFeatures preprocessing class;
# 6 means all polynomial terms up to degree 6
poly = PolynomialFeatures(6)
XX = poly.fit_transform(X[:, 1:3])
m, n = XX.shape
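- PolynomialFeatures(6) on two input features produces $\binom{6+2}{2} = 28$ columns (the constant term plus every monomial of $x_1, x_2$ up to degree 6), which is why thetas[-1] below has 28 entries. A quick check on a single hypothetical point:

print(poly.fit_transform(np.array([[0.5, -0.2]])).shape)  # (1, 28)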
In [15]:
options = {
    'alpha': 1.0,
    'epsilon': 0.00000001,
    'theLambda': 0,
    'maxLoop': 10000,
    'method': 'bgd'
}
In [16]:
thetas, errors, iterationCount = gradient(XX, y, options)
In [17]:
thetas[-1], errors[-1], iterationCount
Out[17]:
(array([[ 4.15830294], [ 2.10551048], [ 5.23253682], [-5.83414544], [-7.67752609], [-7.28923819], [ 2.46147543], [-0.22788302], [ 3.2198704 ], [-3.09468095], [-4.52156975], [ 3.9332249 ], [-4.08193076], [-2.71489978], [-6.33761165], [-1.79250774], [-0.53711734], [ 5.72823742], [-4.19940451], [-4.29890178], [ 3.19454695], [-5.94924455], [ 1.31894157], [-0.93549023], [ 3.26435671], [-4.6439225 ], [-4.04001109], [ 0.81761338]]), 0.3142660259332875, 10000)
- Plot the decision boundary of the non-linear classifier
- The size of λ has a large effect on how well the model fits (a sketch after the plotting cell below tries a non-zero λ)
In [18]:
for i in range(m):
    x = X[i]
    if y[i] == 1:
        plt.scatter(x[1], x[2], marker='*', color='blue', s=50)
    else:
        plt.scatter(x[1], x[2], marker='o', color='green', s=50)

# draw the decision boundary:
# use a contour plot and draw only the h = 0.5 level curve
x1Min, x1Max, x2Min, x2Max = X[:, 1].min(), X[:, 1].max(), X[:, 2].min(), X[:, 2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1Min, x1Max), np.linspace(x2Min, x2Max))
h = sigmoid(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas[-1]))
h = h.reshape(xx1.shape)
plt.contour(xx1, xx2, h, [0.5], colors='red')
plt.show()
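- As noted above, the size of λ strongly affects the fit. A minimal sketch (not from the original notebook) that re-trains on the same polynomial features with a non-zero theLambda and draws the resulting, smoother boundary on its own figure; theLambda = 1.0 is an arbitrary choice here.

# retrain with regularization turned on and redraw the h = 0.5 contour
options_reg = dict(options, theLambda=1.0)
thetas_reg, errors_reg, _ = gradient(XX, y, options_reg)
h_reg = sigmoid(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas_reg[-1]))
plt.contour(xx1, xx2, h_reg.reshape(xx1.shape), [0.5], colors='orange')
plt.show()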