Logistic Regression
- Import the modules we need
- Configure matplotlib fonts so Chinese labels and the minus sign display correctly
In [1]:
import numpy as np
from matplotlib import pyplot as plt
# import matplotlib.pyplot as plt
import matplotlib

matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['axes.unicode_minus'] = False
- Load the data
In [2]:
def loadDataSet(filename):
    X = []
    Y = []
    with open(filename, 'rb') as f:
        for idx, line in enumerate(f):
            line = line.decode('utf-8').strip()
            if not line:
                continue
            eles = line.split()
            if idx == 0:
                numFea = len(eles)
            eles = list(map(float, eles))
            X.append(eles[:-1])   # every column except the last is a feature
            Y.append([eles[-1]])  # the last column is the label
    return np.array(X), np.array(Y)
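- A quick way to check what loadDataSet expects: a plain-text file of whitespace-separated numeric columns with the label in the last column (that is what the parser above reads). The file name demo.txt below is hypothetical, used only as a sanity check.

# write a tiny two-sample file and load it back (demo.txt is a made-up name)
with open('demo.txt', 'w') as f:
    f.write('0.5 1.2 1\n-0.3 0.8 0\n')
X_demo, y_demo = loadDataSet('demo.txt')
print(X_demo.shape, y_demo.shape)  # (2, 2) (2, 1): two samples, two features, one label column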
In [3]:
def sigmoid(z):
    # the sigmoid (logistic) function
    return 1.0 / (1.0 + np.exp(-z))  # use floats throughout to avoid integer division
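- A couple of spot checks (not from the original notebook): sigmoid maps any real input into (0, 1), with sigmoid(0) = 0.5.

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10.0, 10.0])))  # roughly [4.54e-05, 0.99995]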
- Cost function
In [4]:
def J(theta, X, y, theLambda=0):  # the regularization term defaults to 0
    m, n = X.shape
    h = sigmoid(np.dot(X, theta))
    J = (-1.0 / m) * (np.log(h).T.dot(y) + np.log(1 - h).T.dot(1 - y)) \
        + (theLambda / (2.0 * m)) * np.sum(np.square(theta[1:]))
    if np.isnan(J[0]):
        return np.inf
    # J is an ndarray, so flatten it and return the single scalar value
    return J.flatten()[0]
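- For reference, the expression above is the regularized cross-entropy cost; the bias term $\theta_0$ is excluded from the regularization sum (the code sums over theta[1:]):

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j \ge 1}\theta_j^2$$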
- Gradient descent
In [20]:
def gradient(X, y, options):
    """
    Gradient descent for logistic regression.

    X        the samples
    y        the labels (0 or 1, or whatever labels we provide)
    options  a dict of hyper-parameters:
        options.alpha      learning rate
        options.theLambda  regularization parameter lambda
        options.maxLoop    maximum number of iterations
        options.epsilon    convergence threshold
        options.method
            - 'sgd' stochastic gradient descent
            - 'bgd' batch gradient descent
    """
    m, n = X.shape                 # number of samples and number of features
    # initialize the model parameters: n features correspond to n parameters
    theta = np.zeros((n, 1))
    error = J(theta, X, y)         # current cost
    errors = [error, ]             # cost recorded at every step
    thetas = [theta, ]
    alpha = options.get('alpha', 0.01)        # use the passed-in value, or the default after it
    epsilon = options.get('epsilon', 0.00001)
    maxLoop = options.get('maxLoop', 1000)
    # convert to float: theLambda/m is computed later, and without the cast it would be
    # integer division and always come out 0
    theLambda = float(options.get('theLambda', 0))
    method = options.get('method', 'bgd')

    # stochastic gradient descent
    def _sgd(theta):
        count = 0                  # iteration counter
        converged = False
        while count < maxLoop:
            if converged:
                break
            # SGD updates theta once per sample; the for loop trains on each sample separately
            for i in range(m):
                # reshape X[i] into a 1 x n matrix and multiply it with the n x 1 theta;
                # h is the predicted probability for the current sample
                h = sigmoid(np.dot(X[i].reshape((1, n)), theta))
                theta = theta - alpha * ((1.0 / m) * X[i].reshape((n, 1)) * (h - y[i])
                                         + (theLambda / m) * np.r_[[[0]], theta[1:]])
                thetas.append(theta)
                error = J(theta, X, y, theLambda)
                errors.append(error)
                if abs(errors[-1] - errors[-2]) < epsilon:
                    converged = True
                    break
            count += 1
        return thetas, errors, count

    # batch gradient descent
    def _bgd(theta):
        count = 0
        converged = False
        while count < maxLoop:
            if converged:
                break
            h = sigmoid(np.dot(X, theta))
            theta = theta - alpha * ((1.0 / m) * np.dot(X.T, (h - y))
                                     + (theLambda / m) * np.r_[[[0]], theta[1:]])
            thetas.append(theta)
            error = J(theta, X, y, theLambda)
            errors.append(error)
            count += 1
            if abs(errors[-1] - errors[-2]) < epsilon:
                converged = True
                break
        return thetas, errors, count

    # pick the matching function from this dict based on the 'sgd' or 'bgd' the caller passed in
    methods = {'sgd': _sgd, 'bgd': _bgd}
    return methods[method](theta)
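- Both _sgd and _bgd apply the same regularized update; written out for the batch version it is

$$\theta := \theta - \alpha\left(\frac{1}{m}X^T\big(\sigma(X\theta) - y\big) + \frac{\lambda}{m}\begin{bmatrix}0\\ \theta_{1:}\end{bmatrix}\right)$$

where the leading zero (np.r_[[[0]], theta[1:]] in the code) keeps the bias term $\theta_0$ out of the regularization.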
- Load the data
In [6]:
ori_X, y = loadDataSet('./data/linear.txt')
m, n = ori_X.shape
X = np.concatenate((np.ones((m, 1)), ori_X), axis=1)  # prepend a column of ones as the bias feature
In [7]:
options = {
    'alpha': 0.1,
    'epsilon': 0.00000000001,
    'maxLoop': 10000,
    'method': 'bgd'  # or 'sgd'
}
- Train the model
In [8]:
thetas, errors, iterationCount = gradient(X, y, options)
In [9]:
errors[-1], errors[-2], iterationCount
Out[9]:
(0.09737831553078126, 0.0973790563744117, 10000)
In [10]:
%matplotlib inline
- Plot the decision boundary
In [29]:
X[i]  # each row of X is the bias term (always 1) followed by the two features
Out[29]:
array([ 1. , 0.63265 , -0.030612])
In [25]:
xi = X[i]  # take one sample row for inspection, without shadowing the sample count m
In [23]:
xi[1]
Out[23]:
0.63265
In [28]:
xi[2]
Out[28]:
-0.030612
In [11]:
for i in range(m):
    x = X[i]
    if y[i] == 1:
        # x[1] and x[2] are the first and second features, i.e. the coordinates of one point
        plt.scatter(x[1], x[2], marker='*', color='blue', s=50)
    else:
        plt.scatter(x[1], x[2], marker='o', color='green', s=50)

hSpots = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)  # horizontal axis: x1
theta0, theta1, theta2 = thetas[-1]
vSpots = -(theta0 + theta1 * hSpots) / theta2            # vertical axis: x2
plt.plot(hSpots, vSpots, color='red', linewidth=.5)      # draw the decision boundary
plt.xlabel(r'$x_1$')
plt.ylabel(r'$x_2$')
plt.show()
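- The boundary line above comes from solving $h_\theta(x) = 0.5$, i.e. $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$, which gives $x_2 = -\dfrac{\theta_0 + \theta_1 x_1}{\theta_2}$ (the vSpots expression in the cell).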
- Plot the cost curve
In [12]:
plt.plot(range(len(errors)), errors)
plt.xlabel(u'Number of iterations')
plt.ylabel(u'Cost J')
plt.show()
- Plot how each parameter theta changes as the iterations progress
In [13]:
# thetasFig is the figure and ax its sub-plots; plt.subplots(len(thetas[0]))
# creates one sub-plot per model parameter
thetasFig, ax = plt.subplots(len(thetas[0]))
thetas = np.asarray(thetas)  # convert the list of parameter vectors to an array
for idx, sp in enumerate(ax):   # walk over the sub-plots; idx is the parameter index
    thetaList = thetas[:, idx]  # each column is one parameter, drawn in its own sub-plot
    sp.plot(range(len(thetaList)), thetaList)
    sp.set_xlabel('Number of iteration')
    sp.set_ylabel(r'$\theta_%d$' % idx)
Non-linear Decision Boundary
In [14]:
from sklearn.preprocessing import PolynomialFeatures

# non-linear decision boundary
ori_X, y = loadDataSet('./data/non_linear.txt')
m, n = ori_X.shape
X = np.concatenate((np.ones((m, 1)), ori_X), axis=1)
# add polynomial terms with sklearn's PolynomialFeatures preprocessing class;
# 6 means all polynomial terms up to degree 6
poly = PolynomialFeatures(6)
XX = poly.fit_transform(X[:, 1:3])
m, n = XX.shape
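- PolynomialFeatures(6) on two input features produces $\binom{6+2}{2} = 28$ columns (the constant term plus every monomial of $x_1, x_2$ up to degree 6), which is why thetas[-1] below has 28 entries. A quick check on a single hypothetical point:

print(poly.fit_transform(np.array([[0.5, -0.2]])).shape)  # (1, 28)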
In [15]:
options = {
    'alpha': 1.0,
    'epsilon': 0.00000001,
    'theLambda': 0,
    'maxLoop': 10000,
    'method': 'bgd'
}
In [16]:
thetas, errors, iterationCount = gradient(XX, y, options)
In [17]:
thetas[-1], errors[-1], iterationCount
Out[17]:
(array([[ 4.15830294], [ 2.10551048], [ 5.23253682], [-5.83414544], [-7.67752609], [-7.28923819], [ 2.46147543], [-0.22788302], [ 3.2198704 ], [-3.09468095], [-4.52156975], [ 3.9332249 ], [-4.08193076], [-2.71489978], [-6.33761165], [-1.79250774], [-0.53711734], [ 5.72823742], [-4.19940451], [-4.29890178], [ 3.19454695], [-5.94924455], [ 1.31894157], [-0.93549023], [ 3.26435671], [-4.6439225 ], [-4.04001109], [ 0.81761338]]), 0.3142660259332875, 10000)
- Plot the decision boundary of the non-linear classifier
- The size of λ has a large effect on how well the model fits (a sketch after the plotting cell below tries a non-zero λ)
In [18]:
for i in range(m):
    x = X[i]
    if y[i] == 1:
        plt.scatter(x[1], x[2], marker='*', color='blue', s=50)
    else:
        plt.scatter(x[1], x[2], marker='o', color='green', s=50)

# draw the decision boundary:
# use a contour plot and draw only the h = 0.5 level curve
x1Min, x1Max, x2Min, x2Max = X[:, 1].min(), X[:, 1].max(), X[:, 2].min(), X[:, 2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1Min, x1Max), np.linspace(x2Min, x2Max))
h = sigmoid(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas[-1]))
h = h.reshape(xx1.shape)
plt.contour(xx1, xx2, h, [0.5], colors='red')
plt.show()
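- As noted above, the size of λ strongly affects the fit. A minimal sketch (not from the original notebook) that re-trains on the same polynomial features with a non-zero theLambda and draws the resulting, smoother boundary on its own figure; theLambda = 1.0 is an arbitrary choice here.

# retrain with regularization turned on and redraw the h = 0.5 contour
options_reg = dict(options, theLambda=1.0)
thetas_reg, errors_reg, _ = gradient(XX, y, options_reg)
h_reg = sigmoid(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas_reg[-1]))
plt.contour(xx1, xx2, h_reg.reshape(xx1.shape), [0.5], colors='orange')
plt.show()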