from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

X, y_ = make_blobs(n_samples=10000, n_features=2, centers=2)  # add random_state=123 for reproducibility
# plt.figure(figsize=(8,6))
# plt.scatter(X[:,0],X[:,1], c=y_)
# plt.pause(0.5)
# plt.close()
y = y_[:, np.newaxis]  # reshape the labels from (10000,) to (10000, 1)
x_train, x_test, y_train, y_test = train_test_split(X, y)
# Append a column of ones so the bias term is absorbed into the weight vector W
x_train = np.c_[x_train, np.ones(x_train.shape[0])]
x_test = np.c_[x_test, np.ones(x_test.shape[0])]
class Logistic_GD:
    def __init__(self):
        self.W = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    def Logistic_train(self, x_hyperplane, x_train, y_train, epoch, learning_rate):
        n_samples, n_feature = x_train.shape
        self.W = np.zeros((n_feature, 1))
        losses = []
        predictions = []
        for i in range(epoch):
            y_pred = self.sigmoid(np.dot(x_train, self.W))
            # Gradient of the mean cross-entropy loss: dW = X^T (y_pred - y) / n
            self.dw = np.dot(x_train.T, (y_pred - y_train)) / n_samples
            self.W -= learning_rate * self.dw
            # Boundary line at this step, used for the animated decision-surface plot
            prediction = -(x_hyperplane * self.W[0] + self.W[-1]) / self.W[1]
            predictions.append(prediction)
            # (1, 7500).dot((7500, 1)) yields a (1, 1) matrix such as [[0.002]], hence loss[0][0] below
            loss = -(y_train.T.dot(np.log(y_pred)) + (1 - y_train).T.dot(np.log(1 - y_pred))) / n_samples
            loss = loss[0][0]
            losses.append(loss)
            if i % 10 == 0:
                print(f"At {i} epoch, loss is {loss}")
        return self.W, losses, predictions
    def prediction(self, x_test, y_test):
        y_pred = self.sigmoid(np.dot(x_test, self.W))
        # Threshold the probabilities at 0.5 before scoring, so the score is the accuracy
        y_label = (y_pred > 0.5).astype(int)
        score = 1 - np.mean(np.abs(y_label - y_test))
        print(f"score is {score * 100}%")
        return y_pred
if __name__ == "__main__":
    xmin = np.min(X[:, 0])
    xmax = np.max(X[:, 0])
    x_hyperplane = np.array([xmin, xmax])
    l = Logistic_GD()
    Weight, losses, predictions = l.Logistic_train(x_hyperplane, x_train, y_train, 1000, 0.8)
    l.prediction(x_train, y_train)
    y_pred = l.prediction(x_test, y_test)
    # Endpoints of the final decision boundary: x1*w1 + x2*w2 + b = 0
    ymin = -(xmin * Weight[0] + Weight[-1]) / Weight[1]
    ymax = -(xmax * Weight[0] + Weight[-1]) / Weight[1]
    plt.figure(figsize=(8, 6))
    ax = plt.subplot(1, 2, 1)
    ax.scatter(X[:, 0], X[:, 1], c=y_)
    plt.ion()  # interactive mode is required for the animated boundary updates
    # plt.show()  # optional here
    lines = None
    for i in range(1000):
        if i % 20 == 0:
            if lines is not None:
                lines[0].remove()  # erase the previous boundary before drawing the next one
            lines = ax.plot(x_hyperplane, predictions[i], c='blue', linewidth=2)
            plt.pause(0.1)
    plt.ioff()  # without this the subplot(1, 2, 2) content is not shown; it could also go inside the loop
    # plt.plot([xmin, xmax], [ymin, ymax], c='blue', linewidth=2)  # final decision boundary
    plt.subplot(1, 2, 2)
    plt.plot(np.arange(1000), losses, linewidth=2)
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()
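
As a quick sanity check, not part of the original script, the hand-rolled model can be compared with scikit-learn's LogisticRegression on the same split (this assumes the script above has run and that x_train/x_test still carry the appended bias column):

from sklearn.linear_model import LogisticRegression
# Drop the manually appended bias column; LogisticRegression fits its own intercept
clf = LogisticRegression()
clf.fit(x_train[:, :2], y_train.ravel())
print(f"sklearn accuracy: {clf.score(x_test[:, :2], y_test.ravel()) * 100:.2f}%")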
Python implementation of basic machine learning algorithms, part 2: logistic regression
This article is the second in a series on implementing basic machine learning algorithms in Python. It focuses on the principle of logistic regression, its implementation, and how to use it for predictive analysis, walking through loading the data, preprocessing it, training the model, and evaluating its performance.
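
For reference, the quantities computed in Logistic_train are the standard sigmoid, the mean cross-entropy loss, and its gradient; the notation below mirrors the code, with n training samples and predictions computed as a matrix product:

$$
\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \hat{y} = \sigma(XW)
$$

$$
L(W) = -\frac{1}{n}\Big[\, y^\top \log \hat{y} + (1 - y)^\top \log(1 - \hat{y}) \,\Big], \qquad
\frac{\partial L}{\partial W} = \frac{1}{n}\, X^\top (\hat{y} - y)
$$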