【建立函数实现logistic回归】

最新推荐文章于 2024-05-26 16:35:53 发布

雪寻花

最新推荐文章于 2024-05-26 16:35:53 发布

阅读量177

点赞数 1

文章标签： python 开发语言 线性回归

原文链接：https://blog.youkuaiyun.com/qq_37667364/article/details/81532339

版权

logistic回归学习记录

学习于：https://blog.youkuaiyun.com/qq_37667364/article/details/81532339

import numpy as np
import math
from sklearn import datasets
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Sentinel the cost function returns when a predicted probability is exactly
# 0 or 1, i.e. when the log-loss is unbounded.  The log-loss is non-negative,
# so the sentinel has to be positive infinity; the previous value
# (-2 ** 31) was a large negative finite number and read as a very *good* cost.
infinity = float("inf")
def sigmodFormatrix(X_b, thetas):
    """Evaluate the logistic (sigmoid) function on ``X_b . thetas``.

    Args:
        X_b: Array whose dot product with ``thetas`` gives the linear scores.
        thetas: Parameter array.

    Returns:
        ``1 / (1 + exp(-(X_b . thetas)))`` evaluated element-wise.
    """
    linear_scores = np.dot(X_b, thetas)
    return 1 / (np.exp(-linear_scores) + 1)

def sigmodFormatrix2(X_b, thetas):
    """Return hard 0/1 class labels obtained by thresholding the sigmoid at 0.5.

    Args:
        X_b: Array whose dot product with ``thetas`` gives the linear scores.
        thetas: Parameter array.

    Returns:
        Floating-point array (or NumPy scalar for a single sample) holding
        ``1`` where the sigmoid of the score is ``>= 0.5`` and ``0`` elsewhere.
    """
    probabilities = 1 / (1 + np.exp(-np.dot(X_b, thetas)))
    # One vectorized comparison instead of a Python-level loop; casting back
    # to the probability dtype keeps the float return type of the loop version.
    return (probabilities >= 0.5).astype(probabilities.dtype)

def sigmod(Xi, thetas):
    """Return the sigmoid of the dot product of one sample with ``thetas``.

    Args:
        Xi: Feature values of a single sample.
        thetas: Parameter values, multiplied element-wise with ``Xi``.

    Returns:
        ``1 / (1 + exp(-sum(Xi * thetas)))`` as a Python float.
    """
    z = float(np.sum(Xi * thetas))
    if z >= 0:
        # exp(-z) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-z))
    # For negative z, exp(-z) can exceed the float range and make math.exp
    # raise OverflowError; the equivalent form below only evaluates exp(z) < 1.
    exp_z = math.exp(z)
    return exp_z / (1 + exp_z)

class LinearLogisticRegression(object):
    """Binary logistic regression trained with batch gradient descent.

    Uses the module-level helpers ``sigmod``, ``sigmodFormatrix`` and
    ``sigmodFormatrix2`` and the module-level ``infinity`` sentinel.
    """

    # Parameter vector (intercept first); stays None until fit() has run.
    thetas = None
    # Number of samples passed to the most recent fit() call.
    m = 0

    def fit(self, X, y, alpha=0.01, accuracy=0.00001, max_iter=None):
        """Fit the parameters by gradient descent on the log-loss.

        Args:
            X: 2-D feature array, one row per sample.
            y: Labels, one per row of ``X``.
            alpha: Step size applied to the (un-normalized) gradient.
            accuracy: Stop once the cost changes by less than this amount
                between two consecutive iterations.
            max_iter: Optional upper bound on the number of iterations.
                ``None`` (the default) keeps the original behavior of
                iterating until the cost stops changing.
        """
        self.thetas = np.full(X.shape[1] + 1, 0.5)  # initial parameters
        self.m = X.shape[0]  # number of training samples
        # Prepend a column of ones so thetas[0] acts as the intercept.
        ones = np.full((self.m, 1), 1)
        X_b = np.column_stack((ones, X))

        # The cost after one update is the cost before the next one, so it
        # is evaluated once per iteration and carried over.
        oldJ = self.costFunc(X_b, y)
        count = 1
        while True:
            # Residuals are computed from the not-yet-updated parameters and
            # all parameters are then updated simultaneously.
            c = sigmodFormatrix(X_b, self.thetas) - y
            self.thetas = self.thetas - alpha * X_b.T.dot(c)
            newJ = self.costFunc(X_b, y)
            if newJ == oldJ or math.fabs(newJ - oldJ) < accuracy:
                print("代价函数迭代到最小值，退出！")
                print("代价函数收敛到:", newJ)
                break
            if count % 30 == 0:
                print("迭代第", count, "次!")
                print("代价函数上一次的差:", (newJ - oldJ))
            if max_iter is not None and count >= max_iter:
                # Iteration budget exhausted without meeting the tolerance.
                break
            oldJ = newJ
            count += 1
        print("一共迭代了：", count)

    def costFunc(self, Xb, y):
        """Return the mean log-loss of the current parameters on ``(Xb, y)``.

        Args:
            Xb: Design matrix that already contains the leading ones column.
            y: Labels, indexable by row position.

        Returns:
            The average negative log-likelihood, or the module-level
            ``infinity`` sentinel as soon as one predicted probability is
            exactly 0 or 1 (its logarithm could not be evaluated).
        """
        # Normalize by the data actually passed in, not by the size of the
        # last training set, so the cost is also valid for other datasets.
        n_samples = Xb.shape[0]
        log_likelihood = .0
        for i, row in enumerate(Xb):
            yPre = sigmod(row, self.thetas)
            if yPre == 1 or yPre == 0:
                return infinity
            log_likelihood += y[i] * math.log(yPre) + (1 - y[i]) * math.log(1 - yPre)
        return -1 / n_samples * log_likelihood

    def predict(self, X):
        """Return 0/1 class predictions for each row of ``X``."""
        ones = np.full((X.shape[0], 1), 1)
        Xb = np.column_stack((ones, X))
        return sigmodFormatrix2(Xb, self.thetas)

    def score(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` predicted as ``y_test``.

        Raises:
            ZeroDivisionError: If there are no samples to score.
        """
        # Use this instance's own predictions; the previous code called a
        # module-level model object and therefore ignored ``self``.
        matches = (y_test == self.predict(X_test))
        return np.count_nonzero(matches) / np.size(matches)

# Demo: train the hand-written model on a two-class subset of the iris data
# and compare its accuracy with scikit-learn's LogisticRegression.
iris = datasets.load_iris()
X = iris['data']
y = iris['target']
X = X[y != 2]  # keep only two of the classes so the problem is binary
y = y[y != 2]
X_train, X_test, y_train, y_test = train_test_split(X, y)  # default split (original note: 0.75 of the data for training)
myLogistic = LinearLogisticRegression()
myLogistic.fit(X_train, y_train)
# NOTE(review): y_predict is not read again anywhere in the visible script.
y_predict = myLogistic.predict(X_test)
print("参数thetas:", myLogistic.thetas)
print("测试数据准确度:", myLogistic.score(X_test, y_test))
print("训练数据准确度:", myLogistic.score(X_train, y_train))
print("======================================================================================")
# Reference result from scikit-learn on the same split.
print("sklearn中的逻辑回归:")
logr = LogisticRegression()
logr.fit(X_train, y_train)
print("训练集准确度:", logr.score(X_train, y_train))
print("测试集准确度:", logr.score(X_test, y_test))
print(logr.get_params())

在这里插入图片描述