# Logistic regression study notes
# Source: https://blog.youkuaiyun.com/qq_37667364/article/details/81532339
import numpy as np
import math
from sklearn import datasets
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Sentinel returned by LinearLogisticRegression.costFunc when a predicted
# probability is exactly 0 or 1 (where the log terms are undefined).
# `-2 ** 31` parses as -(2 ** 31), so the value is -2147483648.0.
# NOTE(review): despite the name this is a large negative finite number, not
# +inf -- confirm that is the intended sentinel.
infinity = float(-2 ** 31)
def sigmodFormatrix(X_b, thetas):
    """Return the logistic sigmoid of ``X_b.dot(thetas)``, element-wise.

    Args:
        X_b: array whose ``dot`` with ``thetas`` gives the linear scores
            (in this file: the design matrix with a leading column of ones).
        thetas: parameter vector.

    Returns:
        ``1 / (1 + exp(-score))`` for every linear score.
    """
    linear_scores = X_b.dot(thetas)
    return 1 / (1 + np.exp(-linear_scores))
def sigmodFormatrix2(X_b, thetas):
    """Return hard 0/1 class labels from the logistic sigmoid.

    The sigmoid of ``X_b.dot(thetas)`` is computed element-wise and then
    thresholded: values ``>= 0.5`` become 1, everything else becomes 0.

    Args:
        X_b: array whose ``dot`` with ``thetas`` gives the linear scores.
        thetas: parameter vector (or matrix).

    Returns:
        Array of 1.0 / 0.0 labels with the same shape and dtype as the
        sigmoid output.
    """
    probabilities = 1 / (1 + np.exp(-X_b.dot(thetas)))
    # Vectorized threshold: works for any output shape, unlike an
    # element-by-element Python loop over the first axis.
    return (probabilities >= 0.5).astype(probabilities.dtype)
def sigmod(Xi, thetas):
    """Return the logistic sigmoid of the dot product of one sample and thetas.

    Args:
        Xi: feature vector of a single sample (including the bias entry).
        thetas: parameter vector of the same length.

    Returns:
        ``1 / (1 + exp(-sum(Xi * thetas)))`` as a float. When the linear
        score is so negative that ``math.exp`` would overflow, the limiting
        value 0.0 is returned instead of raising ``OverflowError``.
    """
    negated_score = -np.sum(Xi * thetas)
    try:
        return 1 / (1 + math.exp(negated_score))
    except OverflowError:
        # exp() overflowed, i.e. the denominator is effectively infinite.
        return 0.0
class LinearLogisticRegression(object):
    """Binary logistic regression trained with batch gradient descent.

    Both attributes below are overwritten by ``fit``.
    """

    # Parameter vector: intercept first, then one weight per feature.
    thetas = None
    # Number of training samples seen by the last ``fit`` call.
    m = 0

    def fit(self, X, y, alpha=0.01, accuracy=0.00001):
        """Fit ``thetas`` to the training data by gradient descent.

        Args:
            X: 2-D array of training features, one row per sample.
            y: 0/1 labels, one per row of ``X``.
            alpha: learning rate applied to the summed gradient.
            accuracy: training stops once the cost changes by less than
                this amount between two consecutive iterations.

        Progress and the final cost are printed to stdout.
        """
        self.thetas = np.full(X.shape[1] + 1, 0.5)  # initial parameters
        self.m = X.shape[0]  # number of training samples
        # Prepend a column of ones so thetas[0] acts as the intercept.
        X_b = np.column_stack((np.full((self.m, 1), 1), X))

        count = 1
        # The cost before an update equals the cost after the previous
        # update, so it is carried over instead of being recomputed.
        oldJ = self.costFunc(X_b, y)
        while True:
            # Residuals are computed from the not-yet-updated parameters,
            # so every coordinate is updated simultaneously.
            c = sigmodFormatrix(X_b, self.thetas) - y
            # Summed (not averaged) gradient over all samples.
            self.thetas -= alpha * X_b.T.dot(c)
            newJ = self.costFunc(X_b, y)  # cost after the update
            if newJ == oldJ or math.fabs(newJ - oldJ) < accuracy:
                print("代价函数迭代到最小值,退出!")
                print("代价函数收敛到:", newJ)
                break
            if count % 30 == 0:
                print("迭代第", count, "次!")
                print("代价函数上一次的差:", (newJ - oldJ))
            count += 1
            oldJ = newJ
        print("一共迭代了:", count)

    def costFunc(self, Xb, y):
        """Return the mean cross-entropy cost of the current ``thetas``.

        Args:
            Xb: design matrix (features with the leading ones column); the
                first ``self.m`` rows are used.
            y: 0/1 labels aligned with the rows of ``Xb``.

        Returns:
            The negated average log-likelihood, or the module-level
            ``infinity`` sentinel as soon as a predicted probability is
            exactly 0 or 1 (the log terms would be undefined).
        """
        log_likelihood = .0
        for i in range(self.m):
            yPre = sigmod(Xb[i], self.thetas)
            if yPre == 1 or yPre == 0:
                return infinity
            log_likelihood += (y[i] * math.log(yPre)
                               + (1 - y[i]) * math.log(1 - yPre))
        return -1 / self.m * log_likelihood

    def predict(self, X):
        """Return 0/1 class labels for the rows of ``X``.

        Args:
            X: 2-D array of features, one row per sample.
        """
        ones = np.full((X.shape[0], 1), 1)
        Xb = np.column_stack((ones, X))
        return sigmodFormatrix2(Xb, self.thetas)

    def score(self, X_test, y_test):
        """Return the fraction of samples whose predicted label is correct.

        Args:
            X_test: 2-D array of features, one row per sample.
            y_test: true 0/1 labels aligned with ``X_test``.
        """
        # Predict with this instance rather than a module-level model.
        y_predict = self.predict(X_test)
        return float(np.mean(np.asarray(y_test) == y_predict))
# Load iris and keep only classes 0 and 1 so the problem is binary.
iris = datasets.load_iris()
X, y = iris['data'], iris['target']
X, y = X[y != 2], y[y != 2]

# Split with train_test_split's defaults (original note: 0.75 split).
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train and evaluate the hand-written model.
myLogistic = LinearLogisticRegression()
myLogistic.fit(X_train, y_train)
y_predict = myLogistic.predict(X_test)
print("参数thetas:", myLogistic.thetas)
print("测试数据准确度:", myLogistic.score(X_test, y_test))
print("训练数据准确度:", myLogistic.score(X_train, y_train))

print("======================================================================================")

# Same data through scikit-learn's implementation for comparison.
print("sklearn中的逻辑回归:")
logr = LogisticRegression()
logr.fit(X_train, y_train)
print("训练集准确度:", logr.score(X_train, y_train))
print("测试集准确度:", logr.score(X_test, y_test))
print(logr.get_params())