from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

X, y_ = make_blobs(n_samples=10000, n_features=2, centers=2)  # add random_state=123 for reproducibility
# plt.figure(figsize=(8,6))
# plt.scatter(X[:,0],X[:,1], c=y_)
# plt.pause(0.5)
# plt.close()
y = y_[:, np.newaxis]  # reshape the labels from (10000,) to (10000, 1)
x_train, x_test, y_train, y_test = train_test_split(X, y)
# Append a column of ones so the bias term is absorbed into the weight vector W
x_train = np.c_[x_train, np.ones(x_train.shape[0])]
x_test = np.c_[x_test, np.ones(x_test.shape[0])]
class Logistic_GD:
    def __init__(self):
        self.W = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    def Logistic_train(self, x_hyperplane, x_train, y_train, epoch, learning_rate):
        n_samples, n_feature = x_train.shape
        self.W = np.zeros((n_feature, 1))
        losses = []
        predictions = []
        for i in range(epoch):
            y_pred = self.sigmoid(np.dot(x_train, self.W))
            # Gradient of the mean cross-entropy loss: dW = X^T (y_pred - y) / n
            self.dw = np.dot(x_train.T, (y_pred - y_train)) / n_samples
            self.W -= learning_rate * self.dw
            # Boundary line at this step, used for the animated decision-surface plot
            prediction = -(x_hyperplane * self.W[0] + self.W[-1]) / self.W[1]
            predictions.append(prediction)
            # (1, 7500).dot((7500, 1)) yields a (1, 1) matrix such as [[0.002]], hence loss[0][0] below
            loss = -(y_train.T.dot(np.log(y_pred)) + (1 - y_train).T.dot(np.log(1 - y_pred))) / n_samples
            loss = loss[0][0]
            losses.append(loss)
            if i % 10 == 0:
                print(f"At {i} epoch, loss is {loss}")
        return self.W, losses, predictions
    def prediction(self, x_test, y_test):
        y_pred = self.sigmoid(np.dot(x_test, self.W))
        # Threshold the probabilities at 0.5 before scoring, so the score is the accuracy
        y_label = (y_pred > 0.5).astype(int)
        score = 1 - np.mean(np.abs(y_label - y_test))
        print(f"score is {score * 100}%")
        return y_pred
if __name__ == "__main__":
    xmin = np.min(X[:, 0])
    xmax = np.max(X[:, 0])
    x_hyperplane = np.array([xmin, xmax])
    l = Logistic_GD()
    Weight, losses, predictions = l.Logistic_train(x_hyperplane, x_train, y_train, 1000, 0.8)
    l.prediction(x_train, y_train)
    y_pred = l.prediction(x_test, y_test)
    # Endpoints of the final decision boundary: x1*w1 + x2*w2 + b = 0
    ymin = -(xmin * Weight[0] + Weight[-1]) / Weight[1]
    ymax = -(xmax * Weight[0] + Weight[-1]) / Weight[1]
    plt.figure(figsize=(8, 6))
    ax = plt.subplot(1, 2, 1)
    ax.scatter(X[:, 0], X[:, 1], c=y_)
    plt.ion()  # interactive mode is required for the animated boundary updates
    # plt.show()  # optional here
    lines = None
    for i in range(1000):
        if i % 20 == 0:
            if lines is not None:
                lines[0].remove()  # erase the previous boundary before drawing the next one
            lines = ax.plot(x_hyperplane, predictions[i], c='blue', linewidth=2)
            plt.pause(0.1)
    plt.ioff()  # without this the subplot(1, 2, 2) content is not shown; it could also go inside the loop
    # plt.plot([xmin, xmax], [ymin, ymax], c='blue', linewidth=2)  # final decision boundary
    plt.subplot(1, 2, 2)
    plt.plot(np.arange(1000), losses, linewidth=2)
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()
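
As a quick sanity check, not part of the original script, the hand-rolled model can be compared with scikit-learn's LogisticRegression on the same split (this assumes the script above has run and that x_train/x_test still carry the appended bias column):

from sklearn.linear_model import LogisticRegression
# Drop the manually appended bias column; LogisticRegression fits its own intercept
clf = LogisticRegression()
clf.fit(x_train[:, :2], y_train.ravel())
print(f"sklearn accuracy: {clf.score(x_test[:, :2], y_test.ravel()) * 100:.2f}%")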
Python implementation of basic machine learning algorithms, part 2: logistic regression
This article is the second in a series on implementing basic machine learning algorithms in Python. It focuses on the principle of logistic regression, its implementation, and how to use it for predictive analysis, walking through loading the data, preprocessing it, training the model, and evaluating its performance.
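
For reference, the quantities computed in Logistic_train are the standard sigmoid, the mean cross-entropy loss, and its gradient; the notation below mirrors the code, with n training samples and predictions computed as a matrix product:

$$
\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \hat{y} = \sigma(XW)
$$

$$
L(W) = -\frac{1}{n}\Big[\, y^\top \log \hat{y} + (1 - y)^\top \log(1 - \hat{y}) \,\Big], \qquad
\frac{\partial L}{\partial W} = \frac{1}{n}\, X^\top (\hat{y} - y)
$$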