大家好,我是带我去滑雪!
逻辑回归是一种用于解决分类问题的统计学习方法,它常被用于二分类问题,即将数据分为两个类别。逻辑回归的目标是根据输入的特征来预测输出为某个类别的概率。逻辑回归模型在简单性、可解释性、稳定性和性能评估等方面具有优势,并且可以通过绘制混淆矩阵、ROC曲线和特征变量重要性排序图来进一步评估和优化模型。下面开始代码实战。
目录
(1)导入相关模块与数据
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_curve
from sklearn.metrics import roc_curve, auc
# Load the dataset from a local CSV (hardcoded Windows path from the blog post).
data = pd.read_csv(r'E:\工作\硕士\博客\博客粉丝问题\data.csv',encoding="utf-8")
data = data.fillna(-1)# fill missing values with the sentinel -1
print(data)
(2)划分训练集与测试集,并进行标准化
# Target is the last column of the frame; features are every column before it.
y = data.iloc[:, -1]
print(y)
X = data.iloc[:, :-1]

# Split actually used by the model below (unscaled): 90% train / 10% test.
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.1, random_state=33)

# NOTE(review): this is a second, independent 75/25 split that gets
# standardized but is only used for the shape/label printouts below —
# the classifier in the next section is fitted on train_x/test_x above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit the scaler on the training portion only, to avoid test-set leakage.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print('训练数据形状:')
print(X_train.shape, y_train.shape)
print('验证测试数据形状:')
# BUG FIX: this line was a bare expression and never printed anything.
print(X_test.shape, y_test.shape)
print(y_test)
(3)构建逻辑回归模型并计算相关指标
# Fit a logistic-regression classifier on the (unscaled) 90/10 split
# created above, then predict the held-out test rows.
clf = LogisticRegression()
clf.fit(train_x, train_y)
predict_y = clf.predict(test_x)
# Confusion matrix with rows = true labels, columns = predicted labels.
cm = confusion_matrix(test_y, predict_y)
# 显示模型评估分数
def show_metrics(matrix=None):
    """Print TP/FN/FP/TN counts plus precision, recall and F1.

    Parameters
    ----------
    matrix : array-like of shape (2, 2), optional
        Binary confusion matrix laid out as sklearn's
        ``confusion_matrix`` produces it (rows = true, cols = predicted).
        Defaults to the module-level ``cm`` computed above, so existing
        ``show_metrics()`` calls keep working.

    Returns
    -------
    tuple of float
        ``(precision, recall, f1)``; each is 0.0 when its denominator
        is zero (the original raised ZeroDivisionError in that case).
    """
    m = cm if matrix is None else matrix
    tn, fp = m[0, 0], m[0, 1]
    fn, tp = m[1, 0], m[1, 1]
    print('TP:{}\nFN:{}\nFP:{}\nTN:{}'.format(tp, fn, fp, tn))
    # Compute each ratio once and guard empty denominators, instead of
    # re-evaluating tp/(tp+fp) and tp/(tp+fn) repeatedly as before.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    print('精确率: {:.3f}'.format(precision))
    print('召回率: {:.3f}'.format(recall))
    print('F1值: {:.3f}'.format(f1))
    return precision, recall, f1
show_metrics()
输出结果:
TP:10 FN:3 FP:3 TN:6 精确率: 0.769 召回率: 0.769 F1值: 0.769
(4)绘制混淆矩阵图
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import tree
import seaborn as sns
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
# Recompute the confusion matrix (rows = true labels, cols = predicted).
cm = confusion_matrix(test_y, predict_y)
plt.figure()
# Heatmap of the counts; annot=True writes the count inside each cell.
sns.heatmap(cm , annot=True, cmap='Blues')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
# Save the figure to disk (hardcoded path, 300 dpi, tight bounding box).
plt.savefig(r'E:\工作\硕士\博客\博客粉丝问题\逻辑回归混淆矩阵.png',
bbox_inches ="tight",
pad_inches = 1,
transparent = True,
facecolor ="w",
edgecolor ='w',
dpi=300,
orientation ='landscape')
输出结果:
(5)绘制ROC曲线
def plot_roc_auc(test_y, score_y):
    """Plot the ROC curve for one set of scores and save it to disk.

    Parameters
    ----------
    test_y : array-like
        True binary labels of the test samples.
    score_y : array-like
        Confidence scores for the positive class
        (e.g. ``decision_function`` output).

    Returns
    -------
    float
        Area under the ROC curve (also printed).
    """
    # False-positive rate (x axis) vs. true-positive rate (y axis).
    fpr, tpr, threshold = roc_curve(test_y, score_y)
    roc_auc = auc(fpr, tpr)
    print('AUC:{}'.format(roc_auc))
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange', lw=lw, label='AUC = %0.2f' % roc_auc)
    # Diagonal reference line = performance of a random classifier.
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('1 - Specificity')
    plt.ylabel('Sensitivity')
    # BUG FIX: the legend was added twice in the original.
    plt.legend(loc="lower right")
    plt.savefig(r'E:\工作\硕士\博客\博客粉丝问题\逻辑回归ROC曲线.png',
                bbox_inches="tight",
                pad_inches=1,
                transparent=True,
                facecolor="w",
                edgecolor='w',
                dpi=300,
                orientation='landscape')
    return roc_auc
# Confidence scores of the test samples for the positive class.
score_y = clf.decision_function(test_x)
# plot_roc_auc already computes and prints the AUC itself, so the
# separate roc_curve/auc recomputation the original did here (which
# printed the AUC a second time) was redundant and has been removed.
plot_roc_auc(test_y, score_y)
输出结果:
需要数据集的家人们可以去百度网盘(永久有效)获取:
链接:https://pan.baidu.com/s/173deLlgLYUz789M3KHYw-Q?pwd=0ly6
提取码:2138
更多优质内容持续发布中,请移步主页查看。
若有问题可邮箱联系:1736732074@qq.com
博主的WeChat:TCB1736732074
点赞+关注,下次不迷路!