Logistic regression with regularization: handwritten Python code
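The script below fits a binary logistic regression model to the ex2data1 admission data with handwritten batch gradient descent, optionally with L2 regularization, and compares the effect of feature scaling and of the regularization strength. For reference, the regularized objective and gradient implemented here are the standard ones (the bias term $\theta_0$ is not penalized; the `computeCost` helper below reports only the cross-entropy part without the penalty):

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \ge 1), \qquad h_\theta(x) = \frac{1}{1+e^{-\theta^\top x}}$$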

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 16 15:32:35 2022

@author: ciro
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
#%%scatter plot
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')

#%% preprocessing
data2 = data.copy()
if 'Ones' not in data2:
    data2.insert(0, 'Ones', 1)
data2.head()
cols = data2.shape[1]
X = data2.iloc[:, 0:cols-1]      # X: all rows, every column except the last
y = data2.iloc[:, cols-1:cols]   # y: all rows, last column only

X.head()
X = np.array(X)
print(X.shape, type(X))

y = np.array(y)
print(y.shape, type(y))

theta = np.zeros((3,1)) # theta is a 3x1 matrix (column vector), initialised to zeros
theta

#%%define functions
from numba import jit   # JIT-compile the numeric helpers below to speed up the long training loops
@jit
def sigmoid(z):
    """Element-wise logistic sigmoid."""
    return 1 / (1 + np.exp(-z))
@jit
def computeCost(X, y, theta):
    """Compute the (unregularized) cross-entropy cost of logistic regression.

    Vectorized implementation of the cost function.

    Args:
        X: input samples, an m*n matrix (m samples, n features including the ones column)
        y: ground-truth labels, an m*1 matrix
        theta: model parameters, an n*1 matrix

    Returns:
        cost: the cost of the model for the given X, y and theta
    """
    X_theta = np.dot(X, theta)
    cost = -((y * np.log(sigmoid(X_theta))) + (1 - y) * np.log(1 - sigmoid(X_theta))).mean()
    return cost
@jit
def gradient(X, y, theta):
    """Gradient of the (unregularized) cost with respect to theta, as an n*1 matrix."""
    m = X.shape[0]
    X_theta = np.dot(X, theta)
    g = np.dot(X.T, sigmoid(X_theta) - y) / m
    return g
@jit
def batch_gradient_descent(X, y, theta, epoch, lr=0.01, lamb=0):
    """Batch gradient descent with optional L2 regularization of strength lamb.

    The bias term theta[0] is not penalized. Returns the final theta and the
    (unregularized) cost after the last update.
    """
    m = X.shape[0]
    for k in range(epoch):
        g = gradient(X, y, theta)
        reg = (lamb / m) * theta      # gradient of the L2 penalty term
        reg[0, 0] = 0.0               # do not regularize the bias term
        theta = theta - lr * (g + reg)
    cost_his = computeCost(X, y, theta)
    return theta, cost_his
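Before training, a few quick sanity checks help catch vectorization mistakes. This cell is an addition, not part of the original script: the sigmoid should map 0 to 0.5, the cost at theta = 0 equals ln 2 regardless of the data, and the analytic gradient should agree with a central-difference approximation.

#%% optional sanity checks (added; not in the original script)
print(sigmoid(np.array([-10.0, 0.0, 10.0])))   # expect roughly [0, 0.5, 1]
print(computeCost(X, y, theta))                # with theta = 0 this should be ln(2) ~= 0.693

def numerical_gradient(X, y, theta, eps=1e-5):
    """Central-difference approximation of the cost gradient, for checking `gradient`."""
    num_g = np.zeros_like(theta)
    for i in range(theta.size):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[i, 0] += eps
        t_minus[i, 0] -= eps
        num_g[i, 0] = (computeCost(X, y, t_plus) - computeCost(X, y, t_minus)) / (2 * eps)
    return num_g

print(np.abs(gradient(X, y, theta) - numerical_gradient(X, y, theta)).max())  # should be close to zero (around 1e-6 or smaller)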

#%% gradient descent by iterating (with feature scaling)
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler()
X_scale = scaling.fit_transform(X[:,1:])
ones_col = np.ones((X.shape[0]))
X_scale = np.insert(X_scale,0, ones_col, axis = 1)
theta = np.zeros((3, 1))
costs_with_scaling = []
for epoch_series in np.arange(2, 100000, 500):
    final_theta, cost = batch_gradient_descent(X_scale, y, theta, epoch=epoch_series, lr=0.01)
    costs_with_scaling.append(cost)
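An optional quick look (added here, not in the original) at the parameters and final cost from the last, longest run with scaling:

#%% optional: inspect the result of the scaled run (added)
print('theta (scaled features):', final_theta.ravel())
print('final cost with scaling:', costs_with_scaling[-1])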

#%% decision boundary (with feature scaling)
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
test1 = np.sort(X_scale[:, 1])
test2 = np.sort(X_scale[:, 2])
Test1, Test2 = np.meshgrid(test1, test2)
score = final_theta[0] + final_theta[1] * Test1 + final_theta[2] * Test2

cs = plt.contour(Test1, Test2, score, levels=[0])   # decision boundary: theta^T x = 0

# Scale the scatter points with the scaler already fitted on the full data set,
# otherwise they would not line up with the boundary computed from X_scale.
exam_scaled = scaling.transform(data[['Exam 1', 'Exam 2']].values)
positive_scaled = exam_scaled[data['Admitted'].values == 1]
negative_scaled = exam_scaled[data['Admitted'].values == 0]

plt.scatter(positive_scaled[:, 0], positive_scaled[:, 1], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative_scaled[:, 0], negative_scaled[:, 1], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Scaled Exam 1 Score')
plt.ylabel('Scaled Exam 2 Score')
plt.savefig('decision_boundary', dpi=300)
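As an optional addition (not in the original), the training accuracy of the scaled-feature model can be computed by thresholding the predicted probabilities at 0.5:

#%% optional: training accuracy of the scaled-feature model (added)
pred = (sigmoid(X_scale @ final_theta) >= 0.5).astype(int)   # threshold probabilities at 0.5
print('training accuracy (with scaling):', float((pred == y).mean()))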
#%% gradient descent by iterating (without feature scaling)
theta = np.zeros((3, 1))
costs_without_scaling = []
for epoch_series in np.arange(2, 100000, 500):
    final_theta, cost = batch_gradient_descent(X, y, theta, epoch=epoch_series, lr=0.01)
    costs_without_scaling.append(cost)
#%% cost versus number of epochs
plt.figure()
plt.plot(np.arange(2, 100000, 500), costs_with_scaling, label='feature scaling')
plt.plot(np.arange(2, 100000, 500), costs_without_scaling, label='without feature scaling')
plt.xlabel('epochs')
plt.ylabel('cost')
plt.legend()
plt.grid()
plt.savefig('epochs_costs', dpi=300)
#%% decision boundaries without feature scaling, for several regularization strengths
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
color = ['r', 'g', 'b']
for idx, lambs in enumerate([0, 20, 50]):
    theta = np.zeros((3, 1))
    final_theta, _ = batch_gradient_descent(X, y, theta, epoch=1000000, lr=0.01, lamb=lambs)
    print(-final_theta[0] / final_theta[2])   # boundary intercept
    print(-final_theta[1] / final_theta[2])   # boundary slope
    test1 = np.sort(X[:, 1])
    test2 = np.sort(X[:, 2])
    Test1, Test2 = np.meshgrid(test1, test2)
    score = final_theta[0] + final_theta[1] * Test1 + final_theta[2] * Test2
    cs = plt.contour(Test1, Test2, score, levels=[0], colors=color[idx])
    cs.collections[0].set_label('lamb = ' + str(lambs))
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.savefig('decision_boundary_without_scaling_regularization', dpi=300)
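As a rough, optional cross-check (an addition, not part of the original script), scikit-learn's LogisticRegression with a very large C, i.e. essentially no regularization, should give a boundary in the same ballpark as the lamb = 0 run above, provided that run has converged:

#%% optional cross-check of the unregularized fit with scikit-learn (added)
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1e6, max_iter=10000)   # very large C => almost no regularization
clf.fit(X[:, 1:], y.ravel())                      # drop the handcrafted ones column; sklearn fits its own intercept
b0, (b1, b2) = clf.intercept_[0], clf.coef_[0]
print('sklearn boundary intercept:', -b0 / b2)
print('sklearn boundary slope:    ', -b1 / b2)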

#%% training set and test set
def split_train_test(data, test_ratio):
    """Randomly split a DataFrame into a training and a test set."""
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data)*test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

train_data, test_data = split_train_test(data, 0.1)

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import linear_model
logic_model = linear_model.LogisticRegression()    

def plot_learning_curves(model, X, y):
    """Plot RMSE on growing training subsets versus a fixed validation set."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    y_train, y_val = y_train.ravel(), y_val.ravel()
    train_errors, val_errors = [], []
    for m in np.arange(3, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        # predict() already returns 0/1 class labels, so no extra thresholding is needed
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure()
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="training set")
    plt.plot(np.sqrt(val_errors), "b-+", linewidth=2, label="validation set")
    plt.legend()
    plt.xlabel('Training set size')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning curve(no scale)', dpi=300)

plot_learning_curves(logic_model, X, y)




#%% RMSE versus number of epochs for the handwritten model
def plot_learning_curves_epoches(X, y):
    """Plot train/validation RMSE of batch gradient descent as the epoch budget grows."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for epochs in np.arange(2, 100000, 500):
        theta = np.zeros((X.shape[1], 1))
        final_theta, _ = batch_gradient_descent(X_train, y_train, theta, epoch=epochs, lr=0.001)
        # threshold the linear score at 0 (equivalent to sigmoid >= 0.5) to get class predictions
        y_train_predict = np.where(X_train @ final_theta > 0, 1, 0)
        y_val_predict = np.where(X_val @ final_theta > 0, 1, 0)
        train_errors.append(mean_squared_error(y_train, y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure()
    plt.plot(np.arange(2, 100000, 500), np.sqrt(train_errors), "r", linewidth=1.5, label="training set")
    plt.plot(np.arange(2, 100000, 500), np.sqrt(val_errors), "b", linewidth=1.5, label="validation set")
    plt.legend()
    plt.xlabel('epochs')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning curve of epochs(scale)', dpi=300)
    
plot_learning_curves_epoches(X_scale, y)
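Finally, a minimal sketch (added, not part of the original) of evaluating the handwritten model on a held-out split, reusing split_train_test, MinMaxScaler and batch_gradient_descent; the random seed, lamb = 1 and the epoch/learning-rate settings are arbitrary assumptions.

#%% optional: held-out accuracy of the handwritten model (added; settings are assumptions)
np.random.seed(42)                                    # assumed seed, only for reproducibility
train_df, test_df = split_train_test(data2, 0.2)      # data2 already contains the 'Ones' column
X_tr, y_tr = np.array(train_df.iloc[:, :-1]), np.array(train_df.iloc[:, -1:])
X_te, y_te = np.array(test_df.iloc[:, :-1]), np.array(test_df.iloc[:, -1:])

sc = MinMaxScaler()                                   # scale with statistics from the training split only
X_tr_s = np.insert(sc.fit_transform(X_tr[:, 1:]), 0, 1.0, axis=1)
X_te_s = np.insert(sc.transform(X_te[:, 1:]), 0, 1.0, axis=1)

theta_hat, _ = batch_gradient_descent(X_tr_s, y_tr, np.zeros((X_tr_s.shape[1], 1)),
                                      epoch=100000, lr=0.01, lamb=1)
test_pred = (sigmoid(X_te_s @ theta_hat) >= 0.5).astype(int)
print('held-out accuracy:', float((test_pred == y_te).mean()))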

