Logistic regression with regularization: handwritten Python code
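The script below fits a binary logistic regression model to the ex2data1 admission data with handwritten batch gradient descent, optionally with L2 regularization, and compares the effect of feature scaling and of the regularization strength. For reference, the regularized objective and gradient implemented here are the standard ones (the bias term $\theta_0$ is not penalized; the `computeCost` helper below reports only the cross-entropy part without the penalty):

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \ge 1), \qquad h_\theta(x) = \frac{1}{1+e^{-\theta^\top x}}$$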

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 16 15:32:35 2022

@author: ciro
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
#%%scatter plot
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')

#%% preprocessing
data2 = data.copy()
if 'Ones' not in data2:
    data2.insert(0, 'Ones', 1)
data2.head()
cols = data2.shape[1]
X = data2.iloc[:, 0:cols-1]      # X: all rows, every column except the last
y = data2.iloc[:, cols-1:cols]   # y: all rows, last column only

X.head()
X = np.array(X)
print(X.shape, type(X))

y = np.array(y)
print(y.shape, type(y))

theta = np.zeros((3,1)) # theta is a 3x1 matrix (column vector), initialised to zeros
theta

#%%define functions
from numba import jit   # JIT-compile the numeric helpers below to speed up the long training loops
@jit
def sigmoid(z):
    """Element-wise logistic sigmoid."""
    return 1 / (1 + np.exp(-z))
@jit
def computeCost(X, y, theta):
    """Compute the (unregularized) cross-entropy cost of logistic regression.

    Vectorized implementation of the cost function.

    Args:
        X: input samples, an m*n matrix (m samples, n features including the ones column)
        y: ground-truth labels, an m*1 matrix
        theta: model parameters, an n*1 matrix

    Returns:
        cost: the cost of the model for the given X, y and theta
    """
    X_theta = np.dot(X, theta)
    cost = -((y * np.log(sigmoid(X_theta))) + (1 - y) * np.log(1 - sigmoid(X_theta))).mean()
    return cost
@jit
def gradient(X, y, theta):
    """Gradient of the (unregularized) cost with respect to theta, as an n*1 matrix."""
    m = X.shape[0]
    X_theta = np.dot(X, theta)
    g = np.dot(X.T, sigmoid(X_theta) - y) / m
    return g
@jit
def batch_gradient_descent(X, y, theta, epoch, lr=0.01, lamb=0):
    """Batch gradient descent with optional L2 regularization of strength lamb.

    The bias term theta[0] is not penalized. Returns the final theta and the
    (unregularized) cost after the last update.
    """
    m = X.shape[0]
    for k in range(epoch):
        g = gradient(X, y, theta)
        reg = (lamb / m) * theta      # gradient of the L2 penalty term
        reg[0, 0] = 0.0               # do not regularize the bias term
        theta = theta - lr * (g + reg)
    cost_his = computeCost(X, y, theta)
    return theta, cost_his
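Before training, a few quick sanity checks help catch vectorization mistakes. This cell is an addition, not part of the original script: the sigmoid should map 0 to 0.5, the cost at theta = 0 equals ln 2 regardless of the data, and the analytic gradient should agree with a central-difference approximation.

#%% optional sanity checks (added; not in the original script)
print(sigmoid(np.array([-10.0, 0.0, 10.0])))   # expect roughly [0, 0.5, 1]
print(computeCost(X, y, theta))                # with theta = 0 this should be ln(2) ~= 0.693

def numerical_gradient(X, y, theta, eps=1e-5):
    """Central-difference approximation of the cost gradient, for checking `gradient`."""
    num_g = np.zeros_like(theta)
    for i in range(theta.size):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[i, 0] += eps
        t_minus[i, 0] -= eps
        num_g[i, 0] = (computeCost(X, y, t_plus) - computeCost(X, y, t_minus)) / (2 * eps)
    return num_g

print(np.abs(gradient(X, y, theta) - numerical_gradient(X, y, theta)).max())  # should be close to zero (around 1e-6 or smaller)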

#%% gradient descent by iterating (with feature scaling)
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler()
X_scale = scaling.fit_transform(X[:,1:])
ones_col = np.ones((X.shape[0]))
X_scale = np.insert(X_scale,0, ones_col, axis = 1)
theta = np.zeros((3, 1))
costs_with_scaling = []
for epoch_series in np.arange(2, 100000, 500):
    final_theta, cost = batch_gradient_descent(X_scale, y, theta, epoch=epoch_series, lr=0.01)
    costs_with_scaling.append(cost)
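An optional quick look (added here, not in the original) at the parameters and final cost from the last, longest run with scaling:

#%% optional: inspect the result of the scaled run (added)
print('theta (scaled features):', final_theta.ravel())
print('final cost with scaling:', costs_with_scaling[-1])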

#%% decision boundary (with feature scaling)
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
test1 = np.sort(X_scale[:, 1])
test2 = np.sort(X_scale[:, 2])
Test1, Test2 = np.meshgrid(test1, test2)
score = final_theta[0] + final_theta[1] * Test1 + final_theta[2] * Test2

cs = plt.contour(Test1, Test2, score, levels=[0])   # decision boundary: theta^T x = 0

# Scale the scatter points with the scaler already fitted on the full data set,
# otherwise they would not line up with the boundary computed from X_scale.
exam_scaled = scaling.transform(data[['Exam 1', 'Exam 2']].values)
positive_scaled = exam_scaled[data['Admitted'].values == 1]
negative_scaled = exam_scaled[data['Admitted'].values == 0]

plt.scatter(positive_scaled[:, 0], positive_scaled[:, 1], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative_scaled[:, 0], negative_scaled[:, 1], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Scaled Exam 1 Score')
plt.ylabel('Scaled Exam 2 Score')
plt.savefig('decision_boundary', dpi=300)
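As an optional addition (not in the original), the training accuracy of the scaled-feature model can be computed by thresholding the predicted probabilities at 0.5:

#%% optional: training accuracy of the scaled-feature model (added)
pred = (sigmoid(X_scale @ final_theta) >= 0.5).astype(int)   # threshold probabilities at 0.5
print('training accuracy (with scaling):', float((pred == y).mean()))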
#%% gradient descent by iterating (without feature scaling)
theta = np.zeros((3, 1))
costs_without_scaling = []
for epoch_series in np.arange(2, 100000, 500):
    final_theta, cost = batch_gradient_descent(X, y, theta, epoch=epoch_series, lr=0.01)
    costs_without_scaling.append(cost)
#%% cost versus number of epochs
plt.figure()
plt.plot(np.arange(2, 100000, 500), costs_with_scaling, label='feature scaling')
plt.plot(np.arange(2, 100000, 500), costs_without_scaling, label='without feature scaling')
plt.xlabel('epochs')
plt.ylabel('cost')
plt.legend()
plt.grid()
plt.savefig('epochs_costs', dpi=300)
#%% decision boundaries without feature scaling, for several regularization strengths
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

plt.figure()
color = ['r', 'g', 'b']
for idx, lambs in enumerate([0, 20, 50]):
    theta = np.zeros((3, 1))
    final_theta, _ = batch_gradient_descent(X, y, theta, epoch=1000000, lr=0.01, lamb=lambs)
    print(-final_theta[0] / final_theta[2])   # boundary intercept
    print(-final_theta[1] / final_theta[2])   # boundary slope
    test1 = np.sort(X[:, 1])
    test2 = np.sort(X[:, 2])
    Test1, Test2 = np.meshgrid(test1, test2)
    score = final_theta[0] + final_theta[1] * Test1 + final_theta[2] * Test2
    cs = plt.contour(Test1, Test2, score, levels=[0], colors=color[idx])
    cs.collections[0].set_label('lamb = ' + str(lambs))
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.savefig('decision_boundary_without_scaling_regularization', dpi=300)
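As a rough, optional cross-check (an addition, not part of the original script), scikit-learn's LogisticRegression with a very large C, i.e. essentially no regularization, should give a boundary in the same ballpark as the lamb = 0 run above, provided that run has converged:

#%% optional cross-check of the unregularized fit with scikit-learn (added)
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1e6, max_iter=10000)   # very large C => almost no regularization
clf.fit(X[:, 1:], y.ravel())                      # drop the handcrafted ones column; sklearn fits its own intercept
b0, (b1, b2) = clf.intercept_[0], clf.coef_[0]
print('sklearn boundary intercept:', -b0 / b2)
print('sklearn boundary slope:    ', -b1 / b2)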

#%% training set and test set
def split_train_test(data, test_ratio):
    """Randomly split a DataFrame into a training and a test set."""
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data)*test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

train_data, test_data = split_train_test(data, 0.1)

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import linear_model
logic_model = linear_model.LogisticRegression()    

def plot_learning_curves(model, X, y):
    """Plot RMSE on growing training subsets versus a fixed validation set."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    y_train, y_val = y_train.ravel(), y_val.ravel()
    train_errors, val_errors = [], []
    for m in np.arange(3, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        # predict() already returns 0/1 class labels, so no extra thresholding is needed
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure()
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="training set")
    plt.plot(np.sqrt(val_errors), "b-+", linewidth=2, label="validation set")
    plt.legend()
    plt.xlabel('Training set size')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning curve(no scale)', dpi=300)

plot_learning_curves(logic_model, X, y)




#%% RMSE versus number of epochs for the handwritten model
def plot_learning_curves_epoches(X, y):
    """Plot train/validation RMSE of batch gradient descent as the epoch budget grows."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for epochs in np.arange(2, 100000, 500):
        theta = np.zeros((X.shape[1], 1))
        final_theta, _ = batch_gradient_descent(X_train, y_train, theta, epoch=epochs, lr=0.001)
        # threshold the linear score at 0 (equivalent to sigmoid >= 0.5) to get class predictions
        y_train_predict = np.where(X_train @ final_theta > 0, 1, 0)
        y_val_predict = np.where(X_val @ final_theta > 0, 1, 0)
        train_errors.append(mean_squared_error(y_train, y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure()
    plt.plot(np.arange(2, 100000, 500), np.sqrt(train_errors), "r", linewidth=1.5, label="training set")
    plt.plot(np.arange(2, 100000, 500), np.sqrt(val_errors), "b", linewidth=1.5, label="validation set")
    plt.legend()
    plt.xlabel('epochs')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning curve of epochs(scale)', dpi=300)
    
plot_learning_curves_epoches(X_scale, y)
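Finally, a minimal sketch (added, not part of the original) of evaluating the handwritten model on a held-out split, reusing split_train_test, MinMaxScaler and batch_gradient_descent; the random seed, lamb = 1 and the epoch/learning-rate settings are arbitrary assumptions.

#%% optional: held-out accuracy of the handwritten model (added; settings are assumptions)
np.random.seed(42)                                    # assumed seed, only for reproducibility
train_df, test_df = split_train_test(data2, 0.2)      # data2 already contains the 'Ones' column
X_tr, y_tr = np.array(train_df.iloc[:, :-1]), np.array(train_df.iloc[:, -1:])
X_te, y_te = np.array(test_df.iloc[:, :-1]), np.array(test_df.iloc[:, -1:])

sc = MinMaxScaler()                                   # scale with statistics from the training split only
X_tr_s = np.insert(sc.fit_transform(X_tr[:, 1:]), 0, 1.0, axis=1)
X_te_s = np.insert(sc.transform(X_te[:, 1:]), 0, 1.0, axis=1)

theta_hat, _ = batch_gradient_descent(X_tr_s, y_tr, np.zeros((X_tr_s.shape[1], 1)),
                                      epoch=100000, lr=0.01, lamb=1)
test_pred = (sigmoid(X_te_s @ theta_hat) >= 0.5).astype(int)
print('held-out accuracy:', float((test_pred == y_te).mean()))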

