#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 16 15:32:35 2022
@author: ciro
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
#%%scatter plot
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]
plt.figure()
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
#%% preprocessing
data2 = data.copy()
if 'Ones' not in data2:
    data2.insert(0, 'Ones', 1)
data2.head()
cols = data2.shape[1]
X = data2.iloc[:, 0:cols-1]    # X is every row of data2 except the last column
y = data2.iloc[:, cols-1:cols] # y is every row, last column only
X.head()
X = np.array(X)
print(X.shape, type(X))
y = np.array(y)
print(y.shape, type(y))
theta = np.zeros((3,1)) # theta is a 3x1 column vector, initialized to zeros
theta
#%%define functions
from numba import jit
@jit
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + exp(-z))."""
    ### START CODE HERE ### (≈ 1 line of code)
    s = 1 / (1 + np.exp(-z))
    ### END CODE HERE ###
    return s
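# Optional sanity check (a quick sketch): sigmoid(0) should be exactly 0.5 and large
# positive/negative inputs should saturate towards 1/0.
assert abs(sigmoid(0.0) - 0.5) < 1e-12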
@jit
def computeCost(X, y, theta):
    """Compute the logistic-regression cost (cross-entropy) using vectorized NumPy operations.
    Args:
        X: input samples, an m*n matrix (m = number of samples, n = number of features)
        y: ground-truth outputs for the samples, an m*1 matrix
        theta: model parameters, an n*1 matrix
    Returns:
        cost: the cost of the model for the given X, y and theta
    """
    ### START CODE HERE ###
    X_theta = np.dot(X, theta)
    cost = -((y * np.log(sigmoid(X_theta))) + (1 - y) * np.log(1 - sigmoid(X_theta))).mean()
    ### END CODE HERE ###
    return cost
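# Optional check: with theta = 0 every prediction is sigmoid(0) = 0.5, so the cost
# should equal -log(0.5) = log(2) ≈ 0.693 regardless of the labels.
print(computeCost(X, y, np.zeros((3, 1))))   # expected ≈ 0.693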
@jit
def gradient(X, y, theta):
    """Vectorized gradient of the cost: (1/m) * X^T (sigmoid(X theta) - y)."""
    ### START CODE HERE ###
    m = X.shape[0]
    X_theta = np.dot(X, theta)
    g = np.dot(X.T, sigmoid(X_theta) - y) / m
    ### END CODE HERE ###
    return g
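# Optional gradient check (a small sketch): compare the analytic gradient with a
# central finite-difference approximation of computeCost at a fixed test theta.
def numerical_gradient(X, y, theta, eps=1e-5):
    num_grad = np.zeros_like(theta)
    for i in range(theta.size):
        t_plus = theta.copy()
        t_minus = theta.copy()
        t_plus.flat[i] += eps
        t_minus.flat[i] -= eps
        num_grad.flat[i] = (computeCost(X, y, t_plus) - computeCost(X, y, t_minus)) / (2 * eps)
    return num_grad
theta_check = np.array([[-1.0], [0.01], [0.02]])
diff = np.max(np.abs(gradient(X, y, theta_check) - numerical_gradient(X, y, theta_check)))
print('max |analytic - numerical| gradient difference:', diff)  # should be very close to zero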
@jit
def batch_gradient_descent(X, y, theta, epoch, lr=0.01, lamb=0):
    """Run `epoch` steps of batch gradient descent.
    lamb is the L2 regularization strength (the intercept theta[0] is not penalized);
    lamb=0 gives plain, unregularized gradient descent.
    Returns the final theta and the final (unregularized) cost.
    """
    ### START CODE HERE ###
    m = X.shape[0]
    for k in range(epoch):
        j = gradient(X, y, theta)
        reg = (lamb / m) * theta
        reg[0, 0] = 0.0          # do not regularize the intercept term
        theta = theta - lr * (j + reg)
    cost_his = computeCost(X, y, theta)
    ### END CODE HERE ###
    return theta, cost_his
#%% gradient descent by iterating (with feature scaling)
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler()
X_scale = scaling.fit_transform(X[:,1:])
ones_col = np.ones((X.shape[0]))
X_scale = np.insert(X_scale,0, ones_col, axis = 1)
epoch = 100000
theta = np.zeros((3,1))
costs_with_scaling = []
for index, epoch_series in enumerate(np.arange(2, 100000, 500)):
    final_theta, cost = batch_gradient_descent(X_scale, y, theta, epoch=epoch_series, lr=0.01)
    costs_with_scaling.append(cost)
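# Optional check (a rough sketch): training accuracy of the last fitted parameters on
# the scaled features, thresholding the predicted probability at 0.5.
pred_scale = (sigmoid(X_scale @ final_theta) >= 0.5).astype(int)
print('training accuracy (scaled features):', (pred_scale == y).mean())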
#%% decision boundary (with feature scaling)
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]
m = X_scale.shape[0]
X_theta = sigmoid(X_scale @ final_theta)
A = y*np.log(X_theta)+(1 - y)*np.log(1 - X_theta)
data
test1 = np.array(X_scale[:,1:2])
test2 = np.array(X_scale[:,2:])
Test1, Test2 = np.meshgrid(test1, test2)
score = final_theta[0] + final_theta[1]*Test1 + final_theta[2]*Test2
plt.figure()
cs = plt.contour(Test1, Test2, score, levels=[0])
# Reuse the scaler fitted on the full feature matrix so the points line up with the
# boundary computed on X_scale (refitting per subset would use different min/max values).
positive_scaled = scaling.transform(positive[['Exam 1', 'Exam 2']].values)
negative_scaled = scaling.transform(negative[['Exam 1', 'Exam 2']].values)
plt.scatter(positive_scaled[:, 0], positive_scaled[:, 1], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative_scaled[:, 0], negative_scaled[:, 1], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Scaled Exam 1 Score')
plt.ylabel('Scaled Exam 2 Score')
plt.savefig('decision_boundary', dpi=300)
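# For reference, the same boundary can be read off as a line in the scaled feature space:
# theta0 + theta1*x1 + theta2*x2 = 0  <=>  x2 = -(theta0 + theta1*x1) / theta2.
print('scaled boundary intercept:', -final_theta[0, 0] / final_theta[2, 0])
print('scaled boundary slope    :', -final_theta[1, 0] / final_theta[2, 0])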
#%% gradient descent by iterating (without feature scaling)
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler()
epoch = 100000
theta = np.zeros((3,1))
costs_without_scaling = []
for index, epoch_series in enumerate(np.arange(2, 100000, 500)):
    final_theta, cost = batch_gradient_descent(X, y, theta, epoch=epoch_series, lr=0.01)
    costs_without_scaling.append(cost)
#%% cost vs. epochs
plt.figure()
plt.plot(np.arange(2, 100000, 500), costs_with_scaling, label='feature scaling')
plt.plot(np.arange(2, 100000, 500), costs_without_scaling, label='without feature scaling')
plt.xlabel('epochs')
plt.ylabel('cost')
plt.legend()
plt.grid()
plt.savefig("epochs_costs", dpi = 300)
#%% decision boundary without feature scaling (with regularization)
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]
theta = np.zeros((3,1))
color = ['r', 'g', 'b']
plt.figure()
for idx, lambs in enumerate([0, 20, 50]):
    theta = np.zeros((3, 1))
    final_theta, cost = batch_gradient_descent(X, y, theta, epoch=1000000, lr=0.01, lamb=lambs)
    m = X.shape[0]
    print(-final_theta[0] / final_theta[2])
    print(-final_theta[1] / final_theta[2])
    X_theta = sigmoid(X @ final_theta)
    A = y * np.log(X_theta) + (1 - y) * np.log(1 - X_theta)
    test1 = np.sort(np.array(X[:, 1:2]))
    test2 = np.sort(np.array(X[:, 2:]))
    Test1, Test2 = np.meshgrid(test1, test2)
    score = final_theta[0] + final_theta[1] * Test1 + final_theta[2] * Test2
    cs = plt.contour(Test1, Test2, score, levels=[0], colors=color[idx])
    cs.collections[0].set_label('lamb = ' + str(lambs))
plt.scatter(positive['Exam 1'], positive['Exam 2'], s=20, c='c', marker='o', label='Admitted')
plt.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='m', marker='x', label='Not Admitted')
plt.legend()
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.savefig('decision_boundary_without_scaling_regularization', dpi=300)
#%% training and test sets
def split_train_test(data, test_ratio):
    """Randomly split a DataFrame into training and test subsets."""
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
train_data, test_data = split_train_test(data, 0.1)
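# Quick check on the split sizes: they should add up to the full dataset
# (with the standard 100-row ex2data1.txt and test_ratio=0.1 this prints 90 10).
print(len(train_data), len(test_data))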
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import linear_model
logic_model = linear_model.LogisticRegression()
def plot_learning_curves(model, X, y):
    """Plot training/validation RMSE as a function of the training-set size."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for m in np.arange(3, len(X_train)):
        if len(np.unique(y_train[:m])) < 2:
            # LogisticRegression needs at least one sample of each class to fit
            train_errors.append(np.nan)
            val_errors.append(np.nan)
            continue
        model.fit(X_train[:m], y_train[:m].ravel())
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        # predict() already returns 0/1 class labels, so no extra thresholding is needed
        train_errors.append(mean_squared_error(y_train[:m].ravel(), y_train_predict))
        val_errors.append(mean_squared_error(y_val.ravel(), y_val_predict))
    plt.figure()
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="training set")
    plt.plot(np.sqrt(val_errors), "b-+", linewidth=2, label="validation set")
    plt.legend()
    plt.xlabel('Training set size')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning_curve_no_scale', dpi=300)
plot_learning_curves(logic_model, X, y)
#%% RMSE vs epochs
def plot_learning_curves_epoches(X, y):
    """Plot training/validation RMSE of the hand-written model as a function of epochs."""
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    for epochs in np.arange(2, 100000, 500):
        theta = np.zeros((X.shape[1], 1))
        # fit on the training split only, then evaluate on both splits
        final_theta, final_cost = batch_gradient_descent(X_train, y_train, theta, epoch=epochs, lr=0.001)
        # threshold the linear score at 0 (i.e. sigmoid >= 0.5) to get 0/1 predictions
        y_train_predict = np.where(X_train @ final_theta > 0, 1, 0)
        y_val_predict = np.where(X_val @ final_theta > 0, 1, 0)
        train_errors.append(mean_squared_error(y_train, y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.figure()
    plt.plot(np.arange(2, 100000, 500), np.sqrt(train_errors), "r", linewidth=1.5, label="training set")
    plt.plot(np.arange(2, 100000, 500), np.sqrt(val_errors), "b", linewidth=1.5, label="validation set")
    plt.legend()
    plt.xlabel('epochs')
    plt.ylabel('RMSE')
    plt.grid()
    plt.savefig('learning_curve_of_epochs_scaled', dpi=300)
plot_learning_curves_epoches(X_scale, y)