import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Raise pandas' console display limits (column count, total line width and
# per-column width) to 1000 for the frames printed by this script.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)
# Relative path of the hourly CSV; resolved against the current working directory.
data_path = './Bike-Sharing-Dataset/hour.csv'
# Loaded once at import time. f1() reads this module-level frame directly and
# the __main__ block passes it to f2() and test().
rides = pd.read_csv(data_path)
def f1():
    """Print a quick overview of the module-level ``rides`` frame.

    Shows the first rows and the descriptive statistics, then lets pandas
    write its column/dtype summary via ``DataFrame.info()``.
    """
    for summary in (rides.head(), rides.describe()):
        print(summary)
    rides.info()
def f2(rides, plot=True):
    """One-hot encode the categorical columns and drop the unused ones.

    Season, weather situation, month, hour and weekday are categorical, so
    each one is expanded into dummy (indicator) columns named
    ``<field>_<value>``.

    :param rides: raw hourly DataFrame; must contain ``season``,
        ``weathersit``, ``mnth``, ``hr``, ``weekday``, ``instant``,
        ``dteday``, ``atemp`` and ``workingday`` (plus ``cnt`` when plotting)
    :param plot: when true (the default) draw ``cnt`` against ``dteday`` for
        the first 240 rows (ten days of hourly records); pass ``False`` to
        skip the plot, e.g. when no plotting backend is available
    :return: a new DataFrame with the dummy columns appended and the raw
        categorical / redundant columns removed; the ``rides`` argument
        itself is not modified
    """
    dummy_fields = ['season', 'weathersit', 'mnth', 'hr', 'weekday']
    # Pin an integer dtype: pandas >= 2.0 produces bool dummies by default,
    # and a frame mixing bool with numeric columns converts to an *object*
    # array, which the NumPy math in the network cannot process.
    dummy_frames = [
        pd.get_dummies(rides[each], prefix=each, drop_first=False,
                       dtype=np.uint8)
        for each in dummy_fields
    ]
    # Concatenate once instead of growing the frame inside a loop.
    rides = pd.concat([rides] + dummy_frames, axis=1)

    # Dropped in addition to the (now encoded) categorical columns above:
    #   1. instant    - record index
    #   2. dteday     - calendar date of the record
    #   3. atemp      - "feels like" temperature, redundant with temp
    #   4. workingday - working-day flag, redundant with weekday
    fields_to_drop = ['instant', 'dteday', 'season', 'weathersit',
                      'weekday', 'atemp', 'mnth', 'workingday', 'hr']
    data = rides.drop(fields_to_drop, axis=1)
    print(data.head())

    if plot:
        # Plot from the un-dropped frame because it still has 'dteday'.
        rides[:24 * 10].plot(x='dteday', y='cnt')
    return data
def f3(data):
    """Standardise the continuous columns to zero mean and unit variance.

    The columns ``casual``, ``registered``, ``cnt``, ``temp``, ``hum`` and
    ``windspeed`` are rescaled; ``cnt`` is the prediction target. The frame
    passed in is modified in place and also returned.

    :param data: DataFrame containing all six columns listed above
    :return: tuple ``(data, scaled_features)``; ``data`` is the same (now
        rescaled) frame and ``scaled_features`` maps each column name to
        ``[mean, std]`` so values can be converted back to original units
    """
    quant_features = ['casual', 'registered', 'cnt', 'temp', 'hum', 'windspeed']
    scaled_features = {}
    for each in quant_features:
        mean, std = data[each].mean(), data[each].std()
        scaled_features[each] = [mean, std]
        # Replace the whole column rather than writing through
        # .loc[:, col]: the integer count columns turn into floats here,
        # and an element-wise write of floats into an int column is
        # deprecated (and later rejected) by newer pandas versions.
        data[each] = (data[each] - mean) / std
    return data, scaled_features
def f4(data):
    """Hold out the final rows as a test set and separate features from targets.

    The last ``21 * 24`` rows (21 days of hourly records) become the test
    split; everything before them is kept for training. In both splits the
    columns ``cnt``, ``casual`` and ``registered`` are the targets and all
    remaining columns are the features.

    :param data: pre-processed DataFrame that includes the three target columns
    :return: ``(features, targets, test_features, test_targets)``
    """
    holdout_rows = 21 * 24
    target_fields = ['cnt', 'casual', 'registered']

    train_part = data[:-holdout_rows]
    test_part = data[-holdout_rows:]

    features = train_part.drop(target_fields, axis=1)
    targets = train_part[target_fields]
    test_features = test_part.drop(target_fields, axis=1)
    test_targets = test_part[target_fields]
    return features, targets, test_features, test_targets
def f5(features, targets):
    """Carve a validation split off the end of the training data.

    The last ``60 * 24`` records (60 days of hourly data) are used as the
    validation set that is evaluated while the model trains; the records
    before them stay in the training set.

    :param features: sliceable collection of training features
    :param targets: sliceable collection of training targets, same length
    :return: ``(train_features, train_targets, val_features, val_targets)``
    """
    cut = -60 * 24
    return features[:cut], targets[:cut], features[cut:], targets[cut:]
class NeuralNetwork(object):
    """Regression network with one sigmoid hidden layer and a linear output.

    Weights are stored as two matrices without bias terms:
    ``weights_input_to_hidden`` of shape ``(input_nodes, hidden_nodes)`` and
    ``weights_hidden_to_output`` of shape ``(hidden_nodes, output_nodes)``.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Create the network and randomly initialise both weight matrices.

        :param input_nodes: number of input nodes (number of features)
        :param hidden_nodes: number of hidden-layer nodes
        :param output_nodes: number of output-layer nodes
        :param learning_rate: step size applied to the batch-averaged update
        """
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Zero-mean normal initialisation with std = (layer fan-in) ** -0.5.
        self.weights_input_to_hidden = np.random.normal(
            0.0, self.input_nodes ** -0.5,
            size=(self.input_nodes, self.hidden_nodes))
        self.weights_hidden_to_output = np.random.normal(
            0.0, self.hidden_nodes ** -0.5,
            size=(self.hidden_nodes, self.output_nodes))
        self.lr = learning_rate

        # Sigmoid, used for the hidden layer only.
        self.activation_function = lambda x: 1 / (1 + np.exp(-x))

    def _forward(self, X):
        """Run one forward pass and return ``(hidden_outputs, final_outputs)``."""
        hidden_inputs = np.dot(X, self.weights_input_to_hidden)
        hidden_outputs = self.activation_function(hidden_inputs)
        # The output layer is linear: f(x) = x.
        final_outputs = np.dot(hidden_outputs, self.weights_hidden_to_output)
        return hidden_outputs, final_outputs

    def train(self, features, targets):
        """Apply one gradient-descent step computed from a batch of records.

        Gradients are accumulated record by record and both weight matrices
        are updated once at the end with the batch-averaged step.

        Arguments
        ---------
        features: 2D array, each row is one data record, each column is a feature
        targets: 1D array of target values (one per record), or a 2D array
            with one row per record and one column per output node
        """
        # Coerce to float arrays so DataFrames / object arrays (e.g. frames
        # mixing bool and numeric columns) work with the NumPy math below.
        features = np.asarray(features, dtype=float)
        n_records = features.shape[0]
        if n_records == 0:
            # Nothing to learn from; also avoids 0/0 turning weights into NaN.
            return
        # One row per record so every y has shape (output_nodes,).
        targets = np.asarray(targets, dtype=float).reshape(n_records, -1)

        delta_weights_i_h = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_h_o = np.zeros(self.weights_hidden_to_output.shape)
        for X, y in zip(features, targets):
            hidden_outputs, final_outputs = self._forward(X)

            # Linear output layer: derivative is 1, so the error term is the
            # error itself.
            output_error_term = y - final_outputs

            # Propagate the error back through the output weights first, then
            # scale by the sigmoid derivative. Doing it in this order keeps
            # the shapes valid for any number of output nodes.
            hidden_error = np.dot(self.weights_hidden_to_output,
                                  output_error_term)
            hidden_error_term = hidden_error * hidden_outputs * (1 - hidden_outputs)

            delta_weights_i_h += np.outer(X, hidden_error_term)
            delta_weights_h_o += np.outer(hidden_outputs, output_error_term)

        self.weights_hidden_to_output += delta_weights_h_o * self.lr / n_records
        self.weights_input_to_hidden += delta_weights_i_h * self.lr / n_records

    def run(self, features):
        """Predict by running a single forward pass over ``features``.

        features: 1D array of feature values for one record, or a 2D array
            (or DataFrame) with one record per row
        Returns the linear output-layer values as a NumPy array.
        """
        _, final_outputs = self._forward(np.asarray(features, dtype=float))
        return final_outputs
def MSE(y, Y):
    """Return the mean of the squared element-wise differences ``y - Y``."""
    residual = y - Y
    return np.mean(residual ** 2)
def show(losses):
    """Plot the recorded training and validation loss curves and display them.

    :param losses: mapping with the keys ``'train'`` and ``'validation'``,
        each holding a sequence of loss values
    """
    curves = (('train', 'Training loss'),
              ('validation', 'Validation loss'))
    for key, caption in curves:
        plt.plot(losses[key], label=caption)
    plt.legend()
    # Called without arguments this only queries the current y-limits.
    plt.ylim()
    plt.show()
def test(network, scaled_features, test_features, test_targets, rides):
    """Plot the network's predictions against the real counts of the test split.

    Predictions and targets are converted back from standardised values to
    original units using the ``[mean, std]`` stored for ``cnt``.

    :param network: trained model exposing ``run(features)``
    :param scaled_features: mapping of column name to ``[mean, std]``; only
        the ``'cnt'`` entry is read
    :param test_features: feature DataFrame of the test split; its index is
        used to look up the matching dates in ``rides``
    :param test_targets: target DataFrame of the test split (``'cnt'`` is used)
    :param rides: original frame that still contains the ``dteday`` column
    """
    fig, ax = plt.subplots(figsize=(8, 4))
    mean, std = scaled_features['cnt']
    # Transposed so that predictions[0] is the series of predicted counts.
    predictions = network.run(test_features).T * std + mean
    ax.plot(predictions[0], label='Prediction', c='b')
    ax.plot((test_targets['cnt'] * std + mean).values, label='Data', c='r')
    # predictions is (outputs, records), so len(predictions) would be the
    # number of outputs; the x-axis must span the number of records.
    ax.set_xlim(right=predictions.shape[1])
    ax.legend()

    # .loc replaces the label-based .ix indexer removed in pandas 1.0.
    dates = pd.to_datetime(rides.loc[test_features.index, 'dteday'])
    dates = dates.dt.strftime('%b %d')
    # One tick per day, placed at the 12th hourly record of each day.
    ax.set_xticks(np.arange(len(dates))[12::24])
    _ = ax.set_xticklabels(dates.iloc[12::24], rotation=45)
    # Display the figure; without this nothing appears when run as a script.
    plt.show()
if __name__ == '__main__':
    # Pre-process: one-hot encode, standardise, then split into
    # train / validation / test sets.
    data = f2(rides)
    data, scaled_features = f3(data)
    features, targets, test_features, test_targets = f4(data)
    # Make every feature column float so frames that mix bool/int/float
    # columns do not degrade to object arrays inside the network.
    features = features.astype(float)
    test_features = test_features.astype(float)
    train_features, train_targets, val_features, val_targets = f5(features, targets)

    # Hyper-parameters.
    epochs = 2000
    learning_rate = 1
    hidden_nodes = 10
    output_nodes = 1
    n_features = train_features.shape[1]
    network = NeuralNetwork(n_features, hidden_nodes, output_nodes, learning_rate)

    losses = {'train': [], 'validation': []}
    for epoch in range(epochs):
        # Draw a random mini-batch of 128 index labels (with replacement).
        batch = np.random.choice(train_features.index, size=128)
        # .loc replaces the label-based .ix indexer removed in pandas 1.0.
        X = train_features.loc[batch].values
        y = train_targets.loc[batch, 'cnt']
        network.train(X, y)

        # Evaluate on the full training and validation sets after each step.
        train_loss = MSE(network.run(train_features).T, train_targets['cnt'].values)
        val_loss = MSE(network.run(val_features).T, val_targets['cnt'].values)
        if epoch % 80 == 0:
            print('训练迭代次数:{},训练损失:{} ,验证损失:'
                  '{}'.format(epoch, train_loss, val_loss))
        # Recorded every epoch so the plotted curves have full resolution.
        losses['train'].append(train_loss)
        losses['validation'].append(val_loss)

    show(losses)
    test(network, scaled_features, test_features, test_targets, rides)
# ---------------------------------------------------------------------------
# Sample console output from one run of this script (kept for reference).
# ---------------------------------------------------------------------------
# D:\Anaconda\python.exe D:/AI20/HJZ/04-深度学习/1-深度学习入门/深度学习项目/NN_Bike_Project_Work.py
# yr holiday temp hum windspeed casual registered cnt season_1 season_2 season_3 season_4 weathersit_1 weathersit_2 weathersit_3 weathersit_4 mnth_1 mnth_2 mnth_3 mnth_4 mnth_5 mnth_6 mnth_7 mnth_8 mnth_9 mnth_10 mnth_11 mnth_12 hr_0 hr_1 hr_2 hr_3 hr_4 hr_5 hr_6 hr_7 hr_8 hr_9 hr_10 hr_11 hr_12 hr_13 hr_14 hr_15 hr_16 hr_17 hr_18 hr_19 hr_20 hr_21 hr_22 hr_23 weekday_0 weekday_1 weekday_2 weekday_3 weekday_4 weekday_5 weekday_6
# 0 0 0 0.24 0.81 0.0 3 13 16 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# 1 0 0 0.22 0.80 0.0 8 32 40 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# 2 0 0 0.22 0.80 0.0 5 27 32 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# 3 0 0 0.24 0.75 0.0 3 10 13 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# 4 0 0 0.24 0.75 0.0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
# 训练迭代次数:0,训练损失:1.247132667361339 ,验证损失:2.106105214267062
# 训练迭代次数:80,训练损失:0.3590529070191875 ,验证损失:0.6150146083572496
# 训练迭代次数:160,训练损失:0.2857302047498742 ,验证损失:0.4975860738814868
# 训练迭代次数:240,训练损失:0.2649684710549407 ,验证损失:0.4526445997926321
# 训练迭代次数:320,训练损失:0.30812790527486 ,验证损失:0.4749887593422388
# 训练迭代次数:400,训练损失:0.2559837414575372 ,验证损失:0.4302018728932083
# 训练迭代次数:480,训练损失:0.25141328327203805 ,验证损失:0.40774393223028427
# 训练迭代次数:560,训练损失:0.24133015747920844 ,验证损失:0.41008070266795693
# 训练迭代次数:640,训练损失:0.21194590681123657 ,验证损失:0.3772503429560536
# 训练迭代次数:720,训练损失:0.19508857705601537 ,验证损失:0.381115851797845
# 训练迭代次数:800,训练损失:0.17771869910895188 ,验证损失:0.32740012577727706
# 训练迭代次数:880,训练损失:0.1606363330889585 ,验证损失:0.3095860111731942
# 训练迭代次数:960,训练损失:0.1538612971753264 ,验证损失:0.2729886370271457
# 训练迭代次数:1040,训练损失:0.1300558959381096 ,验证损失:0.2541307408769083
# 训练迭代次数:1120,训练损失:0.11874624780709074 ,验证损失:0.23512699322056257
# 训练迭代次数:1200,训练损失:0.11197449087357411 ,验证损失:0.23873009750987792
# 训练迭代次数:1280,训练损失:0.10032673189298635 ,验证损失:0.2016466450900573
# 训练迭代次数:1360,训练损失:0.09108141005437853 ,验证损失:0.1980659294955915
# 训练迭代次数:1440,训练损失:0.08664068247176354 ,验证损失:0.1958191170556848
# 训练迭代次数:1520,训练损失:0.08203222095091546 ,验证损失:0.18698433188668584
# 训练迭代次数:1600,训练损失:0.07840329222472668 ,验证损失:0.16822290844762527
# 训练迭代次数:1680,训练损失:0.07302075879455985 ,验证损失:0.15383466115381436
# 训练迭代次数:1760,训练损失:0.07040852265447362 ,验证损失:0.16861114088882537
# 训练迭代次数:1840,训练损失:0.07538667038437902 ,验证损失:0.1809508112743515
# 训练迭代次数:1920,训练损失:0.06565033969611891 ,验证损失:0.15227779300291996
# Process finished with exit code 0