2-神经网络起源-demo3-单车预测NN_Bike_Project

本文介绍了一个深度学习项目，使用神经网络预测自行车共享系统的使用情况。通过对历史数据进行预处理，包括特征工程和数据标准化，然后构建并训练一个神经网络模型，以预测特定条件下自行车的需求量。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 解决pd中print中间省略的问题
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

# 读取数据
data_path = './Bike-Sharing-Dataset/hour.csv'
rides = pd.read_csv(data_path)


def f1():
    print(rides.head())
    print(rides.describe())
    rides.info()


def f2(rides):
    """
    # 一、季节、天气（分类变量）、月份、小时、星期几
    都是分类变量，需要调整为哑变量。
    """
    dummy_fields = ['season', 'weathersit', 'mnth', 'hr', 'weekday']
    for each in dummy_fields:
        dummies = pd.get_dummies(rides[each], prefix=each, drop_first=False)
        rides = pd.concat([rides, dummies], axis=1)
    """
    二、除了将 上面的原变量，还有以下变量需要删除，思考下why?
    1、instant 记录索引号；
    2、dteday  具体某天的日期号；
    3、atemp  体感温度，和temp重复，故删除；
    4、workingday 是否工作日，和weekday重复了，故删除；
    """
    fields_to_drop = ['instant', 'dteday', 'season', 'weathersit',
                      'weekday', 'atemp', 'mnth', 'workingday', 'hr']
    data = rides.drop(fields_to_drop, axis=1)
    print(data.head())

    rides[:24 * 10].plot(x='dteday', y='cnt')
    return data


def f3(data):
    """
    连续变量的数据标准化
    注意:cnt 就是target
    """
    quant_features = ['casual', 'registered', 'cnt', 'temp', 'hum', 'windspeed']
    # 将换算因子进行保存，以便在预测的时候还原数据。
    scaled_features = {}
    for each in quant_features:
        mean, std = data[each].mean(), data[each].std()
        scaled_features[each] = [mean, std]
        data.loc[:, each] = (data[each] - mean) / std
    return data, scaled_features


def f4(data):
    """
    拆分数据集，拆分 特征 和 target
    """
    # 保存最后21天 作为测试数据集
    test_data = data[-21 * 24:]

    # 移除最后21天数据，作为训练数据集
    data = data[:-21 * 24]

    # 将特征值 和 target进行拆分
    target_fields = ['cnt', 'casual', 'registered']
    features, targets = data.drop(target_fields, axis=1), data[target_fields]
    test_features, test_targets = test_data.drop(target_fields, axis=1), test_data[target_fields]
    return features, targets, test_features, test_targets


def f5(features , targets):
    """
    使用训练数据集的后60天数据，作为验证数据集；在模型训练过程中进行验证模型的效果。
    """
    train_features, train_targets = features[:-60 * 24], targets[:-60 * 24]
    val_features, val_targets = features[-60 * 24:], targets[-60 * 24:]
    return train_features, train_targets, val_features, val_targets


class NeuralNetwork(object):
    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """
        :param input_nodes:   输入的节点数量 （特征数量）
        :param hidden_nodes:  隐藏层节点数量
        :param output_nodes:  输出层节点数量
        :param learning_rate:
        """
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # 初始化权重
        self.weights_input_to_hidden = np.random.normal(0.0, self.input_nodes ** -0.5,
                                                        size=(self.input_nodes, self.hidden_nodes))

        self.weights_hidden_to_output = np.random.normal(0.0, self.hidden_nodes ** -0.5,
                                                         size=(self.hidden_nodes, self.output_nodes))
        self.lr = learning_rate

        # TODO: 设置 self.activation_function 来部署 sigmoid 函数
        self.activation_function = lambda x:1/(1+np.exp(-x))

        # 如果你对 lambda表达式不熟悉，也可以用下面方式部署：
        #def sigmoid(x):
         #  return 1/(1+np.exp(-x))
        #self.activation_function = sigmoid

    def train(self, features, targets):
        ''' 使用 batch==1 的features and targets训练网络
            Arguments
            ---------
            features: 2D array, each row is one data record, each column is a feature
            targets: 1D array of target values
        '''
        n_records = features.shape[0]
        delta_weights_i_h = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_h_o = np.zeros(self.weights_hidden_to_output.shape)
        for X, y in zip(features, targets):
            # 1、正向传播，None的地方都需要你编程

            # TODO: 隐藏层
            hidden_inputs = np.dot(X, self.weights_input_to_hidden)
            hidden_outputs = self.activation_function(hidden_inputs)

            # TODO: 输出层
            final_inputs = np.dot(hidden_outputs, self.weights_hidden_to_output)
            final_outputs = final_inputs

            # 2、反向传播

            # TODO: Output error
            error = y-final_outputs  # 为预测值和真实值的差异

            # TODO: 计算隐藏层对误差error的贡献
            output_error_term = error #* final_outputs * (1-final_outputs)  # 标量

            # TODO: 反向传播 error terms
            hidden_error = output_error_term * hidden_outputs * (1-hidden_outputs)  # 向量  (hidden_nodes, )

            hidden_error_term = hidden_error * self.weights_hidden_to_output.T

            #hidden_error = output_error_term * self.weights_hidden_to_output
            #hidden_error_term = hidden_error * hidden_outputs * (1 - hidden_outputs)#GZB

            # 输入to隐藏层 权重梯度累加步骤 Weight step (input to hidden)
            delta_weights_i_h += np.dot(X[:,None],hidden_error_term)
            # 隐藏层to输出层 权重梯度累加步骤Weight step (hidden to output)
            delta_weights_h_o += output_error_term * hidden_outputs[:, None]#hidden_outputs

        # TODO: 更新权重Update the weights
        self.weights_hidden_to_output += delta_weights_h_o * self.lr /n_records  # update hidden-to-output weights with gradient descent step
        self.weights_input_to_hidden += delta_weights_i_h * self.lr /n_records # update input-to-hidden weights with gradient descent step

    def run(self, features):
        '''
        预测函数。
        使用输入特征，执行1次正向传播，得到预测值
        features: 1D array of feature values
        '''
        # 部署正向传播
        # TODO: 隐藏层
        hidden_inputs = np.dot(features, self.weights_input_to_hidden)
        hidden_outputs = self.activation_function(hidden_inputs)

        # TODO: 输出层
        final_inputs = np.dot(hidden_outputs,self.weights_hidden_to_output)
        final_outputs = final_inputs

        return final_outputs


def MSE(y, Y):
    return np.mean((y-Y)**2)

# 显示训练过程中的训练 和 验证损失
def show(losses):
    plt.plot(losses['train'], label='Training loss')
    plt.plot(losses['validation'], label='Validation loss')
    plt.legend()
    _ = plt.ylim()
    plt.show()


def test(network,scaled_features, test_features, test_targets, rides):#dates[12::24],:-21 * 24
    fig, ax = plt.subplots(figsize=(8, 4))
    mean, std = scaled_features['cnt']
    predictions = network.run(test_features).T * std + mean
    ax.plot(predictions[0], label='Prediction',c='b')
    ax.plot((test_targets['cnt'] * std + mean).values, label='Data',c='r')
    ax.set_xlim(right=len(predictions))
    ax.legend()

    dates = pd.to_datetime(rides.ix[test_features.index]['dteday'])
    dates = dates.apply(lambda d: d.strftime('%b %d'))
    ax.set_xticks(np.arange(len(dates))[12::24])
    _ = ax.set_xticklabels(dates[12::24], rotation=45)


if __name__ == '__main__':
    # f1()
    data = f2(rides)
    data, scaled_features = f3(data)
    features, targets, test_features, test_targets = f4(data)
    train_features, train_targets, val_features, val_targets = f5(features, targets)
    # todo 设置超参数 ###
    epochs = 2000        # 迭代次数
    learning_rate = 1  # 学习率
    hidden_nodes = 10  # 隐藏层节点数量，决定你模型的复杂度。
    output_nodes = 1  # 输出层的节点数量。

    n_features = train_features.shape[1]

    network = NeuralNetwork(n_features, hidden_nodes, output_nodes, learning_rate)
    #network = NeuralNetwork()
    losses = {'train': [], 'validation': []}
    for epoch in range(epochs):
        # 每次随机从训练数据集中抽取128条记录作为训练
        batch = np.random.choice(train_features.index, size=128)
        X, y = train_features.ix[batch].values, train_targets.ix[batch]['cnt']

        network.train(X, y)

        # 打印出训练过程
        train_loss = MSE(network.run(train_features).T, train_targets['cnt'].values)
        val_loss = MSE(network.run(val_features).T, val_targets['cnt'].values)
        if epoch % 80 == 0:
            print('训练迭代次数：{},训练损失:{} ,验证损失:'
                  '{}'.format(epoch, train_loss, val_loss))

        losses['train'].append(train_loss)
        losses['validation'].append(val_loss)

    show(losses)
    test(network, scaled_features, test_features, test_targets, rides)

D:\Anaconda\python.exe D:/AI20/HJZ/04-深度学习/1-深度学习入门/深度学习项目/NN_Bike_Project_Work.py
   yr  holiday  temp   hum  windspeed  casual  registered  cnt  season_1  season_2  season_3  season_4  weathersit_1  weathersit_2  weathersit_3  weathersit_4  mnth_1  mnth_2  mnth_3  mnth_4  mnth_5  mnth_6  mnth_7  mnth_8  mnth_9  mnth_10  mnth_11  mnth_12  hr_0  hr_1  hr_2  hr_3  hr_4  hr_5  hr_6  hr_7  hr_8  hr_9  hr_10  hr_11  hr_12  hr_13  hr_14  hr_15  hr_16  hr_17  hr_18  hr_19  hr_20  hr_21  hr_22  hr_23  weekday_0  weekday_1  weekday_2  weekday_3  weekday_4  weekday_5  weekday_6
0   0        0  0.24  0.81        0.0       3          13   16         1         0         0         0             1             0             0             0       1       0       0       0       0       0       0       0       0        0        0        0     1     0     0     0     0     0     0     0     0     0      0      0      0      0      0      0      0      0      0      0      0      0      0      0          0          0          0          0          0          0          1
1   0        0  0.22  0.80        0.0       8          32   40         1         0         0         0             1             0             0             0       1       0       0       0       0       0       0       0       0        0        0        0     0     1     0     0     0     0     0     0     0     0      0      0      0      0      0      0      0      0      0      0      0      0      0      0          0          0          0          0          0          0          1
2   0        0  0.22  0.80        0.0       5          27   32         1         0         0         0             1             0             0             0       1       0       0       0       0       0       0       0       0        0        0        0     0     0     1     0     0     0     0     0     0     0      0      0      0      0      0      0      0      0      0      0      0      0      0      0          0          0          0          0          0          0          1
3   0        0  0.24  0.75        0.0       3          10   13         1         0         0         0             1             0             0             0       1       0       0       0       0       0       0       0       0        0        0        0     0     0     0     1     0     0     0     0     0     0      0      0      0      0      0      0      0      0      0      0      0      0      0      0          0          0          0          0          0          0          1
4   0        0  0.24  0.75        0.0       0           1    1         1         0         0         0             1             0             0             0       1       0       0       0       0       0       0       0       0        0        0        0     0     0     0     0     1     0     0     0     0     0      0      0      0      0      0      0      0      0      0      0      0      0      0      0          0          0          0          0          0          0          1
训练迭代次数：0,训练损失:1.247132667361339 ,验证损失:2.106105214267062
训练迭代次数：80,训练损失:0.3590529070191875 ,验证损失:0.6150146083572496
训练迭代次数：160,训练损失:0.2857302047498742 ,验证损失:0.4975860738814868
训练迭代次数：240,训练损失:0.2649684710549407 ,验证损失:0.4526445997926321
训练迭代次数：320,训练损失:0.30812790527486 ,验证损失:0.4749887593422388
训练迭代次数：400,训练损失:0.2559837414575372 ,验证损失:0.4302018728932083
训练迭代次数：480,训练损失:0.25141328327203805 ,验证损失:0.40774393223028427
训练迭代次数：560,训练损失:0.24133015747920844 ,验证损失:0.41008070266795693
训练迭代次数：640,训练损失:0.21194590681123657 ,验证损失:0.3772503429560536
训练迭代次数：720,训练损失:0.19508857705601537 ,验证损失:0.381115851797845
训练迭代次数：800,训练损失:0.17771869910895188 ,验证损失:0.32740012577727706
训练迭代次数：880,训练损失:0.1606363330889585 ,验证损失:0.3095860111731942
训练迭代次数：960,训练损失:0.1538612971753264 ,验证损失:0.2729886370271457
训练迭代次数：1040,训练损失:0.1300558959381096 ,验证损失:0.2541307408769083
训练迭代次数：1120,训练损失:0.11874624780709074 ,验证损失:0.23512699322056257
训练迭代次数：1200,训练损失:0.11197449087357411 ,验证损失:0.23873009750987792
训练迭代次数：1280,训练损失:0.10032673189298635 ,验证损失:0.2016466450900573
训练迭代次数：1360,训练损失:0.09108141005437853 ,验证损失:0.1980659294955915
训练迭代次数：1440,训练损失:0.08664068247176354 ,验证损失:0.1958191170556848
训练迭代次数：1520,训练损失:0.08203222095091546 ,验证损失:0.18698433188668584
训练迭代次数：1600,训练损失:0.07840329222472668 ,验证损失:0.16822290844762527
训练迭代次数：1680,训练损失:0.07302075879455985 ,验证损失:0.15383466115381436
训练迭代次数：1760,训练损失:0.07040852265447362 ,验证损失:0.16861114088882537
训练迭代次数：1840,训练损失:0.07538667038437902 ,验证损失:0.1809508112743515
训练迭代次数：1920,训练损失:0.06565033969611891 ,验证损失:0.15227779300291996

Process finished with exit code 0