场景描述：通过物联网监测设备大量采集流量数据和液位高度数据，并存储在时序数据库（InfluxDB）中，然后采用下面的代码对数据进行预测。

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Author:李智敏
# Wechat:anark919
# Date:2024-01-05 10:17
# Title:
import os.path
import uuid

import pandas as pd


class Neural_networks():
    """Small dense regression network with on-disk persistence.

    Every artefact (training CSV, ``.h5`` model, plots, prediction
    workbooks) is written to ``<model_dir>/<model_name>/``.  ``training``
    fits and saves the model; ``forecast`` reloads it and re-fits the
    min-max scalers from the saved training CSV so that new inputs are
    scaled exactly as during training.
    """

    def __init__(self, para_fields: list, outcome_fields: list, model_dir: str, model_name: str = None):
        '''
        Create the model folder and remember the column layout.

        :param para_fields: names of the input (independent-variable) columns
        :param outcome_fields: names of the output (dependent-variable) columns
        :param model_dir: parent directory in which the model folder is created
        :param model_name: model identifier; when omitted, a fresh uuid4 hex
            string is generated for this instance
        '''
        # A default of ``uuid.uuid4().hex`` in the signature would be evaluated
        # once at definition time and shared by every instance, so the id is
        # generated here, per call.
        if model_name is None:
            model_name = uuid.uuid4().hex
        self.para_fields = para_fields
        self.outcome_fields = outcome_fields
        self.model_name = model_name
        self.model_dir = os.path.join(model_dir, model_name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.model_dir, exist_ok=True)

    def training(self, data: pd.DataFrame, epochs=50000, batch_size=67):
        '''
        Train the network on ``data`` and persist the model and reports.

        Side effects, all inside ``self.model_dir``: the training data is
        saved as CSV (``forecast`` needs it to rebuild the scalers), the
        model is saved as ``.h5``, the loss curve and the actual-vs-predicted
        scatter plot are saved as PNG and shown, and the predictions for the
        training rows are written to an Excel workbook.

        :param data: frame containing all ``para_fields`` and ``outcome_fields`` columns
        :param epochs: number of training epochs
        :param batch_size: mini-batch size passed to ``model.fit``
        :return: ``True`` once everything has been written
        '''
        # Heavy third-party imports stay local so that merely constructing the
        # class does not require keras / sklearn / matplotlib.
        import matplotlib.pyplot as plt
        from sklearn.preprocessing import MinMaxScaler
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.optimizers import Adam
        from keras import regularizers

        # Persist the training data; forecast() re-reads this file.
        df = data
        train_csv = os.path.join(self.model_dir, f"train_{self.model_name}.csv")
        df.to_csv(train_csv, index=False)
        x = df[self.para_fields].values
        y = df[self.outcome_fields].values

        # Scale inputs and outputs to [-1, 1].
        x_scaler = MinMaxScaler(feature_range=(-1, 1))
        y_scaler = MinMaxScaler(feature_range=(-1, 1))
        x = x_scaler.fit_transform(x)
        y = y_scaler.fit_transform(y)

        # Network: two L2-regularised ReLU hidden layers and a linear output
        # layer with one unit per outcome column.
        model = Sequential()
        model.add(Dense(10, activation='relu', input_shape=(len(self.para_fields),), kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(len(self.outcome_fields), activation='linear'))

        # ``learning_rate`` is the supported keyword; ``lr`` is deprecated.
        optimizer = Adam(learning_rate=0.0001)
        model.compile(optimizer=optimizer, loss='mse')

        # Fit the model.
        history = model.fit(x, y, epochs=epochs, batch_size=batch_size)

        # NOTE: this is evaluated on the training data itself (there is no
        # hold-out split), so the printed value is the training error.
        mse = model.evaluate(x, y)
        print('Validation MSE:', mse)

        # Save architecture and weights.
        model_path = os.path.join(self.model_dir, f"{self.model_name}.h5")
        model.save(model_path)

        # Loss curve.  The rcParams enable CJK labels and a proper minus sign.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        plt.figure()
        plt.plot(history.history['loss'])
        plt.title("模型误差")
        plt.ylabel("误差")
        plt.xlabel("循环次数")
        img_path = os.path.join(self.model_dir, f"误差曲线_{self.model_name}.png")
        plt.savefig(img_path)
        plt.show()

        # Predict on the training rows and map everything back to the
        # original units.
        y_pred = model.predict(x)
        y = y_scaler.inverse_transform(y)
        y_pred = y_scaler.inverse_transform(y_pred)
        print("the prediction is:", y_pred)

        # Store the predictions in an Excel workbook.
        df_out = pd.DataFrame(y_pred, columns=self.outcome_fields)
        prediction_path = os.path.join(self.model_dir, f"prediction_train_{self.model_name}.xlsx")
        df_out.to_excel(prediction_path, index=False)

        # Actual-vs-predicted scatter plot on its own figure, so it never
        # lands on top of the loss curve when show() does not close it.
        plt.figure()
        plt.scatter(y, y_pred)
        plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
        plt.xlabel("实际值")
        plt.ylabel("预测值")
        img_path = os.path.join(self.model_dir, f"对比图_{self.model_name}.png")
        plt.savefig(img_path)
        plt.show()

        return True

    def forecast(self, data: pd.DataFrame):
        '''
        Predict the outcome columns for ``data`` with the saved model.

        The scalers are rebuilt by fitting on the training CSV written by
        ``training``, the ``.h5`` model is loaded, and the predictions are
        appended to the input rows.  The combined table is also written to
        ``prediction_<model_name>.xlsx`` in the model folder.

        :param data: frame containing all ``para_fields`` columns
        :return: a view of one dict per row (input columns plus predictions)
        '''
        from sklearn.preprocessing import MinMaxScaler
        from keras.models import load_model

        # Rebuild the scalers from the saved training data.
        train_csv = os.path.join(self.model_dir, f"train_{self.model_name}.csv")
        df = pd.read_csv(train_csv)
        x_scaler = MinMaxScaler(feature_range=(-1, 1))
        y_scaler = MinMaxScaler(feature_range=(-1, 1))
        x_scaler.fit(df[self.para_fields].values)
        y_scaler.fit(df[self.outcome_fields].values)

        # Use a positional 0..n-1 index so that the predictions (which carry
        # a fresh RangeIndex) line up row by row with the inputs regardless
        # of the index the caller's frame had.
        df_test = data.reset_index(drop=True)
        x_test = x_scaler.transform(df_test[self.para_fields].values)

        # Load the trained network.
        # NOTE (original author): non-ASCII (e.g. Chinese) characters in this
        # path cause an encoding error when loading.
        model_path = os.path.join(self.model_dir, f"{self.model_name}.h5")
        # Only predict() is called, so the optimizer/loss need not be restored.
        model = load_model(model_path, compile=False)

        # Predict and map back to the original units.
        y_pred = y_scaler.inverse_transform(model.predict(x_test))

        # Append predictions to the inputs and store them in an Excel workbook.
        df_pred = pd.DataFrame(y_pred, columns=self.outcome_fields)
        df_out = pd.merge(df_test, df_pred, how='outer', left_index=True, right_index=True)
        prediction_path = os.path.join(self.model_dir, f"prediction_{self.model_name}.xlsx")
        df_out.to_excel(prediction_path, index=False)
        return df_out.T.to_dict().values()


if __name__ == '__main__':
    # Demo: pull two series from InfluxDB, pair them row by row, and run a
    # forecast with a previously trained model.
    from 数据库操作 import influxdb_query

    # para505 is used as the model input, para168 as the model output.
    input_rows = influxdb_query('SELECT para505 FROM "device_YL_315103022220A6D3_1" limit 300')
    output_rows = influxdb_query('SELECT para168 FROM "device_LD_716001012220A5E8_1" limit 300')

    # First 100 paired rows: training table (input + known output).
    training_records = []
    for src, dst in zip(input_rows[:100], output_rows[:100]):
        training_records.append({'time': src['time'], 'para505': src['para505'], 'para168': dst['para168']})
    training_frame = pd.DataFrame(training_records)

    # Remaining paired rows: only the input is kept; the output is what
    # gets predicted.  zip() still limits the rows to the shorter series.
    forecast_records = []
    for src, _unused in zip(input_rows[100:], output_rows[100:]):
        forecast_records.append({'time': src['time'], 'para505': src['para505']})
    forecast_frame = pd.DataFrame(forecast_records)

    # Model artefacts are stored relative to the drive root.
    os.chdir('D://')
    network = Neural_networks(
        ['para505'],
        ['para168'],
        model_dir='.',
        model_name='f39e6103681244a5a092ef9e2759b61c',
    )
    # Training is not run here: forecast() relies on the model and training
    # CSV already saved under this model_name.  To retrain first, call
    # network.training(training_frame, epochs=5000).
    print(network.forecast(forecast_frame))
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63.
  • 64.
  • 65.
  • 66.
  • 67.
  • 68.
  • 69.
  • 70.
  • 71.
  • 72.
  • 73.
  • 74.
  • 75.
  • 76.
  • 77.
  • 78.
  • 79.
  • 80.
  • 81.
  • 82.
  • 83.
  • 84.
  • 85.
  • 86.
  • 87.
  • 88.
  • 89.
  • 90.
  • 91.
  • 92.
  • 93.
  • 94.
  • 95.
  • 96.
  • 97.
  • 98.
  • 99.
  • 100.
  • 101.
  • 102.
  • 103.
  • 104.
  • 105.
  • 106.
  • 107.
  • 108.
  • 109.
  • 110.
  • 111.
  • 112.
  • 113.
  • 114.
  • 115.
  • 116.
  • 117.
  • 118.
  • 119.
  • 120.
  • 121.
  • 122.
  • 123.
  • 124.
  • 125.
  • 126.
  • 127.
  • 128.
  • 129.
  • 130.
  • 131.
  • 132.
  • 133.
  • 134.
  • 135.
  • 136.
  • 137.
  • 138.
  • 139.
  • 140.
  • 141.
  • 142.
  • 143.
  • 144.
  • 145.
  • 146.
  • 147.
  • 148.
  • 149.
  • 150.
  • 151.
  • 152.
  • 153.
  • 154.
  • 155.
  • 156.
  • 157.
  • 158.
  • 159.
  • 160.
  • 161.
  • 162.
  • 163.
  • 164.
  • 165.
  • 166.
  • 167.
  • 168.
  • 169.
  • 170.