场景描述:通过物联网检测设备大量采集流量数据和液位高度数据,并存储在时序数据库(InfluxDB)中,然后采用下面的代码对数据进行预测。
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Author:李智敏
# Wechat:anark919
# Date:2024-01-05 10:17
# Title:
import os.path
import uuid
import pandas as pd
class Neural_networks():
    """Small feed-forward Keras regressor that keeps its artefacts on disk.

    Every model gets its own directory ``<model_dir>/<model_name>`` holding
    the training data (CSV), the saved network (``.h5``), the diagnostic
    plots (PNG) and the prediction tables (XLSX).  ``forecast`` relies on the
    files written by ``training`` for the same ``model_name``.
    """

    def __init__(self, para_fields: list, outcome_fields: list, model_dir: str, model_name: str = None):
        '''
        :param para_fields: names of the independent-variable (input) columns
        :param outcome_fields: names of the dependent-variable (target) columns
        :param model_dir: base directory under which the model directory is created
        :param model_name: model name; when omitted, a fresh uuid4 hex string
            is generated for this instance
        '''
        # The uuid must be generated per call.  Used as a default argument it
        # would be evaluated once at definition time and every unnamed model
        # would share (and overwrite) the same directory.
        if model_name is None:
            model_name = uuid.uuid4().hex
        self.para_fields = para_fields
        self.outcome_fields = outcome_fields
        self.model_name = model_name
        self.model_dir = os.path.join(model_dir, model_name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.model_dir, exist_ok=True)

    def training(self, data: pd.DataFrame, epochs=50000):
        '''Train the network on ``data`` and persist all artefacts.

        Written into the model directory: a CSV copy of ``data`` (read back by
        ``forecast`` to rebuild the scalers), the trained model (``.h5``), the
        loss-curve image, the predictions on the training data (XLSX) and the
        actual-vs-predicted scatter image.

        :param data: frame containing all ``para_fields`` and ``outcome_fields`` columns
        :param epochs: number of training epochs
        :return: True once everything has been written
        '''
        # Heavy third-party imports stay local so that importing this module
        # (or only constructing the class) does not require them.
        import matplotlib.pyplot as plt
        from sklearn.preprocessing import MinMaxScaler
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.optimizers import Adam
        from keras import regularizers

        # Keep a copy of the training data next to the model.
        df = data
        train_csv = os.path.join(self.model_dir, f"train_{self.model_name}.csv")
        df.to_csv(train_csv, index=False)
        x = df[self.para_fields].values
        y = df[self.outcome_fields].values

        # Scale inputs and targets to [-1, 1].
        x_scaler = MinMaxScaler(feature_range=(-1, 1))
        y_scaler = MinMaxScaler(feature_range=(-1, 1))
        x = x_scaler.fit_transform(x)
        y = y_scaler.fit_transform(y)

        # Two L2-regularised hidden layers and a linear output layer with one
        # unit per outcome field (previously hard-coded to a single unit).
        model = Sequential()
        model.add(Dense(10, activation='relu', input_shape=(len(self.para_fields),),
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(len(self.outcome_fields), activation='linear'))

        # `learning_rate` is the current keyword; the old `lr` alias is
        # deprecated and rejected by recent Keras releases.
        optimizer = Adam(learning_rate=0.0001)
        model.compile(optimizer=optimizer, loss='mse')

        history = model.fit(x, y, epochs=epochs, batch_size=67)

        # NOTE: this evaluates on the training data itself; no separate
        # validation split is made here.
        mse = model.evaluate(x, y)
        print('Validation MSE:', mse)

        # Save the trained model (architecture and weights).
        model_path = os.path.join(self.model_dir, f"{self.model_name}.h5")
        model.save(model_path)

        # Use a font with Chinese glyphs and keep the minus sign renderable.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        # Loss curve.  An explicit figure keeps this plot separate from the
        # scatter plot below even when show() does not discard the figure.
        loss_fig = plt.figure()
        plt.plot(history.history['loss'])
        plt.title("模型误差")
        plt.ylabel("误差")
        plt.xlabel("循环次数")
        img_path = os.path.join(self.model_dir, f"误差曲线_{self.model_name}.png")
        plt.savefig(img_path)
        plt.show()
        plt.close(loss_fig)

        # Predict on the training inputs and map both the targets and the
        # predictions back to the original scale.
        y_pred = model.predict(x)
        y = y_scaler.inverse_transform(y)
        y_pred = y_scaler.inverse_transform(y_pred)
        print("the prediction is:", y_pred)

        # Store the predictions in an Excel sheet.
        df_out = pd.DataFrame(y_pred, columns=self.outcome_fields)
        prediction_path = os.path.join(self.model_dir, f"prediction_train_{self.model_name}.xlsx")
        df_out.to_excel(prediction_path, index=False)

        # Actual vs. predicted scatter plot with the y = x reference line.
        compare_fig = plt.figure()
        plt.scatter(y, y_pred)
        plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
        plt.xlabel("实际值")
        plt.ylabel("预测值")
        img_path = os.path.join(self.model_dir, f"对比图_{self.model_name}.png")
        plt.savefig(img_path)
        plt.show()
        plt.close(compare_fig)
        return True

    def forecast(self, data: pd.DataFrame):
        '''Predict the outcome fields for ``data`` with the saved model.

        The scalers are not persisted; they are re-fitted from the training
        CSV written by ``training``, so ``training`` must have been run for
        this ``model_name`` beforehand.  The input rows combined with the
        predictions are also written to an Excel sheet in the model directory.

        :param data: frame containing all ``para_fields`` columns
        :return: a ``dict_values`` view with one ``{column: value}`` dict per row
        '''
        from sklearn.preprocessing import MinMaxScaler
        from keras.models import load_model

        # Rebuild the scalers exactly as they were fitted during training.
        train_csv = os.path.join(self.model_dir, f"train_{self.model_name}.csv")
        df = pd.read_csv(train_csv)
        x_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(df[self.para_fields].values)
        y_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(df[self.outcome_fields].values)

        # Scale the inputs to predict on with the training-time scaler.
        df_test = data
        x_test = x_scaler.transform(df_test[self.para_fields].values)

        # NOTE (original author): a model path containing Chinese characters
        # raises an encoding error here.
        model_path = os.path.join(self.model_dir, f"{self.model_name}.h5")
        # compile=False: only inference is needed, so the optimizer and loss
        # do not have to be restored.
        model = load_model(model_path, compile=False)

        y_pred = model.predict(x_test)
        y_pred = y_scaler.inverse_transform(y_pred)

        # Predictions are positional (row i of y_pred belongs to row i of the
        # input), so align on a fresh 0..n-1 index rather than on whatever
        # index labels the caller's frame happens to carry.
        df_pred = pd.DataFrame(y_pred, columns=self.outcome_fields)
        df_out = pd.merge(df_test.reset_index(drop=True), df_pred,
                          how='outer', left_index=True, right_index=True)

        # Store the combined table in an Excel sheet.
        prediction_path = os.path.join(self.model_dir, f"prediction_{self.model_name}.xlsx")
        df_out.to_excel(prediction_path, index=False)
        return df_out.T.to_dict().values()
if __name__ == '__main__':
    # Demo run: pull two series from InfluxDB, build a training frame and a
    # forecasting frame, then predict with a previously trained model.
    from 数据库操作 import influxdb_query

    rows_505 = influxdb_query('SELECT para505 FROM "device_YL_315103022220A6D3_1" limit 300')
    rows_168 = influxdb_query('SELECT para168 FROM "device_LD_716001012220A5E8_1" limit 300')

    # The first 100 paired points form the training frame (input + target).
    train_records = []
    for left, right in zip(rows_505[:100], rows_168[:100]):
        train_records.append({
            'time': left['time'],
            'para505': left['para505'],
            'para168': right['para168'],
        })
    df = pd.DataFrame(train_records)

    # The remaining points form the forecasting frame (input only).  They are
    # still paired with the second series, so the shorter of the two series
    # limits how many rows are produced.
    forecast_records = []
    for left, _ in zip(rows_505[100:], rows_168[100:]):
        forecast_records.append({
            'time': left['time'],
            'para505': left['para505'],
        })
    df1 = pd.DataFrame(forecast_records)

    # Model artefacts are resolved relative to the current working directory.
    os.chdir('D://')
    n = Neural_networks(
        ['para505'],
        ['para168'],
        model_dir='.',
        model_name='f39e6103681244a5a092ef9e2759b61c',
    )
    # forecast() loads the files written by an earlier training() run under
    # the same model_name; run n.training(df, epochs=5000) first if they do
    # not exist yet.
    print(n.forecast(df1))
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
- 111.
- 112.
- 113.
- 114.
- 115.
- 116.
- 117.
- 118.
- 119.
- 120.
- 121.
- 122.
- 123.
- 124.
- 125.
- 126.
- 127.
- 128.
- 129.
- 130.
- 131.
- 132.
- 133.
- 134.
- 135.
- 136.
- 137.
- 138.
- 139.
- 140.
- 141.
- 142.
- 143.
- 144.
- 145.
- 146.
- 147.
- 148.
- 149.
- 150.
- 151.
- 152.
- 153.
- 154.
- 155.
- 156.
- 157.
- 158.
- 159.
- 160.
- 161.
- 162.
- 163.
- 164.
- 165.
- 166.
- 167.
- 168.
- 169.
- 170.