test

最新推荐文章于 2022-10-24 10:45:11 发布

这条gei最靓的崽

最新推荐文章于 2022-10-24 10:45:11 发布

阅读量189

点赞数

CC 4.0 BY-SA版权

分类专栏：Python；文章标签：深度学习

本文链接：https://blog.youkuaiyun.com/qq_36344619/article/details/112593003

Python 专栏收录该内容

2 篇文章

订阅专栏

博客展示了使用Python进行PM2.5预测的代码。导入相关库后，定义绘图和获取评估指标的函数。读取数据并进行预处理，包括分组、添加季节列、数据归一化等。使用线性回归、随机森林回归和LSTM模型进行预测，并对结果进行评估。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

def draw(path, y_true, y_predict):
    """Plot observed and predicted values against the sample index and save it.

    Args:
        path: Destination handed to ``plt.savefig``.
        y_true: pandas Series of observed values; its index is the x axis.
        y_predict: Predicted values in the same row order as ``y_true``.

    The inputs are not modified: sorting is done on copies so the caller's
    series keeps the row order it shares with its feature matrix.
    """
    # Attach the observed series' index positionally; use the argument itself
    # rather than a module-level variable so the function is self-contained.
    y_predict = pd.Series(y_predict).copy()
    y_predict.index = y_true.index

    # A stable sort on two identically indexed series yields the same
    # permutation for both, keeping each prediction paired with its target.
    y_true = y_true.sort_index(kind='mergesort')
    y_predict = y_predict.sort_index(kind='mergesort')

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(y_true.index, y_true, marker='o', markersize=1)
        plt.plot(y_true.index, y_predict, marker='x', markersize=1)
        plt.savefig(path)
    finally:
        # Release the figure; this function is called repeatedly in a loop.
        plt.close(fig)

def getMetrics(y_true, y_predict):
    """Return MAE, RMSE and MAPE of ``y_predict`` measured against ``y_true``.

    Args:
        y_true: Observed target values.
        y_predict: Predicted values, in the same order as ``y_true``.

    Returns:
        dict with the keys ``'MAE'``, ``'RMSE'`` and ``'MAPE'``.
    """
    return {
        'MAE': metrics.mean_absolute_error(y_true, y_predict),
        # Take the square root explicitly instead of passing ``squared=False``,
        # which is not accepted by every scikit-learn release.
        'RMSE': metrics.mean_squared_error(y_true, y_predict) ** 0.5,
        'MAPE': metrics.mean_absolute_percentage_error(y_true, y_predict),
    }

if __name__ == '__main__':
    import os

    FileList = ['data/PRSA_Data_Aotizhongxin_20130301-20170228.csv', 'data/PRSA_Data_Changping_20130301-20170228.csv']
    # NOTE: only the first file in FileList is processed.
    rawData = pd.read_csv(FileList[0])
    # Three-month buckets: months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    rawData['season'] = (rawData['month'] - 1) // 3 + 1
    rawData = rawData.groupby(['year', 'season'])

    # The plots are written below this directory; make sure it exists.
    os.makedirs('result', exist_ok=True)

    resultSet = {}
    resultSet["LinearRegression"] = {}
    resultSet["RandomForestRegressor"] = {}
    resultSet["LSTM"] = {}

    # One independent experiment per (year, season) group.
    for key, group in rawData:
        print('*' * 20, key, '*' * 20)
        path = str(key[0]) + '-sea' + str(key[1])

        data = group[["year", "month", "day", "hour", "PM2.5", "TEMP", "PRES", "DEWP", "RAIN", "wd", "WSPM"]].copy()
        # Assemble the timestamp from its component columns.
        data["Date"] = pd.to_datetime(data[["year", "month", "day", "hour"]])

        # Work on an explicit copy so the column assignments below do not
        # write into a slice of ``data``.
        dataset = data[["PM2.5", "TEMP", "PRES", "DEWP", "RAIN", "WSPM", "wd"]].copy()
        dataset.index = data["Date"]
        # Previous row's PM2.5 as a lag feature (first row of the group is NaN).
        dataset["PM2.5(t-1)"] = data["PM2.5"].shift(1).values

        # Drop incomplete rows BEFORE encoding the wind direction so a missing
        # ``wd`` is removed like any other missing value instead of being
        # handed to the label encoder.
        dataset = dataset.dropna(axis=0, how='any')
        if len(dataset) < 2:
            print('skipped: not enough complete rows')
            continue
        encoder = LabelEncoder()
        dataset["WD"] = encoder.fit_transform(dataset.pop("wd"))

        # Normalise every column with a +1-shifted min-max formula.
        dataset = dataset.apply(lambda x: (x - np.min(x) + 1) / (np.max(x) - np.min(x) + 1))

        X = dataset[["PM2.5(t-1)", "TEMP", "PRES", "DEWP", "RAIN", "WD", "WSPM"]]
        y = dataset["PM2.5"]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        # For every model: compute the metrics first, then plot from a copy of
        # the target so plotting can never reorder the series used for scoring.
        regressor = LinearRegression().fit(X_train, y_train)
        y_predict = regressor.predict(X_test)
        resultSet["LinearRegression"][path] = getMetrics(y_test, y_predict)
        draw('result/LinearRegression-' + path, y_test.copy(), y_predict)

        # Fit on the training split only; fitting on the full X, y would let
        # the model see the test rows it is evaluated on.
        regressor = RandomForestRegressor().fit(X_train, y_train)
        y_predict = regressor.predict(X_test)
        resultSet["RandomForestRegressor"][path] = getMetrics(y_test, y_predict)
        draw('result/RandomForestRegressor-' + path, y_test.copy(), y_predict)

        # LSTM input is (samples, timesteps, features) with a single timestep.
        train_X = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
        test_X = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))
        model = Sequential()
        # Emit only the final LSTM state so the Dense head yields one value per
        # sample, matching the 1-D target; returning the full sequence would
        # give a 3-D output that does not line up with ``y_train``.
        model.add(LSTM(50, activation='relu', input_shape=(train_X.shape[1], train_X.shape[2])))
        model.add(Dense(1, activation='linear'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(train_X, y_train.values, epochs=50, batch_size=128, verbose=1)
        y_predict = model.predict(test_X).reshape(-1)
        resultSet["LSTM"][path] = getMetrics(y_test, y_predict)
        draw('result/LSTM-' + path, y_test.copy(), y_predict)

    # One CSV per model: rows are the group labels, columns are the metrics.
    result = pd.DataFrame(resultSet["LinearRegression"]).T
    result.to_csv("LinearRegression.csv")
    print(result)

    result = pd.DataFrame(resultSet["RandomForestRegressor"]).T
    result.to_csv("RandomForestRegressor.csv")
    print(result)

    result = pd.DataFrame(resultSet["LSTM"]).T
    result.to_csv("LSTM.csv")
    print(result)