思路和前面的两篇一致(xgboost、lgbm),只是换了一个数据集,感兴趣的小伙伴可以将这个代码换成之前的数据集。这里我利用一年的数据来预测未来7天的数据。另外采用的调参方式也进行了改变,这次不是hyperopt,而是gridsearch。
import pandas as pd
import gc
from numpy import nan
from numpy import isnan
from pandas import read_csv
from pandas import to_numeric
from sklearn.metrics import r2_score
import lightgbm as lgb
# multivariate multi-step encoder-decoder lstm
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from numpy.random import seed
import numpy as np
import xgboost as xgb
import pandas as pd
#from sklearn.metrics import roc_auc_score
from sklearn.metrics import explained_variance_score
import matplotlib.pyplot as plt
from hyperopt import STATUS_OK,STATUS_RUNNING, fmin, hp, tpe,space_eval
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
# Module-level constants (random seed, validation-set fraction).
VALID_SIZE = 0.25
SEED = 314159265

# Read worksheets 0-3 of the workbook; with a list for ``sheet_name`` pandas
# returns a dict of DataFrames keyed by sheet position. The first spreadsheet
# row is the header and the first column becomes the index.
sheet = pd.read_excel(
    'F:\\123123.xlsx',
    sheet_name=[0, 1, 2, 3],
    header=0,
    index_col=0,
)

# Only the first worksheet is used below; ``ans`` is an independent copy of it.
data = sheet[0]
print(data)
ans = data.copy()
print(data.head())
# Convert a time series into a supervised-learning (features, target) pair.
def to_supervised(data, n_samples=360, horizon=7, target_col=3):
    """Pair every feature row with the target value ``horizon`` rows later.

    Args:
        data: DataFrame with one row per time step.
        n_samples: Maximum number of (features, target) pairs to return.
            Defaults to 360.
        horizon: How many rows ahead of the feature row the target is taken
            from. Defaults to 7.
        target_col: Positional index of the column used as the target.
            Defaults to 3.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays. ``x`` holds all columns of the first
        ``n`` rows; ``y`` holds ``target_col`` of rows ``horizon`` to
        ``horizon + n``. ``n`` is ``n_samples`` capped at the number of rows
        that still have a target ``horizon`` steps ahead, so ``x`` and ``y``
        always have the same length.
    """
    # Rows closer than ``horizon`` to the end have no target yet; dropping
    # them keeps x and y aligned when the table is shorter than expected.
    available = max(len(data) - horizon, 0)
    n = min(n_samples, available)
    x = data.iloc[:n, :].values
    y = data.iloc[horizon:horizon + n, target_col].values
    return x, y
# Build the supervised arrays from the first worksheet and report their shapes.
data_x, data_y = to_supervised(sheet[0])
print(data_x.shape, data_y.shape, sep="\n")

# Chronological split: the first 300 samples train the model, the remainder
# (the slice end of 367 is simply truncated at the array length) is held out.
train_x, train_y = data_x[:300], data_y[:300]
test_x, test_y = data_x[300:367], data_y[300:367]

# Same print order as before: test_x, train_x, test_y, train_y.
print(test_x.shape, train_x.shape, test_y.shape, train_y.shape, sep="\n")
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
# Hyper-parameter grid for the random forest. It is searched exhaustively:
# 20 * 4 * 4 * 2 * 2 * 2 = 2560 candidates, each fitted on 8 folds.
rfgs_parameters = {
    'n_estimators': list(range(30, 50)),
    'max_depth': list(range(2, 6)),
    'max_features': list(range(2, 6)),
    "min_samples_split": list(range(2, 4)),
    "min_samples_leaf": list(range(2, 4)),
    "bootstrap": [True, False],
}

# Seed the forest with the module-level SEED so repeated runs pick the same
# best parameters and produce the same predictions.
# NOTE(review): the MSLE scorer rejects negative values — assumes the target
# column is non-negative; confirm against the data.
rfr_cv = GridSearchCV(
    RandomForestRegressor(random_state=SEED),
    rfgs_parameters,
    cv=8,
    scoring='neg_mean_squared_log_error',
)
rfr_cv.fit(train_x, train_y)
print("RFR GridSearch score: "+str(rfr_cv.best_score_))
print("RFR GridSearch params: ")
print(rfr_cv.best_params_)

# Evaluate the refitted best estimator on the held-out samples.
prediction1 = rfr_cv.best_estimator_.predict(test_x)
print(prediction1)
print(r2_score(test_y, prediction1))

# Predict from table rows 364-367. The original sliced test_x[364:368], but
# test_x holds at most 67 rows, so that slice was always empty and predict()
# was called with zero samples. Those row positions only exist in the full
# table, so slice it instead.
# NOTE(review): presumably these are the latest rows without a known target —
# confirm the intended row range.
future_x = sheet[0].iloc[364:368, :].values
if len(future_x):
    prediction2 = rfr_cv.best_estimator_.predict(future_x)
else:
    # Table shorter than 365 rows: nothing to predict, keep an empty result.
    prediction2 = np.array([])
print(prediction2)

# Disabled legacy block, kept as a bare string literal (a no-op expression).
# It references names (best_params, dtrain, dvalid, model_gbm) that are not
# defined in the visible part of this file.
"""
print("---------优化完成----------")
##训练模型
print(best_params)
print("---------正式训练模型----------")
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model_gbm = xgb.train(best_params, dtrain, 1000, evals=watchlist,early_stopping_rounds=1000,verbose_eval=True)
print("---------正式预测模型----------")
print("Predict test set...")
test_prediction = model_gbm.predict(xgb.DMatrix(test_x), ntree_limit=model_gbm.best_iteration+1)
print("---------预测完成----------")
print(test_prediction)
test_prediction = model_gbm.predict(xgb.DMatrix(test_x[364:368,:]), ntree_limit=model_gbm.best_iteration+1)
print("---------预测完成----------")
print(test_prediction)
"""