# Competition task (比赛题目)
# Code (代码)
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction import DictVectorizer


def _add_time_features(features_df):
    """Return a new frame with 'datetime' replaced by month/day/hour columns.

    Args:
        features_df: DataFrame containing a 'datetime' column that
            ``pd.DatetimeIndex`` can parse.

    Returns:
        A new DataFrame with integer columns 'month', 'day' (day of week as
        reported by ``DatetimeIndex.dayofweek``) and 'hour' appended, and the
        raw 'datetime' column removed. The input frame is not modified.
    """
    # Parse the timestamps once instead of once per derived column.
    timestamps = pd.DatetimeIndex(features_df['datetime'])
    # assign() builds a new DataFrame, so no column is ever written onto a
    # selection of the caller's frame (which would raise SettingWithCopyWarning).
    return features_df.assign(
        month=timestamps.month,
        day=timestamps.dayofweek,
        hour=timestamps.hour,
    ).drop(columns=['datetime'])


# Load the training and test tables; the first row of each file is the header.
train_df = pd.read_csv("train.csv", header=0)
test_df = pd.read_csv("test.csv", header=0)

# Input columns used by the model; 'count' in train.csv is the target.
selected_features = ['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed']

X_train_df = _add_time_features(train_df[selected_features])
y_train_df = train_df['count']
X_test_df = _add_time_features(test_df[selected_features])

# Turn each row into a {feature: value} dict and vectorize it into a dense
# array. The vectorizer is fitted on the training rows only and then reused
# for the test rows so both matrices share the same column layout.
# orient='records' is the full spelling; the abbreviated 'record' is rejected
# by current pandas releases.
dict_vec = DictVectorizer(sparse=False)
X_train_df = dict_vec.fit_transform(X_train_df.to_dict(orient='records'))
X_test_df = dict_vec.transform(X_test_df.to_dict(orient='records'))

# NOTE(review): the variable is called `gbr` but holds a random forest; the
# name is kept so existing references keep working.
# NOTE(review): no random_state is set, so predictions vary between runs.
gbr = RandomForestRegressor()
gbr.fit(X_train_df, y_train_df)
gbr_y_predict = gbr.predict(X_test_df)

# Write the submission as datetime,count; the column order is pinned
# explicitly instead of relying on dict ordering.
gbr_submission = pd.DataFrame(
    {'datetime': test_df['datetime'], 'count': gbr_y_predict},
    columns=['datetime', 'count'],
)
gbr_submission.to_csv('gbr_submission.csv', index=False)