Kaggle : https://www.kaggle.com/c/digit-recognizer/
准确率:0.966
分类器:随机森林
代码详解:
import pandas as pd
# Random Forest Classifier
def random_forest_classifier(train_x, train_y, n_estimators=400, n_jobs=4,
                             verbose=1, random_state=None):
    """Fit a random-forest classifier on the given training data.

    Args:
        train_x: Feature matrix passed to ``RandomForestClassifier.fit``
            (the caller in this file passes a pandas DataFrame).
        train_y: Target labels aligned with the rows of ``train_x``.
        n_estimators: Number of trees in the forest. Defaults to 400.
        n_jobs: Number of parallel jobs forwarded to scikit-learn.
            Defaults to 4.
        verbose: Verbosity level forwarded to scikit-learn. Defaults to 1.
        random_state: Optional seed for a reproducible forest. The default
            ``None`` leaves the model unseeded.

    Returns:
        The fitted ``RandomForestClassifier`` instance.
    """
    # Kept as a function-local import so that importing this module does
    # not by itself require scikit-learn.
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        n_jobs=n_jobs,
        verbose=verbose,
        random_state=random_state,
    )
    model.fit(train_x, train_y)
    return model
#数据读取
def read_data(data_file,
              test_file="/Users/Cheney/Downloads/kaggle(方老师)/test.csv",
              train_fraction=1):
    """Load the training and test CSV files and split features from labels.

    Args:
        data_file: Path of the training CSV; it must contain a ``label``
            column.
        test_file: Path of the test CSV. Defaults to the location that was
            previously hard-coded, so existing one-argument calls read the
            same file as before.
        train_fraction: Share of the training rows to keep, taken from the
            top of the file. Must satisfy ``0 < train_fraction <= 1``.
            Defaults to 1 (keep every row).

    Returns:
        A tuple ``(train_x, train_y, test)`` where ``train_x`` is the
        training DataFrame without the ``label`` column, ``train_y`` is the
        ``label`` column, and ``test`` is the test DataFrame as read.

    Raises:
        ValueError: If ``train_fraction`` is outside ``(0, 1]``.
        KeyError: If the training CSV has no ``label`` column.
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(
            "train_fraction must be in the interval (0, 1], got %r"
            % (train_fraction,)
        )

    data = pd.read_csv(data_file)
    # Positional slice of the leading rows; with the default fraction of 1
    # this keeps the whole training set.
    n_train = int(len(data) * train_fraction)
    train = data.iloc[:n_train]

    test = pd.read_csv(test_file)

    # Separate the labels from the features.
    train_y = train["label"]
    train_x = train.drop("label", axis=1)
    return train_x, train_y, test
if __name__ == '__main__':
    # The training input and the result file share one directory, so both
    # paths are built from it.
    data_dir = "/Users/Cheney/Downloads/kaggle(方老师)"
    data_file = data_dir + "/train.csv"
    output_file = data_dir + "/res.csv"

    # Load the data and train the model.
    train_x, train_y, test = read_data(data_file)
    model = random_forest_classifier(train_x, train_y)

    # Predict a label for every row of the test set.
    predict = model.predict(test)

    # Write the predictions in the Kaggle submission layout: a header row
    # "ImageId,Label", ImageId counting from 1, and no extra index column.
    # (The previous output had a 0-based index column and a "0" header.)
    submission = pd.DataFrame({
        "ImageId": range(1, len(predict) + 1),
        "Label": predict,
    })
    submission.to_csv(output_file, index=False)