import numpy as np
import pandas as pd
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
def load_dataset(feature_path, label_path):
    """Load and stack feature and label files into NumPy arrays.

    Args:
        feature_path: Iterable of paths to comma-separated feature files
            without a header row. A ``?`` cell marks a missing value. Every
            file must contain exactly 41 columns.
        label_path: Iterable of paths to single-column label files without a
            header row, read with ``pd.read_table`` defaults.

    Returns:
        A ``(feature, label)`` tuple. ``feature`` is a float array of shape
        ``(n_samples, 41)`` in which each missing value has been replaced by
        the mean of its column within the same file. ``label`` is the
        flattened 1-D array of all labels, in file order.

    Raises:
        ValueError: If a feature file does not have 41 columns or a label
            file does not have exactly one column (raised by
            ``np.concatenate``).
    """
    # The empty seed block pins the expected column count and keeps the
    # result well-defined (shape (0, 41)) when no paths are given.
    feature_blocks = [np.empty((0, 41))]
    for file in feature_path:
        df = pd.read_csv(file, na_values='?', header=None)
        # Per-file mean imputation, column by column. A column with no
        # observed value at all has no mean and therefore stays NaN.
        df = df.fillna(df.mean())
        feature_blocks.append(np.asarray(df, dtype=float))
    # Concatenate once instead of re-copying the accumulated array per file.
    feature = np.concatenate(feature_blocks)

    # Seeding with an empty float column keeps numeric labels as floats.
    label_blocks = [np.empty((0, 1))]
    for file in label_path:
        df = pd.read_table(file, header=None)
        label_blocks.append(np.asarray(df))
    # np.concatenate takes a *sequence* of arrays as its first argument.
    label = np.ravel(np.concatenate(label_blocks))
    return feature, label
if __name__ == '__main__':
    # Script entry point: fit three classifiers on data sets A-D, predict on
    # data set E, and print a classification report for each model.
    feature_paths = ['A.feature', 'B.feature', 'C.feature', 'D.feature', 'E.feature']
    label_paths = ['A.label', 'B.label', 'C.label', 'D.label', 'E.label']

    # The first four files form the training set; the last one is held out.
    x_train, y_train = load_dataset(feature_paths[:4], label_paths[:4])
    x_test, y_test = load_dataset(feature_paths[4:], label_paths[4:])

    # Shuffle the training samples (features and labels with the same
    # permutation). train_test_split(..., test_size=0.0) is not used for this
    # because newer scikit-learn releases reject a zero test size.
    order = np.random.permutation(len(x_train))
    x_train, y_train = x_train[order], y_train[order]

    # (display name, estimator) pairs, in the order they are trained and
    # reported.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]

    # Train and predict with every model first; the reports are printed
    # together afterwards so the progress messages stay grouped.
    predictions = []
    for name, clf in classifiers:
        print('Start training {}'.format(name))
        clf.fit(x_train, y_train)
        print('Training done')
        predictions.append((name, clf.predict(x_test)))
        print('Prediction done')

    for name, answer in predictions:
        print('\n\nThe classification report for {}:'.format(name))
        print(classification_report(y_test, answer))