scikit-learn中的KNN
from sklearn.neighbors import KNeighborsClassifier
kNN_classifier=KNeighborsClassifier(n_neighbors=k)
kNN_classifier.fit(x_train,y_train)
x_predict=x.reshape(1,-1) #输入变为矩阵
kNN_classifier.predict(x_predict)
train_test_split
训练测试数据划分
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
accuracy_score
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_predict)
knn_clf.score(x_test,y_test)
超参数
KNN 是否考虑距离权重:参数 weights="uniform"(所有近邻权重相同,不考虑距离)或 weights="distance"(按距离的倒数加权,越近的邻居权重越大)
超参数 p:明可夫斯基(Minkowski)距离中的 p 范数;p=1 为曼哈顿距离,p=2 为欧氏距离(默认值)
Grid Search
from sklearn.model_selection import GridSearchCV
grid_search=GridSearchCV(knn_clf,param_grid)
grid_search.fit(x_train,y_train)
grid_search.best_estimator_
grid_search.best_score_
grid_search.best_params_
grid_search=GridSearchCV(knn_clf,param_grid,n_jobs=4,verbose=2)
# n_jobs:并行计算时使用的 CPU 核数;n_jobs=-1 表示使用所有核。
# verbose:搜索过程中输出信息的详细程度,数值越大输出越详细。
数据归一化
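最值归一化(normalization):x_scale = (x - x_min) / (x_max - x_min),把数据映射到 0~1 之间,适用于有明显边界的数据
均值方差归一化(standardization):x_scale = (x - x_mean) / s,把数据变换为均值为 0、方差为 1 的分布,受极端值影响较小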
StandardScaler
from sklearn.preprocessing import StandardScaler
standardScaler=StandardScaler()
standardScaler.fit(X_train)
standardScaler.mean_ #均值
standardScaler.scale_ #每个特征的标准差(缩放系数);旧版本的 std_ 属性已被弃用并移除,应使用 scale_
X_train=standardScaler.transform(X_train)
X_test_standard=standardScaler.transform(X_test)