# Import libraries
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
import numpy as np
# Load the data
iris = load_iris()
X, y = iris.data, iris.target
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=32)
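# Optional sketch (illustrative, not part of the original flow): on a small dataset
# like iris it is common to pass stratify=y so the class proportions stay balanced
# between the train and test splits; the test_size value here is just an assumption.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.25, stratify=y, random_state=32)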
# Preprocessing
scaler = MinMaxScaler() # Create the transformer: min-max normalization
X_train = scaler.fit_transform(X_train) # Fit and transform the training data [note: fit on the training set only]
X_test = scaler.transform(X_test) # Transform the test data with the scaler fitted on the training set
data_min = scaler.data_min_ # Per-column minimum of the training data
data_max = scaler.data_max_ # Per-column maximum of the training data
print("Min values:", data_min)
print("Max values:", data_max)
# Grid search with cross-validation
param_grid = {'n_neighbors': range(1, 21)} # Candidate values for KNN's n_neighbors parameter
model_knn = KNeighborsClassifier()
cv = KFold(n_splits=5, shuffle=True, random_state=32) # 5-fold cross-validation splitter
# Cross-validate every candidate on the training set, scored by accuracy
grid_search = GridSearchCV(model_knn, param_grid, cv=cv, scoring='accuracy')
grid_search.fit(X_train, y_train)
print('Best cross-validation accuracy:', grid_search.best_score_)
print('Best n_neighbors:', grid_search.best_params_)
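# Sketch (illustrative addition): cv_results_ records the mean cross-validation score
# for every candidate, which shows how sensitive KNN is to n_neighbors.
for params, mean_score in zip(grid_search.cv_results_['params'],
                              grid_search.cv_results_['mean_test_score']):
    print(params, f"{mean_score:.4f}")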
# Train and predict
knn = KNeighborsClassifier(n_neighbors=grid_search.best_params_['n_neighbors']) # Build a KNN classifier with the best n_neighbors found above
knn.fit(X_train, y_train) # Fit the KNN model on the training data
y_pred = knn.predict(X_test) # Predict on the test set
# Evaluate
score = accuracy_score(y_test, y_pred) # Accuracy of the predictions against the test labels
print(f"Test accuracy: {score:.2%}")
# Save the model to a file
import pickle # pickle is in the standard library, no pip install needed
# Save the model; "wb" opens "knn.pkl" for writing in binary mode
with open("knn.pkl", "wb") as f:
    pickle.dump(knn, f)
# Load the model back
with open("knn.pkl", "rb") as f:
    loaded_model = pickle.load(f)
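# Sketch (not in the original): a quick sanity check that the reloaded model behaves
# exactly like the in-memory one on the held-out test set. For scikit-learn estimators,
# joblib.dump / joblib.load is a common alternative to pickle.
print("Reloaded model accuracy:", accuracy_score(y_test, loaded_model.predict(X_test)))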
# Predict on new data
x_new = np.array([[5.1,3.5,1.4,0.2],[7.0,3.2,4.7,1.4],[6.3,3.3,6.0,2.5]]) # For the demo, the first record of each iris class
x_new_mms = (x_new - data_min) / (data_max - data_min) # Normalize the new data with the training min/max (equivalent to scaler.transform(x_new))
# print(x_new_mms)
y_new = loaded_model.predict(x_new_mms) # Predict with the reloaded model
print(y_new)
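# Sketch (illustrative addition): map the predicted integer labels back to the
# iris species names stored in the dataset object.
print([iris.target_names[i] for i in y_new])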