代码如下:
from scipy.spatial import distance


def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Both arguments are 1-D sequences of numbers of equal length. In this
    file one is a test sample and the other a training sample; the
    distance is symmetric, so the order does not matter.
    """
    return distance.euclidean(a, b)


class ScrappyKNN:
    """Bare-bones 1-nearest-neighbour classifier.

    ``fit`` only memorises the training data; ``predict`` labels each test
    row with the label of the closest training row (Euclidean distance).
    The ``fit``/``predict`` method pair mirrors the scikit-learn classifier
    call pattern used in the demo below.
    """

    def fit(self, x_train, y_train):
        """Store the training samples and their labels.

        Args:
            x_train: Sequence of feature rows.
            y_train: Sequence of labels; ``y_train[i]`` labels ``x_train[i]``.

        Raises:
            ValueError: If ``x_train`` and ``y_train`` differ in length.
        """
        if len(x_train) != len(y_train):
            raise ValueError(
                "x_train and y_train must have the same length "
                "(got %d and %d)" % (len(x_train), len(y_train))
            )
        self.x_train = x_train
        self.y_train = y_train

    def predict(self, x_test):
        """Return a list with the predicted label for every row of ``x_test``."""
        return [self.closest(row) for row in x_test]

    def closest(self, row):
        """Return the label of the training sample nearest to ``row``.

        When several training samples are equally close, the one with the
        lowest index wins.

        Raises:
            ValueError: If the classifier was fitted with no samples.
        """
        # len() rather than truthiness: x_train may be a NumPy array, whose
        # truth value is ambiguous.
        if len(self.x_train) == 0:
            raise ValueError("cannot predict: the training set is empty")
        # min() returns the first minimal element, matching a strict
        # "dist < best_dist" scan from index 0.
        best_index = min(
            range(len(self.x_train)),
            key=lambda i: euc(row, self.x_train[i]),
        )
        return self.y_train[best_index]


def main():
    """Train ScrappyKNN on half of the iris dataset and report its accuracy."""
    # scikit-learn is imported here so that importing this module only
    # requires SciPy.
    from sklearn import datasets  # provides the bundled iris dataset
    from sklearn.metrics import accuracy_score

    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # scikit-learn < 0.18 only ships the old module, which was
        # deprecated in 0.18 and removed in 0.20.
        from sklearn.cross_validation import train_test_split

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

    # Other classifiers exposing fit/predict can be swapped in here, e.g.
    # sklearn.tree.DecisionTreeClassifier() or
    # sklearn.neighbors.KNeighborsClassifier().
    my_classifier = ScrappyKNN()
    my_classifier.fit(x_train, y_train)

    predictions = my_classifier.predict(x_test)
    print(predictions)  # predicted labels for the test rows

    # The accuracy may differ from run to run because the train/test split
    # is random.
    print(accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()