依旧根据麦子学院彭亮老师的机器学习视频整理。
1.数据集(为UCI数据集的‘iris’数据集),粘贴出来,存为iris.data文件:
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
2.程序实现:
#coding=utf-8
import csv
import random
import math
import operator
# 读取数据并且将数据分成了训练集和测试集
def loadDataset(filename,split,trainingSet=[],testSet=[]):
with open(filename,'rb') as csvfile:
lines = csv.reader(csvfile) #读取数的每一行
dataset= list(lines) #将数据变成了列表
print dataset
#print len(dataset)
for x in range(len(dataset)-1):
for y in range(4):
dataset[x][y] =float(dataset[x][y]) #数据类型需要是float才能进行数据处理
if random.random() < split:
trainingSet.append(dataset[x])
else:
testSet.append(dataset[x])
def euclideanDistance(instance1,instance2,length):
distance = 0
for x in range(length):
distance +=pow(instance1[x] - instance2[x],2)
return math.sqrt(distance)
def getNeighbors(trainingSet,testInstance,k):
distances = []
length = len(testInstance)-1 #testInstance为待检测的样本,维数减1,去除标签,得到特征数
for x in range(len(trainingSet)): #遍历所有的训练样本
dist = euclideanDistance(testInstance,trainingSet[x],length)
distances.append((trainingSet[x],dist)) #把dist加进去了(([a, b, c, d, 'Iris-setosa'], dist))
distances.sort(key = operator.itemgetter(1)) #将测试集所有数据和Instance的距离求出来排序
neighbors = []
for x in range(k):
neighbors.append(distances[x][0]) #取的是trainingSet
return neighbors
def getResponse(neighbors):
classVotes = {}
for x in range(len(neighbors)):
response = neighbors[x][-1] #得到标签
#print(response)
if response in classVotes: #response为邻居们的标签
classVotes[response] += 1
else:
classVotes[response] = 1
sortedVotes = sorted(classVotes.items(),key = operator.itemgetter(1),reverse = True) #operator.itemgetter(item),返回一个可调用的对象,该对象通过运算符的__getitem__()的方法从运算中获取item. operator.getitem(a,b) 返回a中索引为b的值。
#从大到小排序
#print(sortedVotes[0][0])
#print(classVotes.items())
#print(sortedVotes)
return sortedVotes[0][0] #给出排列在第一的目标
#对于测试样本的准确率计算:
def getAccuracy(testSet,predictions):
correct = 0
for x in range(len(testSet)):
if testSet[x][-1] == predictions[x]:
correct +=1
return (correct/float(len(testSet)) *100.0)
def main():
trainingSet = []
testSet = []
split = 0.67
loadDataset(r'iris.data',split,trainingSet,testSet)
print 'Train set:' + repr(len(trainingSet))
print 'Test set:' + repr(len(testSet))
predictions = []
k = 3
for x in range(len(testSet)):
neighbors = getNeighbors(trainingSet,testSet[x],k)
result = getResponse(neighbors)
predictions.append(result)
print('>predicted=' +repr(result) +',actual = '+repr(testSet[x][-1]))
accuracy =getAccuracy(testSet,predictions)
print 'Accuracy:' +repr(accuracy) +'%'
main()
3.结果:
/home/troy/anaconda2/bin/python /home/troy/1.py
[['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'], ['4.9', '3.0', '1.4', '0.2', 'Iris-setosa'], ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa'], ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa'], ['5.0', '3.6', '1.4', '0.2', 'Iris-setosa'], ['5.4', '3.9', '1.7', '0.4', 'Iris-setosa'], ['4.6', '3.4', '1.4', '0.3', 'Iris-setosa'], ['5.0', '3.4', '1.5', '0.2', 'Iris-setosa'], ['4.4', '2.9', '1.4', '0.2', 'Iris-setosa'], ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'], ['5.4', '3.7', '1.5', '0.2', 'Iris-setosa'], ['4.8', '3.4', '1.6', '0.2', 'Iris-setosa'], ['4.8', '3.0', '1.4', '0.1', 'Iris-setosa'], ['4.3', '3.0', '1.1', '0.1', 'Iris-setosa'], ['5.8', '4.0', '1.2', '0.2', 'Iris-setosa'], ['5.7', '4.4', '1.5', '0.4', 'Iris-setosa'], ['5.4', '3.9', '1.3', '0.4', 'Iris-setosa'], ['5.1', '3.5', '1.4', '0.3', 'Iris-setosa'], ['5.7', '3.8', '1.7', '0.3', 'Iris-setosa'], ['5.1', '3.8', '1.5', '0.3', 'Iris-setosa'], ['5.4', '3.4', '1.7', '0.2', 'Iris-setosa'], ['5.1', '3.7', '1.5', '0.4', 'Iris-setosa'], ['4.6', '3.6', '1.0', '0.2', 'Iris-setosa'], ['5.1', '3.3', '1.7', '0.5', 'Iris-setosa'], ['4.8', '3.4', '1.9', '0.2', 'Iris-setosa'], ['5.0', '3.0', '1.6', '0.2', 'Iris-setosa'], ['5.0', '3.4', '1.6', '0.4', 'Iris-setosa'], ['5.2', '3.5', '1.5', '0.2', 'Iris-setosa'], ['5.2', '3.4', '1.4', '0.2', 'Iris-setosa'], ['4.7', '3.2', '1.6', '0.2', 'Iris-setosa'], ['4.8', '3.1', '1.6', '0.2', 'Iris-setosa'], ['5.4', '3.4', '1.5', '0.4', 'Iris-setosa'], ['5.2', '4.1', '1.5', '0.1', 'Iris-setosa'], ['5.5', '4.2', '1.4', '0.2', 'Iris-setosa'], ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'], ['5.0', '3.2', '1.2', '0.2', 'Iris-setosa'], ['5.5', '3.5', '1.3', '0.2', 'Iris-setosa'], ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'], ['4.4', '3.0', '1.3', '0.2', 'Iris-setosa'], ['5.1', '3.4', '1.5', '0.2', 'Iris-setosa'], ['5.0', '3.5', '1.3', '0.3', 'Iris-setosa'], ['4.5', '2.3', '1.3', '0.3', 'Iris-setosa'], ['4.4', '3.2', '1.3', '0.2', 'Iris-setosa'], ['5.0', '3.5', '1.6', '0.6', 'Iris-setosa'], ['5.1', '3.8', '1.9', '0.4', 'Iris-setosa'], ['4.8', '3.0', '1.4', '0.3', 'Iris-setosa'], ['5.1', '3.8', '1.6', '0.2', 'Iris-setosa'], ['4.6', '3.2', '1.4', '0.2', 'Iris-setosa'], ['5.3', '3.7', '1.5', '0.2', 'Iris-setosa'], ['5.0', '3.3', '1.4', '0.2', 'Iris-setosa'], ['7.0', '3.2', '4.7', '1.4', 'Iris-versicolor'], ['6.4', '3.2', '4.5', '1.5', 'Iris-versicolor'], ['6.9', '3.1', '4.9', '1.5', 'Iris-versicolor'], ['5.5', '2.3', '4.0', '1.3', 'Iris-versicolor'], ['6.5', '2.8', '4.6', '1.5', 'Iris-versicolor'], ['5.7', '2.8', '4.5', '1.3', 'Iris-versicolor'], ['6.3', '3.3', '4.7', '1.6', 'Iris-versicolor'], ['4.9', '2.4', '3.3', '1.0', 'Iris-versicolor'], ['6.6', '2.9', '4.6', '1.3', 'Iris-versicolor'], ['5.2', '2.7', '3.9', '1.4', 'Iris-versicolor'], ['5.0', '2.0', '3.5', '1.0', 'Iris-versicolor'], ['5.9', '3.0', '4.2', '1.5', 'Iris-versicolor'], ['6.0', '2.2', '4.0', '1.0', 'Iris-versicolor'], ['6.1', '2.9', '4.7', '1.4', 'Iris-versicolor'], ['5.6', '2.9', '3.6', '1.3', 'Iris-versicolor'], ['6.7', '3.1', '4.4', '1.4', 'Iris-versicolor'], ['5.6', '3.0', '4.5', '1.5', 'Iris-versicolor'], ['5.8', '2.7', '4.1', '1.0', 'Iris-versicolor'], ['6.2', '2.2', '4.5', '1.5', 'Iris-versicolor'], ['5.6', '2.5', '3.9', '1.1', 'Iris-versicolor'], ['5.9', '3.2', '4.8', '1.8', 'Iris-versicolor'], ['6.1', '2.8', '4.0', '1.3', 'Iris-versicolor'], ['6.3', '2.5', '4.9', '1.5', 'Iris-versicolor'], ['6.1', '2.8', '4.7', '1.2', 'Iris-versicolor'], ['6.4', '2.9', '4.3', '1.3', 'Iris-versicolor'], ['6.6', '3.0', '4.4', '1.4', 'Iris-versicolor'], ['6.8', '2.8', '4.8', '1.4', 'Iris-versicolor'], ['6.7', '3.0', '5.0', '1.7', 'Iris-versicolor'], ['6.0', '2.9', '4.5', '1.5', 'Iris-versicolor'], ['5.7', '2.6', '3.5', '1.0', 'Iris-versicolor'], ['5.5', '2.4', '3.8', '1.1', 'Iris-versicolor'], ['5.5', '2.4', '3.7', '1.0', 'Iris-versicolor'], ['5.8', '2.7', '3.9', '1.2', 'Iris-versicolor'], ['6.0', '2.7', '5.1', '1.6', 'Iris-versicolor'], ['5.4', '3.0', '4.5', '1.5', 'Iris-versicolor'], ['6.0', '3.4', '4.5', '1.6', 'Iris-versicolor'], ['6.7', '3.1', '4.7', '1.5', 'Iris-versicolor'], ['6.3', '2.3', '4.4', '1.3', 'Iris-versicolor'], ['5.6', '3.0', '4.1', '1.3', 'Iris-versicolor'], ['5.5', '2.5', '4.0', '1.3', 'Iris-versicolor'], ['5.5', '2.6', '4.4', '1.2', 'Iris-versicolor'], ['6.1', '3.0', '4.6', '1.4', 'Iris-versicolor'], ['5.8', '2.6', '4.0', '1.2', 'Iris-versicolor'], ['5.0', '2.3', '3.3', '1.0', 'Iris-versicolor'], ['5.6', '2.7', '4.2', '1.3', 'Iris-versicolor'], ['5.7', '3.0', '4.2', '1.2', 'Iris-versicolor'], ['5.7', '2.9', '4.2', '1.3', 'Iris-versicolor'], ['6.2', '2.9', '4.3', '1.3', 'Iris-versicolor'], ['5.1', '2.5', '3.0', '1.1', 'Iris-versicolor'], ['5.7', '2.8', '4.1', '1.3', 'Iris-versicolor'], ['6.3', '3.3', '6.0', '2.5', 'Iris-virginica'], ['5.8', '2.7', '5.1', '1.9', 'Iris-virginica'], ['7.1', '3.0', '5.9', '2.1', 'Iris-virginica'], ['6.3', '2.9', '5.6', '1.8', 'Iris-virginica'], ['6.5', '3.0', '5.8', '2.2', 'Iris-virginica'], ['7.6', '3.0', '6.6', '2.1', 'Iris-virginica'], ['4.9', '2.5', '4.5', '1.7', 'Iris-virginica'], ['7.3', '2.9', '6.3', '1.8', 'Iris-virginica'], ['6.7', '2.5', '5.8', '1.8', 'Iris-virginica'], ['7.2', '3.6', '6.1', '2.5', 'Iris-virginica'], ['6.5', '3.2', '5.1', '2.0', 'Iris-virginica'], ['6.4', '2.7', '5.3', '1.9', 'Iris-virginica'], ['6.8', '3.0', '5.5', '2.1', 'Iris-virginica'], ['5.7', '2.5', '5.0', '2.0', 'Iris-virginica'], ['5.8', '2.8', '5.1', '2.4', 'Iris-virginica'], ['6.4', '3.2', '5.3', '2.3', 'Iris-virginica'], ['6.5', '3.0', '5.5', '1.8', 'Iris-virginica'], ['7.7', '3.8', '6.7', '2.2', 'Iris-virginica'], ['7.7', '2.6', '6.9', '2.3', 'Iris-virginica'], ['6.0', '2.2', '5.0', '1.5', 'Iris-virginica'], ['6.9', '3.2', '5.7', '2.3', 'Iris-virginica'], ['5.6', '2.8', '4.9', '2.0', 'Iris-virginica'], ['7.7', '2.8', '6.7', '2.0', 'Iris-virginica'], ['6.3', '2.7', '4.9', '1.8', 'Iris-virginica'], ['6.7', '3.3', '5.7', '2.1', 'Iris-virginica'], ['7.2', '3.2', '6.0', '1.8', 'Iris-virginica'], ['6.2', '2.8', '4.8', '1.8', 'Iris-virginica'], ['6.1', '3.0', '4.9', '1.8', 'Iris-virginica'], ['6.4', '2.8', '5.6', '2.1', 'Iris-virginica'], ['7.2', '3.0', '5.8', '1.6', 'Iris-virginica'], ['7.4', '2.8', '6.1', '1.9', 'Iris-virginica'], ['7.9', '3.8', '6.4', '2.0', 'Iris-virginica'], ['6.4', '2.8', '5.6', '2.2', 'Iris-virginica'], ['6.3', '2.8', '5.1', '1.5', 'Iris-virginica'], ['6.1', '2.6', '5.6', '1.4', 'Iris-virginica'], ['7.7', '3.0', '6.1', '2.3', 'Iris-virginica'], ['6.3', '3.4', '5.6', '2.4', 'Iris-virginica'], ['6.4', '3.1', '5.5', '1.8', 'Iris-virginica'], ['6.0', '3.0', '4.8', '1.8', 'Iris-virginica'], ['6.9', '3.1', '5.4', '2.1', 'Iris-virginica'], ['6.7', '3.1', '5.6', '2.4', 'Iris-virginica'], ['6.9', '3.1', '5.1', '2.3', 'Iris-virginica'], ['5.8', '2.7', '5.1', '1.9', 'Iris-virginica'], ['6.8', '3.2', '5.9', '2.3', 'Iris-virginica'], ['6.7', '3.3', '5.7', '2.5', 'Iris-virginica'], ['6.7', '3.0', '5.2', '2.3', 'Iris-virginica'], ['6.3', '2.5', '5.0', '1.9', 'Iris-virginica'], ['6.5', '3.0', '5.2', '2.0', 'Iris-virginica'], ['6.2', '3.4', '5.4', '2.3', 'Iris-virginica'], ['5.9', '3.0', '5.1', '1.8', 'Iris-virginica'], []]
Train set:98
Test set:52
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-setosa',actual = 'Iris-setosa'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-versicolor',actual = 'Iris-versicolor'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-versicolor',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
>predicted='Iris-virginica',actual = 'Iris-virginica'
Accuracy:98.07692307692307%
Process finished with exit code 0