# csv: parse the dataset file
import csv
# random: randomly assign rows to the training or test set
import random
# math: square root for the Euclidean distance
import math
import operator
# Load the dataset and randomly partition it into a training set (used to
# build the model) and a test set.
def loadDataset(filename, split, trainingSet=None, testSet=None, numFeatures=4):
    """Read a CSV file and randomly split its rows into training and test sets.

    Each non-empty row has its first ``numFeatures`` columns converted to
    ``float``; the remaining columns (the class label) are left as strings.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()`` for every row;
            a row goes to the training set when the random draw is smaller,
            otherwise to the test set.
        trainingSet: List the training rows are appended to. A fresh list is
            created when omitted.
        testSet: List the test rows are appended to. A fresh list is created
            when omitted.
        numFeatures: Number of leading columns converted to ``float``.

    Returns:
        The ``(trainingSet, testSet)`` pair, i.e. the same list objects that
        were passed in (or the newly created ones).

    Raises:
        ValueError: If a feature column cannot be converted to ``float``.
        IndexError: If a non-empty row has fewer than ``numFeatures`` columns.
    """
    # Defaults are created per call; a literal [] default would be shared
    # (and keep growing) across calls.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines (typically a trailing one at the end of the file)
            # parse as []; skip them instead of assuming the last row is junk.
            if not row:
                continue
            for col in range(numFeatures):
                row[col] = float(row[col])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet
# Distance between two instances.
# Metric: Euclidean distance (instances may have any number of dimensions).
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the components at indices ``0 .. length - 1`` take part, so trailing
    elements of either instance are ignored.

    Args:
        instance1: First indexable instance.
        instance2: Second indexable instance.
        length: Number of leading dimensions to compare.

    Returns:
        The square root of the summed squared component differences.
    """
    squaredDiffs = ((instance1[i] - instance2[i]) ** 2 for i in range(length))
    return math.sqrt(sum(squaredDiffs))
# Return the K nearest neighbours of a test instance.
# Given the training set, the test instance and the neighbour count K, compute
# the distance from the test instance to every training instance, order the
# distances ascending and keep the K closest.
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training instances closest to ``testInstance``.

    Args:
        trainingSet: Sequence of training instances.
        testInstance: Instance to find neighbours for. Its last element is
            excluded from the distance computation.
        k: Number of neighbours wanted.

    Returns:
        A list of at most ``k`` training instances ordered from nearest to
        farthest. When ``k`` exceeds the number of training instances, all of
        them are returned; when ``k`` is zero or negative, the list is empty.
    """
    # Every element but the last is treated as a coordinate.
    length = len(testInstance) - 1
    distances = [
        (candidate, euclideanDistance(testInstance, candidate, length))
        for candidate in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Slicing tolerates k > len(distances), where indexing would raise
    # IndexError; max() keeps a negative k from slicing off the tail instead
    # of yielding nothing.
    return [candidate for candidate, _ in distances[:max(k, 0)]]
# Majority rule: the class that occurs most often among the neighbours wins.
def getResponse(neighbors):
    """Return the most frequent class label among ``neighbors``.

    The label of a neighbour is its last element. When several labels share
    the highest count, the one that was encountered first is returned.

    Args:
        neighbors: Sequence of instances whose last element is the label.

    Returns:
        The label with the highest vote count.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    # A stable descending sort keeps first-seen order among tied labels.
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    return ranked[0][0]
# Accuracy of the algorithm.
def getAccuracy(testSet, predictions):
    """Return the percentage of test instances that were predicted correctly.

    Args:
        testSet: Sequence of instances whose last element is the true label.
        predictions: Predicted labels, where ``predictions[i]`` belongs to
            ``testSet[i]``.

    Returns:
        The accuracy as a float between 0.0 and 100.0. An empty ``testSet``
        yields 0.0 instead of a division by zero.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    # A random split of a small dataset can leave the test set empty.
    if total == 0:
        return 0.0
    correct = sum(
        1
        for index, instance in enumerate(testSet)
        if instance[-1] == predictions[index]
    )
    return (correct / float(total)) * 100.0
def main(filename=r'F:\iris.txt', split=0.67, k=3):
    """Run the k-nearest-neighbours experiment and print the results.

    Loads the dataset, splits it into training and test sets, predicts a
    label for every test instance from its ``k`` nearest training instances,
    and prints each prediction followed by the overall accuracy.

    Args:
        filename: Path of the CSV dataset.
        split: Threshold passed to ``loadDataset`` that controls the share of
            rows placed in the training set.
        k: Number of neighbours that vote on each prediction.
    """
    trainingSet = []
    testSet = []
    loadDataset(filename, split, trainingSet, testSet)
    print('Train Set:' + repr(len(trainingSet)))
    print('Test Set:' + repr(len(testSet)))
    predictions = []
    for instance in testSet:
        neighbors = getNeighbors(trainingSet, instance, k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('predicted=' + repr(result) + ',actual=' + repr(instance[-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy:' + repr(accuracy) + '%')
# Run the experiment only when this file is executed as a script, so that
# importing the module does not read the dataset or print anything.
if __name__ == "__main__":
    main()