import numpy as np  # NumPy, referred to as ``np`` throughout this file
import operator  # operator module


def createDataSet():
    """Return a small hard-coded sample data set.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array of
        points and ``labels`` is a list holding the class name ('A' or 'B')
        of the corresponding row of ``group``.
    """
    # np.array takes a list of rows, hence the two levels of brackets.
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
import numpy as np
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Every non-blank line must contain numeric values in its first three
    tab-separated fields and a label in its last field. The label is either
    a string of digits or one of the names 'largeDoses', 'smallDoses',
    'didntLike'.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)``. ``returnMat`` is an
        ``(n, 3)`` float array holding the first three fields of each of
        the ``n`` non-blank lines; ``classLabelVector`` is a list of ``n``
        integer labels. A label name that is not in the mapping is stored
        as ``None``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first three fields is not numeric.
    """
    # Map the textual labels to their numeric codes.
    love_dictionary = {'largeDoses': 3, 'smallDoses': 2, 'didntLike': 1}

    # The context manager closes the file even if parsing fails later on.
    with open(filename) as fr:
        # Drop the whitespace at both ends of every line.
        stripped_lines = [line.strip() for line in fr]

    # Skip blank lines (e.g. a trailing empty line at the end of the file);
    # they carry no data and would fail the numeric conversion below.
    data_lines = [line for line in stripped_lines if line]

    # One matrix row per data line, one column per feature.
    returnMat = np.zeros((len(data_lines), 3))
    classLabelVector = []

    for index, line in enumerate(data_lines):
        listFromLine = line.split('\t')
        # The first three fields are the features of this row.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        # The last field is the label: numeric as-is, otherwise looked up.
        label = listFromLine[-1]
        if label.isdigit():
            classLabelVector.append(int(label))
        else:
            classLabelVector.append(love_dictionary.get(label))

    return returnMat, classLabelVector
# Load the dating data set from a hard-coded local path. file2matrix (defined
# earlier in this file) returns the feature matrix and the list of labels.
datingDataMat, datingLabels=file2matrix('D:/pythoncode/machine learning in action/DatingTestSet.txt')
import matplotlib
import matplotlib.pyplot as plt
# Create a new figure.
fig=plt.figure()
# Create the axes with add_subplot(row_quantity, column_quantity, position),
# called on the figure; this is the same as
# plt.subplot(row_quantity, column_quantity, position).
ax=fig.add_subplot(111)
# scatter(x, y): plot column 1 of the data matrix against column 2.
# Per the original note, these columns are the share of time spent playing
# games and the litres of ice cream consumed per week.
ax.scatter(datingDataMat[:,1],datingDataMat[:,2])
plt.show(