import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Hold out 30% of the rows of `iris` for testing and report both partition
# shapes. No random_state is passed, so the split differs between runs.
# NOTE(review): `iris` is not defined in the visible code — presumably a
# DataFrame loaded earlier; confirm.
train, test = train_test_split(iris, test_size=0.3)
print(train.shape)
print(test.shape)
# Recorded output:
#   (105, 5)
#   (45, 5)
# Separate each partition into the four measurement columns (model inputs)
# and the Species column (the label used later for scoring).
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

train_X = train[feature_columns]
train_y = train['Species']

test_X = test[feature_columns]
test_y = test['Species']
# Visualise the clusters: one scatter series per cluster id, drawn from the
# first two columns of `x` for the rows assigned to that cluster.
# NOTE(review): `x`, `y_kmeans` and `kmeans` are not defined in the visible
# code — presumably a feature array, its cluster assignments and a fitted
# KMeans from an earlier cell; confirm.
for cluster_id, colour in enumerate(('purple', 'orange', 'green')):
    in_cluster = y_kmeans == cluster_id
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], s=100, c=colour, label=str(cluster_id))

# Overlay the cluster centroids (first two coordinates) in red.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=100, c='red', label='Centroids')
plt.legend()
# Recorded output: <matplotlib.legend.Legend at 0x1b0d536c8b0>
# Fit a 3-cluster KMeans on the training features. KMeans is unsupervised:
# its `fit` ignores any target argument, so none is passed.
model = KMeans(n_clusters=3)
model.fit(train_X)

# Cluster ids are arbitrary (0/1/2 in no particular order), so comparing them
# directly with Species does not measure agreement. Name each cluster after
# the most common training Species among its members, then score the
# held-out rows against those names.
cluster_to_species = pd.crosstab(model.labels_, np.asarray(train_y)).idxmax(axis=1)
prediction = cluster_to_species.loc[model.predict(test_X)].to_numpy()

# accuracy_score expects (y_true, y_pred).
print('The accuracy of the KMeans is', metrics.accuracy_score(test_y, prediction))
# Recorded output: The accuracy of the KMeans is 0.9111111111111111
# Number of training samples assigned to each cluster, indexed by cluster id.
r1 = pd.Series(data=model.labels_).value_counts()

# Cluster centres, one row per cluster.
r2 = pd.DataFrame(data=model.cluster_centers_)

# Join side by side (column-wise; axis 0 would stack rows instead) so each
# cluster centre is paired with the number of samples in that cluster.
r = pd.concat(objs=[r2, r1], axis='columns')
def _fit_and_score(model, train_x, train_y, test_x, test_y):
    """Fit ``model`` on ``train_x`` and score it on the held-out rows.

    Cluster ids produced by KMeans are arbitrary, so each cluster is first
    named after the most common ``train_y`` label among its training members;
    the held-out rows are then labelled through that mapping and compared
    with ``test_y``.

    Returns:
        A ``(prediction, accuracy)`` tuple: the mapped labels for ``test_x``
        and the fraction of them that match ``test_y``.
    """
    # KMeans.fit ignores any target argument, so only the features are passed.
    model.fit(train_x)
    cluster_to_label = pd.crosstab(model.labels_, np.asarray(train_y)).idxmax(axis=1)
    prediction = cluster_to_label.loc[model.predict(test_x)].to_numpy()
    # accuracy_score expects (y_true, y_pred).
    return prediction, metrics.accuracy_score(test_y, prediction)


# Compare clustering quality when only petal or only sepal features are used.
# NOTE(review): the *_p / *_s partitions are not defined in the visible code —
# presumably petal-only and sepal-only splits from an earlier cell; confirm.
model = KMeans(n_clusters=3)

prediction, petal_accuracy = _fit_and_score(model, train_x_p, train_y_p, test_x_p, test_y_p)
print('The accuracy of the KMeans using Petals is:', petal_accuracy)

# The same estimator is refit, so `model` and `prediction` end up describing
# the sepal-only run.
prediction, sepal_accuracy = _fit_and_score(model, train_x_s, train_y_s, test_x_s, test_y_s)
print('The accuracy of the KMeans using Sepals is:', sepal_accuracy)
# Recorded output:
#   The accuracy of the KMeans using Petals is: 0.4
#   The accuracy of the KMeans using Sepals is: 0.13333333333333333