以鸢尾花数据集为例,对其进行聚类,并对比聚类结果与真实分类结果
导入相关库
% matplotlib inline
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
创建数据集
# Load the iris dataset and pull out the feature matrix, the integer
# targets, and the array of class names.
data = load_iris()
x, y = data['data'], data['target']
target_name = data['target_names']


def fx(x):
    """Return the class name stored at index ``x`` of ``target_name``."""
    return target_name[x]


# Translate every integer target into its human-readable class name.
y_labels = [fx(label) for label in y]
进行聚类
# Partition the samples into 3 clusters.
# random_state is pinned so the centroid initialisation -- and therefore
# which cluster ends up with which integer id -- is identical on every run;
# without it the 'one'/'two'/'three' labels below can swap between runs.
# n_init is given explicitly because its default differs across
# scikit-learn releases.
clf = KMeans(n_clusters=3, n_init=10, random_state=0)
clf.fit(x)

# Assign every sample to its nearest learned centroid.
y_predict = clf.predict(x)

# Map the integer cluster ids to display names. The names carry no meaning
# beyond distinguishing the clusters on the plot.
cluster_names = ('one', 'two', 'three')
y_predict_labels = [cluster_names[i] for i in y_predict]
画图对比真实分类与聚类结果
# Strip plot with one category per predicted cluster on the horizontal axis
# and points coloured by the true class name, so a cluster that mixes
# several true classes shows up as mixed colours.
# The vertical axis is column 1 of the feature matrix.
sns.stripplot(
    x=y_predict_labels,
    y=x[:, 1],
    hue=y_labels,
    jitter=True,
)

# Anchor the legend at (1.2, 1) so it does not cover the data points.
legend_anchor = (1.2, 1)
plt.legend(bbox_to_anchor=legend_anchor)
