import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import Birch
from sklearn.cluster import DBSCAN
import scipy.cluster.hierarchy as sch
from sklearn.cluster import KMeans
# Demo script: run four clustering algorithms (BIRCH, DBSCAN, hierarchical,
# K-means) on the same synthetic 2-D data set and plot each labelling.
#
# X holds the sample features and y the generating blob index of each sample:
# 1000 samples, 2 features each, drawn from 4 blobs centred at
# [-1,-1], [0,0], [1,1] and [2,2].
X, y = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=[[-1, -1], [0, 0], [1, 1], [2, 2]],
    cluster_std=[0.4, 0.3, 0.4, 0.3],
    random_state=9,
)

# --- BIRCH ---
# n_clusters=None: no final global clustering step is requested, so the
# labels come straight from the CF-tree subclusters.
birch_model = Birch(n_clusters=None, threshold=0.3, branching_factor=20).fit(X)
y_pred = birch_model.labels_
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
plt.show()
# scikit-learn renamed `calinski_harabaz_score` to `calinski_harabasz_score`
# and later removed the old spelling; prefer the new name and fall back to
# the old one so the script runs on either side of the rename.
ch_score = getattr(metrics, "calinski_harabasz_score", None)
if ch_score is None:
    ch_score = metrics.calinski_harabaz_score
print("Calinski-Harabasz Score", ch_score(X, y_pred))
print(y_pred)
print(len(y_pred))

# --- DBSCAN ---
# NOTE(review): eps=1.79 is larger than the ~1.41 spacing between adjacent
# blob centres, so this presumably merges everything into one cluster --
# confirm this is the intended demonstration.
db_model = DBSCAN(eps=1.79, min_samples=4).fit(X)
y_pred = db_model.labels_
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
plt.show()

# --- Hierarchical (agglomerative) clustering ---
# Condensed pairwise cosine-distance matrix, then average-linkage tree.
disMat = sch.distance.pdist(X, 'cosine')
Z = sch.linkage(disMat, method='average')
# Draw the hierarchy as a dendrogram and save it to bxk100_40_3.png.
plt.figure(figsize=(8, 8))
P = sch.dendrogram(Z)
plt.savefig('bxk100_40_3.png')
# Flat cluster labels from the linkage matrix Z, cutting at distance 0.8.
y_pred = sch.fcluster(Z, 0.8, 'distance', depth=2)
# Start a fresh figure: without it the scatter would be drawn on top of the
# dendrogram, which is still the current figure at this point.
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
plt.show()

# --- K-means ---
# NOTE(review): n_clusters=30 far exceeds the 4 generating blobs; presumably
# a deliberate over-segmentation for the demo -- confirm.
Kmeans_model = KMeans(n_clusters=30, init='k-means++', max_iter=10000).fit(X)
y_pred = Kmeans_model.labels_
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
plt.show()
print(y_pred)
print(len(y_pred))
常用聚类方法Demo
最新推荐文章于 2024-07-17 09:43:02 发布
本文使用BIRCH、DBSCAN、层次聚类和K-means等多种聚类算法,对同一组1000个样本进行聚类分析。通过为各算法设置不同的参数,展示了它们在相同数据集上的表现,并使用Calinski-Harabasz Score评估了BIRCH的聚类效果。
2879

被折叠的 条评论
为什么被折叠?



