1、算法描述

2、Python代码实现
import numpy as np
import matplotlib.pyplot as plt
'''
AGNES层次聚类:采用自底向上聚合策略的算法。先将数据集的每个样本看作一个初始聚类簇,然后在算法运行的每一步中找出距离最近的两个聚类簇进行合并,
该过程不断重复,直至达到预设的聚类簇个数。本实现以两个簇中样本之间的最大距离作为簇间距离(全链接)。
'''
def calDist(X1, X2):
    """Return the Euclidean distance between two coordinate sequences.

    Args:
        X1: Iterable of numeric coordinates.
        X2: Iterable of numeric coordinates, paired element-wise with ``X1``.

    Returns:
        The square root of the summed squared coordinate differences.
        Coordinates are paired with ``zip``, so surplus elements of the
        longer input are ignored.
    """
    # Use the builtin ``sum`` instead of a local accumulator that shadows it.
    squared = sum((x1 - x2) ** 2 for x1, x2 in zip(X1, X2))
    return squared ** 0.5
def updateClusterDis(dataset, distance, sets, cluster_i):
    """Refresh the distances between cluster ``cluster_i`` and every other cluster.

    The distance between two clusters is the largest Euclidean distance
    between any sample of one and any sample of the other (complete linkage).

    Args:
        dataset: 2-D array-like of samples, one row per sample.
        distance: Square matrix of cluster-to-cluster distances with one
            row/column per entry of ``sets``. It is modified in place.
        sets: List of clusters; each cluster is a collection of row indices
            into ``dataset``.
        cluster_i: Index (into ``sets``) of the cluster whose row and column
            must be recomputed.

    Returns:
        The same ``distance`` array, with row/column ``cluster_i`` updated
        symmetrically and the whole diagonal set to ``inf``.
    """
    points = np.asarray(dataset, dtype=float)
    target = points[list(sets[cluster_i])]
    for idx, members in enumerate(sets):
        if idx == cluster_i:
            # The self-distance lands on the diagonal, which is reset below.
            continue
        other = points[list(members)]
        # Broadcast to all pairwise differences: (|other|, |target|, n_features).
        diff = other[:, None, :] - target[None, :, :]
        farthest = np.sqrt(np.sum(np.square(diff), axis=2)).max()
        distance[idx, cluster_i] = farthest
        distance[cluster_i, idx] = farthest
    # An infinite diagonal keeps a cluster from being chosen as its own
    # nearest neighbour when the caller searches for the minimum.
    distance[np.diag_indices_from(distance)] = float('inf')
    return distance
def agens(dataset, k):
    """Cluster ``dataset`` bottom-up (AGNES) until at most ``k`` clusters remain.

    Every sample starts as its own cluster. On each step the two clusters
    with the smallest entry in the cluster-distance matrix are merged, and
    the merged cluster's distances are recomputed by ``updateClusterDis``.

    Args:
        dataset: 2-D array-like of samples, one row per sample.
        k: Target number of clusters; must be at least 1.

    Returns:
        A list of sets, each holding the row indices of one cluster. If the
        dataset has ``k`` or fewer samples, every sample stays in its own
        cluster. The final clustering is also printed.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # Merging can never go below one cluster; fail up front instead of
        # hitting an IndexError once the distance matrix is exhausted.
        raise ValueError("k must be at least 1")

    points = np.asarray(dataset)
    sets = [{i} for i in range(len(points))]

    if len(sets) > k:
        # All pairwise differences in one broadcast instead of growing an
        # array with repeated vstack calls.
        diff = points[:, None, :] - points[None, :, :]
        distance = np.sqrt(np.sum(np.square(diff), axis=2))
        # Infinite diagonal so a cluster is never paired with itself.
        distance[np.diag_indices_from(distance)] = float('inf')

    while len(sets) > k:
        # The matrix is symmetric, so the first minimum in row-major order
        # lies above the diagonal; min/max just makes the ordering explicit.
        row, col = np.unravel_index(np.argmin(distance), distance.shape)
        cluster_i, cluster_j = min(row, col), max(row, col)

        sets[cluster_i] |= sets[cluster_j]
        # cluster_i < cluster_j, so removing cluster_j does not shift cluster_i.
        del sets[cluster_j]
        distance = np.delete(distance, cluster_j, axis=0)
        distance = np.delete(distance, cluster_j, axis=1)
        distance = updateClusterDis(points, distance, sets, cluster_i)

    print(sets)
    return sets
# Load the samples from 'data.txt', cluster them into 4 groups and plot the result.
# NOTE: these statements run at module level, i.e. whenever the file is executed or imported.
dataset = np.loadtxt('data.txt')
results = agens(dataset, 4)
for r in results:
    # One scatter call per cluster, using the first two columns as x/y coordinates.
    drawdata = dataset[list(r)]
    plt.scatter(drawdata[:, 0], drawdata[:, 1], marker='o')
plt.show()
3、结果
