提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
前言
提示:这里可以添加本文要记录的大概内容:
聚类算法部分的论文学习笔记
提示:以下是本篇文章正文内容,下面案例可供参考
一、DBSCAN聚类算法
1、输入:样本数据
输出:聚类结果
1)、计算样本中各个点与其他点之间的欧几里得距离:
2)、两个点 A = (a[1],a[2],…,a[n]) 和 B = (b[1],b[2],…,b[n]) 之间的
距离 ρ(A,B) 定义为下面的公式 3-4:
$$\rho(A,B)=\sqrt{\sum_{i=1}^{n}\left(a[i]-b[i]\right)^{2}}$$
3)、获得每一个点的 K 距离,并将 K 距离排序
4)、确定半径 eps 的值;
5)、计算核心点,建立所有核心点之间间隔小于半径 eps 的映射;
6)、根据得到的核心点和半径 eps,计算出和核心点联通的点和离群的点;
7)、将能够联通,以及距离小于半径的点,生成一个集合,即一个簇。

二、使用步骤
1.引入库
代码如下(示例):
from sklearn import datasets
import numpy as np
import random
import matplotlib.pyplot as plt
import time
import copy
2.算法实现与示例数据聚类
代码如下(示例):
def find_neighbor(j, x, eps):
    """Return the indices of all samples within ``eps`` of sample ``j``.

    Distances are Euclidean and the comparison is inclusive (``<= eps``), so
    the result contains ``j`` itself whenever ``eps`` is non-negative.

    Args:
        j: Index (along the first axis of ``x``) of the query sample.
        x: NumPy array of samples; the first axis indexes the samples.
        eps: Neighbourhood radius.

    Returns:
        A ``set`` of Python ``int`` indices forming the eps-neighbourhood
        of sample ``j``.
    """
    if x.shape[0] == 0:
        return set()
    # One vectorised expression replaces the per-sample Python loop: square
    # the differences to every sample, then sum over all non-sample axes.
    squared = np.square(x - x[j])
    dists = np.sqrt(np.sum(squared, axis=tuple(range(1, squared.ndim))))
    # tolist() converts NumPy integers to plain Python ints.
    return set(np.flatnonzero(dists <= eps).tolist())
def DBSCAN(X, eps, min_Pts):
    """Cluster the samples in ``X`` with the DBSCAN algorithm.

    A sample is a core object when its eps-neighbourhood (as returned by
    ``find_neighbor``, which includes the sample itself) holds at least
    ``min_Pts`` samples. Clusters are grown from randomly chosen core
    objects by repeatedly absorbing the unvisited neighbours of every core
    object reached.

    Args:
        X: Samples; must support ``len`` and be accepted by
            ``find_neighbor``.
        eps: Neighbourhood radius passed to ``find_neighbor``.
        min_Pts: Minimum neighbourhood size for a sample to be a core object.

    Returns:
        A list with one integer label per sample: ``0, 1, 2, ...`` for the
        clusters in the order they were grown, and ``-1`` for samples that
        were never absorbed into any cluster.
    """
    n_samples = len(X)
    # eps-neighbourhood (a set of indices) of every sample.
    neighbor_list = [find_neighbor(i, X, eps) for i in range(n_samples)]
    # Core objects that have not been assigned to a cluster yet.
    core_points = {
        i for i, neighbors in enumerate(neighbor_list)
        if len(neighbors) >= min_Pts
    }
    unvisited = set(range(n_samples))
    cluster = [-1] * n_samples
    k = -1
    while core_points:
        # The set holds plain ints, so a shallow copy is a full snapshot.
        unvisited_before = set(unvisited)
        seed = random.choice(list(core_points))  # random core object
        k += 1
        unvisited.remove(seed)
        # Breadth-first expansion; a head index avoids O(n) removal from the
        # front of the list.
        queue = [seed]
        head = 0
        while head < len(queue):
            q = queue[head]
            head += 1
            # Only core objects propagate the cluster to their neighbours.
            if len(neighbor_list[q]) >= min_Pts:
                delta = neighbor_list[q] & unvisited
                queue.extend(delta)
                unvisited -= delta
        # Everything visited during this expansion forms cluster k.
        Ck = unvisited_before - unvisited
        for idx in Ck:
            cluster[idx] = k
        core_points -= Ck
    return cluster
# Demo: cluster a synthetic 2-D data set with the DBSCAN function defined
# above and plot the result.
# 2000 noisy samples from sklearn's make_circles. No random_state is passed,
# so presumably the points differ from run to run (confirm in sklearn docs).
X1, y1 = datasets.make_circles(n_samples=2000, factor=.6, noise=.02)
# 400 samples from make_blobs with a single centre at (1.2, 1.2) and a fixed
# random_state.
X2, y2 = datasets.make_blobs(n_samples=400, n_features=2, centers=[[1.2, 1.2]], cluster_std=[[.1]], random_state=9)
# Stack both sample arrays into one data set; the generated labels y1/y2 are
# not referenced by the rest of this script.
X = np.concatenate((X1, X2))
# DBSCAN parameters: neighbourhood radius and the minimum neighbourhood size
# that makes a sample a core object.
eps = 0.08
min_Pts = 10
# Timestamps around the clustering call.
# NOTE(review): the elapsed time (end - begin) is never reported here.
begin = time.time()
C = DBSCAN(X, eps, min_Pts)
end = time.time()
# Scatter plot of the samples, coloured by the per-sample labels in C.
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=C)
plt.show()
3.完整代码
from sklearn import datasets
import numpy as np
import random
import matplotlib.pyplot as plt
import time
import copy
def find_neighbor(j, x, eps):
    """Return the indices of all samples within ``eps`` of sample ``j``.

    Distances are Euclidean and the comparison is inclusive (``<= eps``), so
    the result contains ``j`` itself whenever ``eps`` is non-negative.

    Args:
        j: Index (along the first axis of ``x``) of the query sample.
        x: NumPy array of samples; the first axis indexes the samples.
        eps: Neighbourhood radius.

    Returns:
        A ``set`` of Python ``int`` indices forming the eps-neighbourhood
        of sample ``j``.
    """
    if x.shape[0] == 0:
        return set()
    # One vectorised expression replaces the per-sample Python loop: square
    # the differences to every sample, then sum over all non-sample axes.
    squared = np.square(x - x[j])
    dists = np.sqrt(np.sum(squared, axis=tuple(range(1, squared.ndim))))
    # tolist() converts NumPy integers to plain Python ints.
    return set(np.flatnonzero(dists <= eps).tolist())
def DBSCAN(X, eps, min_Pts):
    """Cluster the samples in ``X`` with the DBSCAN algorithm.

    A sample is a core object when its eps-neighbourhood (as returned by
    ``find_neighbor``, which includes the sample itself) holds at least
    ``min_Pts`` samples. Clusters are grown from randomly chosen core
    objects by repeatedly absorbing the unvisited neighbours of every core
    object reached.

    Args:
        X: Samples; must support ``len`` and be accepted by
            ``find_neighbor``.
        eps: Neighbourhood radius passed to ``find_neighbor``.
        min_Pts: Minimum neighbourhood size for a sample to be a core object.

    Returns:
        A list with one integer label per sample: ``0, 1, 2, ...`` for the
        clusters in the order they were grown, and ``-1`` for samples that
        were never absorbed into any cluster.
    """
    n_samples = len(X)
    # eps-neighbourhood (a set of indices) of every sample.
    neighbor_list = [find_neighbor(i, X, eps) for i in range(n_samples)]
    # Core objects that have not been assigned to a cluster yet.
    core_points = {
        i for i, neighbors in enumerate(neighbor_list)
        if len(neighbors) >= min_Pts
    }
    unvisited = set(range(n_samples))
    cluster = [-1] * n_samples
    k = -1
    while core_points:
        # The set holds plain ints, so a shallow copy is a full snapshot.
        unvisited_before = set(unvisited)
        seed = random.choice(list(core_points))  # random core object
        k += 1
        unvisited.remove(seed)
        # Breadth-first expansion; a head index avoids O(n) removal from the
        # front of the list.
        queue = [seed]
        head = 0
        while head < len(queue):
            q = queue[head]
            head += 1
            # Only core objects propagate the cluster to their neighbours.
            if len(neighbor_list[q]) >= min_Pts:
                delta = neighbor_list[q] & unvisited
                queue.extend(delta)
                unvisited -= delta
        # Everything visited during this expansion forms cluster k.
        Ck = unvisited_before - unvisited
        for idx in Ck:
            cluster[idx] = k
        core_points -= Ck
    return cluster
# Demo: cluster a synthetic 2-D data set with the DBSCAN function defined
# above and plot the result.
# 2000 noisy samples from sklearn's make_circles. No random_state is passed,
# so presumably the points differ from run to run (confirm in sklearn docs).
X1, y1 = datasets.make_circles(n_samples=2000, factor=.6, noise=.02)
# 400 samples from make_blobs with a single centre at (1.2, 1.2) and a fixed
# random_state.
X2, y2 = datasets.make_blobs(n_samples=400, n_features=2, centers=[[1.2, 1.2]], cluster_std=[[.1]], random_state=9)
# Stack both sample arrays into one data set; the generated labels y1/y2 are
# not referenced by the rest of this script.
X = np.concatenate((X1, X2))
# DBSCAN parameters: neighbourhood radius and the minimum neighbourhood size
# that makes a sample a core object.
eps = 0.08
min_Pts = 10
# Timestamps around the clustering call.
# NOTE(review): the elapsed time (end - begin) is never reported here.
begin = time.time()
C = DBSCAN(X, eps, min_Pts)
end = time.time()
# Scatter plot of the samples, coloured by the per-sample labels in C.
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=C)
plt.show()
DBSCAN聚类算法实现与应用
422

被折叠的 条评论
为什么被折叠?



