K-means 聚类是最基础、最常见的聚类算法之一,在工程上应用广泛,Spark、scikit-learn 等都提供了实现。本文手写实现 K-means。
#!/usr/bin/python
import sys
import random
import math
def create_rand_points(max_x, max_y, count):
    """Create count random 2-D points with integer coordinates.

    Args:
        max_x: Inclusive upper bound for the x coordinate (lower bound is 0).
        max_y: Inclusive upper bound for the y coordinate (lower bound is 0).
        count: Number of points to generate.

    Returns:
        A list of count points, each a two-element list [x, y] with
        0 <= x <= max_x and 0 <= y <= max_y. The list is empty when
        count is zero or negative.
    """
    # The inner list literal is evaluated left to right, so x is drawn
    # before y for every point.
    return [
        [random.randint(0, max_x), random.randint(0, max_y)]
        for _ in range(count)
    ]
def get_start_k_points(points, k):
    """Pick k distinct starting points at random from points.

    The input list is not reordered or otherwise modified.

    Args:
        points: List of candidate points.
        k: Number of starting points to pick.

    Returns:
        A new list holding k elements of points drawn without
        replacement (the elements themselves are not copied), or None
        when k is negative or greater than len(points).
    """
    # A negative k would otherwise be accepted and yield the wrong number
    # of points, so it is rejected the same way as an oversized k.
    if k < 0 or k > len(points):
        return None
    # random.sample draws without replacement and leaves the caller's
    # list in its original order, unlike an in-place shuffle plus slice.
    return random.sample(points, k)
def get_nearest_point_index(point, central_points):
"""
"""
min_dis = 2000000000
index = 0
for i in range(0, len(central_points)):
dis = 0.0
for j in range(0, len(point)):
dis += (point[j]-central_points[i][j]) * (point[j]-central_points[i][j])
if math