#! /usr/bin/python
# -*- coding: utf8 -*-
import tensorflow as tf
from random import sample
def KMeansCluster(points, K, max_iters=100, first_centroids=None, predict_method=None):
    """Cluster ``points`` into ``K`` groups with k-means on a TensorFlow 1.x graph.

    Args:
        points: 2-D NumPy array of shape ``(N, D)``, one sample per row.
        K: number of clusters.
        max_iters: upper bound on the number of assign/update iterations.
        first_centroids: optional initial centroids of shape ``(K, D)``. When
            ``None``, ``K`` distinct rows of ``points`` are drawn at random.
        predict_method: optional callable that maps a sample index to a label.

    Returns:
        ``(iters, centers, assignments)`` when ``predict_method`` is ``None``;
        otherwise ``(iters, centers, assignments, predict_labels)``.
        ``iters`` is the number of iterations executed, ``centers`` the final
        ``(K, D)`` centroids, ``assignments`` the cluster index of every
        sample, and ``predict_labels`` a list holding, for every sample,
        ``predict_method`` applied to the index of the sample that lies
        closest to that sample's centroid.

    Raises:
        ValueError: raised by ``random.sample`` when ``first_centroids`` is
            ``None`` and ``K`` is larger than the number of samples.
    """
    N, D = points.shape
    # Initial cluster centres: K distinct random samples unless supplied.
    if first_centroids is None:
        first_centroids = points[sample(range(N), K)]
    centroids = tf.Variable(first_centroids)
    # Cluster index currently assigned to each sample.
    cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))
    # Convert the data once so it is embedded in the graph a single time
    # instead of once per op that consumes it.
    points_t = tf.convert_to_tensor(points)
    # Squared distance between every sample and every centroid, computed in
    # one shot on [N, K, D] tensors.
    rep_points = tf.reshape(tf.tile(points_t, [1, K]), [N, K, D])
    rep_centroids = tf.reshape(tf.tile(centroids, [N, 1]), [N, K, D])
    sum_squares = tf.reduce_sum(tf.square(rep_points - rep_centroids), reduction_indices=2)
    # For each sample: index of its nearest centroid.
    best_centroids = tf.argmin(sum_squares, 1)
    # For each centroid: index of its nearest sample.
    centroids_indies = tf.argmin(sum_squares, 0)
    # Per-cluster sum of the member samples ...
    total = tf.unsorted_segment_sum(points_t, best_centroids, K)
    # ... and per-cluster member count (replicated along D).
    count = tf.unsorted_segment_sum(tf.ones_like(points_t), best_centroids, K)
    # New centroid = mean of its members. A cluster without members would
    # give 0/0 = NaN and poison every later distance, so such a cluster keeps
    # its previous centroid instead (total is 0 there, so the first term
    # vanishes and only the old centroid remains).
    is_empty = tf.cast(tf.equal(count, tf.zeros_like(count)), count.dtype)
    means = total / (count + is_empty) + is_empty * centroids
    did_assignments_change = tf.reduce_any(tf.not_equal(best_centroids, cluster_assignments))
    # The change flag must be evaluated before the variables are overwritten.
    with tf.control_dependencies([did_assignments_change]):
        do_updates = tf.group(centroids.assign(means), cluster_assignments.assign(best_centroids))
    # Prefer the non-deprecated initializer when this TensorFlow provides it.
    make_init = getattr(tf, 'global_variables_initializer', None) or tf.initialize_all_variables
    init = make_init()
    # The context manager closes the session even if an iteration fails.
    with tf.Session() as sess:
        sess.run(init)
        iters, changed = 0, True
        while changed and iters < max_iters:
            iters += 1
            changed, _ = sess.run([did_assignments_change, do_updates])
        centers, cindies, assignments = sess.run(
            [centroids, centroids_indies, cluster_assignments])
    if predict_method is None:
        return iters, centers, assignments
    # Lists (not lazy ``map`` objects) so the result is the same on Python 2
    # and Python 3.
    predict_indies = [cindies[a] for a in assignments]
    predict_labels = [predict_method(index) for index in predict_indies]
    return iters, centers, assignments, predict_labels
def test_KMeansCluster():
    """Run ``KMeansCluster`` on synthetic data and print the label accuracy.

    Builds ``K`` equally sized groups of ``D``-dimensional points; group ``i``
    is drawn uniformly from ``[5 * i, 5 * (i + 0.5))`` in every dimension, so
    a sample's true label is its row index divided by the group size.
    """
    import numpy as np
    N, D, K = 10000, 2, 20
    # Integer division keeps slice bounds and sizes integral on Python 3 and
    # matches what ``/`` already did for ints on Python 2.
    per_cluster = N // K
    points = np.empty((N, D), dtype=np.float32)
    for i in range(K):
        points[per_cluster * i:per_cluster * (i + 1)] = np.random.uniform(
            low=5 * i, high=5 * (i + 0.5), size=(per_cluster, D))
    iters, centers, assignments, predict_labels = KMeansCluster(
        points, K, max_iters=100, predict_method=lambda pi: pi // per_cluster)
    # A prediction is correct when it equals the group the sample was drawn from.
    hits = sum(1 for i, pl in enumerate(predict_labels) if pl == i // per_cluster)
    test_acc = hits * 1.0 / N
    print('Test acc: %.3f%%' % (test_acc * 100))
# Script entry point: run the k-means smoke test only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    test_KMeansCluster()
# TensorFlow 实现 K-Means
# 最新推荐文章于 2023-05-04 21:31:21 发布