TensorFlow 实现 K-Means

最新推荐文章于 2023-05-04 21:31:21 发布

jiarenyf

最新推荐文章于 2023-05-04 21:31:21 发布

阅读量1.5k

点赞数 1

分类专栏： tensorflow python machine-learning 文章标签： tensorflow kmeans

machine-learning 同时被 3 个专栏收录

34 篇文章

订阅专栏

python

27 篇文章

订阅专栏

tensorflow

1 篇文章

订阅专栏

#! /usr/bin/python
# -*- coding: utf8 -*-

import tensorflow as tf
from random import sample


def KMeansCluster(points, K, max_iters=100, first_centroids=None, predict_method=None):
    """Cluster ``points`` into ``K`` groups with k-means, using a TensorFlow graph.

    Each step assigns every sample to its nearest centroid (squared Euclidean
    distance) and then moves every centroid to the mean of its samples. The
    loop stops when a step sees no assignment change or after ``max_iters``
    steps.

    Args:
        points: 2-D array of shape (N, D). It must expose ``.shape`` and accept
            a list of row indices (e.g. a NumPy array).
        K: number of clusters.
        max_iters: upper bound on the number of update steps.
        first_centroids: optional initial centroids, shape (K, D). When None,
            K distinct rows of ``points`` are drawn at random.
        predict_method: optional callable that maps a sample index to a label.
            When given, each sample is labelled with the label of the sample
            that lies closest to its cluster's centroid.

    Returns:
        ``(iters, centers, assignments)`` when ``predict_method`` is None,
        otherwise ``(iters, centers, assignments, predict_labels)`` where
        ``predict_labels`` is a list with one entry per sample.
    """
    N, D = points.shape

    # Initial cluster centres: caller-supplied, or K distinct random samples.
    if first_centroids is None:
        first_centroids = points[sample(range(N), K)]
    centroids = tf.Variable(first_centroids)
    # Index of the cluster each sample currently belongs to.
    cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))

    # Squared distance between every sample and every centroid, computed at
    # once by expanding both operands to shape [N, K, D].
    rep_points = tf.reshape(tf.tile(points, [1, K]), [N, K, D])
    rep_centroids = tf.reshape(tf.tile(centroids, [N, 1]), [N, K, D])
    sum_squares = tf.reduce_sum(tf.square(rep_points - rep_centroids), 2)

    # For each sample: index of its nearest centroid.
    best_centroids = tf.argmin(sum_squares, 1)
    # For each centroid: index of the sample nearest to it.
    centroids_indies = tf.argmin(sum_squares, 0)

    # Per-cluster sum of the member samples ...
    total = tf.unsorted_segment_sum(points, best_centroids, K)
    # ... and per-cluster member count (replicated across the D columns).
    count = tf.unsorted_segment_sum(tf.ones_like(points), best_centroids, K)
    # The new centroid is the mean of the members. A cluster with no members
    # would give 0 / 0 = NaN, so such a cluster keeps its previous centroid:
    # for it the expression below reduces to ``centroids / 1``.
    is_empty = tf.cast(tf.equal(count, 0), count.dtype)
    means = (total + is_empty * centroids) / (count + is_empty)

    did_assignments_change = tf.reduce_any(tf.not_equal(best_centroids, cluster_assignments))

    # Evaluate the "changed" flag before the variables are overwritten.
    with tf.control_dependencies([did_assignments_change]):
        do_updates = tf.group(centroids.assign(means), cluster_assignments.assign(best_centroids))

    # ------------------------------------------------------------------ run

    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        # Initialise only the two variables owned by this call.
        sess.run([centroids.initializer, cluster_assignments.initializer])

        iters, changed = 0, True
        while changed and iters < max_iters:
            iters += 1
            changed, _ = sess.run([did_assignments_change, do_updates])

        centers, cindies, assignments = sess.run(
            [centroids, centroids_indies, cluster_assignments])

    if predict_method is None:
        return iters, centers, assignments

    # Label every sample via the representative sample of its cluster. A list
    # (not ``map``) keeps the result indexable on Python 3 as well.
    predict_labels = [predict_method(cindies[a]) for a in assignments]
    return iters, centers, assignments, predict_labels


def test_KMeansCluster():
    """Run KMeansCluster on synthetic data and print the labelling accuracy.

    Builds K groups of N // K two-dimensional samples; group ``i`` is drawn
    uniformly from [5 * i, 5 * (i + 0.5)) in every dimension, so the groups do
    not overlap. A sample's true label is its index divided by the group size,
    which is also what ``predict_method`` returns for a sample index.
    """
    import numpy as np

    N, D, K = 10000, 2, 20
    # Floor division keeps the group size an int on Python 2 and Python 3.
    per_cluster = N // K
    points = np.empty((N, D), dtype=np.float32)

    for i in range(K):
        points[per_cluster * i:per_cluster * (i + 1)] = np.random.uniform(
            low=5 * i, high=5 * (i + 0.5), size=(per_cluster, D))

    iters, centers, assignments, predict_labels = KMeansCluster(
        points, K, max_iters=100, predict_method=lambda pi: pi // per_cluster)

    # Count the samples whose predicted label equals their true group index.
    hits = sum(pl == i // per_cluster for i, pl in enumerate(predict_labels))
    test_acc = hits * 1.0 / N
    print('Test acc: %.3f%%' % (test_acc * 100))


# Run the synthetic-data check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test_KMeansCluster()