TensorFlow 实现 K-Means

#! /usr/bin/python
# -*- coding: utf8 -*-

import tensorflow as tf
from random import sample


def KMeansCluster(points, K, max_iters=100, first_centroids=None, predict_method=None):
    """Cluster `points` into `K` groups with k-means built as a TensorFlow graph.

    Args:
        points: 2-D array with a `.shape` of (N, D). When `first_centroids`
            is None it is indexed with a list of row indices, so it must
            support that form of indexing (e.g. a NumPy array).
        K: number of clusters.
        max_iters: upper bound on the number of update steps.
        first_centroids: optional initial centroids. When None, K distinct
            rows of `points` are drawn at random.
        predict_method: optional callable. For every sample it receives the
            index of the sample that lies nearest to that sample's centroid
            and returns a label.

    Returns:
        `(iters, centers, assignments)` when `predict_method` is None,
        otherwise `(iters, centers, assignments, predict_labels)` where
        `predict_labels` is a list with one entry per sample.
    """
    N, D = points.shape

    # Initial centroids: caller-supplied, or K distinct random samples.
    if first_centroids is None:
        start_centroids = points[sample(range(N), K)]
    else:
        start_centroids = first_centroids
    centroids = tf.Variable(start_centroids)
    # Index of the centroid each sample currently belongs to.
    cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))

    # Squared distance between every sample and every centroid, computed at
    # once by expanding both operands to shape [N, K, D].
    rep_points = tf.reshape(tf.tile(points, [1, K]), [N, K, D])
    rep_centroids = tf.reshape(tf.tile(centroids, [N, 1]), [N, K, D])
    # The axis is passed positionally so that both the old
    # `reduction_indices` and the newer `axis` signatures accept it.
    sum_squares = tf.reduce_sum(tf.square(rep_points - rep_centroids), 2)

    # For every sample: index of its nearest centroid.
    best_centroids = tf.argmin(sum_squares, 1)
    # For every centroid: index of its nearest sample.
    centroids_indies = tf.argmin(sum_squares, 0)

    # Per-cluster sum and per-cluster count of the samples assigned to it.
    total = tf.unsorted_segment_sum(points, best_centroids, K)
    count = tf.unsorted_segment_sum(tf.ones_like(points), best_centroids, K)
    # A cluster without samples has total == count == 0, and 0 / 0 would turn
    # its centroid into NaN. For those rows the previous centroid is
    # substituted (total + centroid) / (0 + 1); other rows are unaffected.
    is_empty = tf.cast(tf.equal(count, 0), count.dtype)
    means = (total + is_empty * centroids) / (count + is_empty)

    did_assignments_change = tf.reduce_any(tf.not_equal(best_centroids, cluster_assignments))

    # Evaluate the "changed" flag before the variables are overwritten.
    with tf.control_dependencies([did_assignments_change]):
        do_updates = tf.group(centroids.assign(means), cluster_assignments.assign(best_centroids))

    # ------------------------------------------------------------------
    # Run the graph. The context manager closes the session on exit, and only
    # the two variables created here are initialized so that variables the
    # caller owns in the same graph are not reset.
    with tf.Session() as sess:
        sess.run([centroids.initializer, cluster_assignments.initializer])

        iters, changed = 0, True
        while changed and iters < max_iters:
            iters += 1
            changed, _ = sess.run([did_assignments_change, do_updates])

        centers, cindies, assignments = sess.run(
            [centroids, centroids_indies, cluster_assignments])

    if predict_method is None:
        return iters, centers, assignments

    # Built as a real list so the result is the same on Python 2 and 3.
    predict_labels = [predict_method(cindies[a]) for a in assignments]
    return iters, centers, assignments, predict_labels


def test_KMeansCluster():
    """Cluster K synthetic blobs with KMeansCluster and print the accuracy.

    N points are generated in K equally sized groups; group `i` is drawn
    uniformly from [5 * i, 5 * (i + 0.5)) in every dimension. A sample counts
    as correct when the label derived from its cluster equals the group it
    was generated in.
    """
    import numpy as np

    N, D, K = 10000, 2, 20
    # Floor division keeps the group size an int on Python 3 as well; it is
    # used as a slice bound and as an array size below.
    per_group = N // K
    points = np.empty((N, D), dtype=np.float32)

    for i in range(K):
        points[per_group * i:per_group * (i + 1)] = np.random.uniform(
            low=5 * i, high=5 * (i + 0.5), size=(per_group, D))

    # A sample index is turned into a label by the group that index falls in.
    iters, centers, assignments, predict_labels = KMeansCluster(
        points, K, max_iters=100, predict_method=lambda pi: pi // per_group)

    hits = sum(1 for i, pl in enumerate(predict_labels) if pl == i // per_group)
    test_acc = hits * 1.0 / N
    print('Test acc: %.3f%%' % (test_acc * 100))


# Run the self-test only when this file is executed as a script.
if __name__ == "__main__":
    test_KMeansCluster()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值