聚类算法部分代码

本文介绍了一种 K-means 聚类算法的具体实现过程,包括距离计算、聚类中心初始化及更新等关键步骤,并通过代码示例展示了如何对训练数据进行聚类。
/* Shorthand for unsigned int; used below for sizes, counts and indices. */
typedef unsigned int uint;


/* One K-means cluster: its centre plus the samples currently assigned to it. */
struct Cluster
{
vector<double> centroid; /* coordinates of the cluster centre */
vector<uint> samples;    /* indices of the member samples (k_means stores row indices into its training set here) */
};
/**
 * Euclidean (L2) distance between two points.
 *
 * @param a  First point.
 * @param b  Second point; expected to have the same dimension as a.
 * @return   sqrt(sum_i (a[i] - b[i])^2).
 *
 * On a dimension mismatch a diagnostic is written to std::cerr and the
 * distance is computed over the leading min(a.size(), b.size()) components
 * only, so neither vector is ever read past its end.
 */
double cal_distance(const std::vector<double>& a, const std::vector<double>& b)
{
    const std::size_t da = a.size();
    const std::size_t db = b.size();
    if (da != db) std::cerr << "Dimensions of two vectors must be same!!\n";

    /* Only the shared prefix of the two vectors can be compared safely. */
    const std::size_t dim = (da < db) ? da : db;

    double sum_sq = 0.0;
    for (std::size_t i = 0; i < dim; ++i)
    {
        const double diff = a[i] - b[i];
        sum_sq += diff * diff; /* plain multiply instead of the general pow(diff, 2) */
    }
    return std::sqrt(sum_sq);
}
vector<Cluster> k_means(vector<vector<double> > trainX, uint k, uint maxepoches)
{
const uint row_num = trainX.size();
const uint col_num = trainX[0].size();


/*初始化聚类中心*/
vector<Cluster> clusters(k);
uint seed = (uint)time(NULL);
for (uint i = 0; i < k; i++)
{
srand(seed);
int c = rand() % row_num;
clusters[i].centroid = trainX[c];
seed = rand();
}


/*多次迭代直至收敛,本次试验迭代100次*/
for (uint it = 0; it < maxepoches; it++)
{
/*每一次重新计算样本点所属类别之前,清空原来样本点信息*/
for (uint i = 0; i < k; i++)
{
clusters[i].samples.clear();
}
/*求出每个样本点距应该属于哪一个聚类*/
for (uint j = 0; j < row_num; j++)
{
/*都初始化属于第0个聚类*/
uint c = 0;
double min_distance = cal_distance(trainX[j], clusters[c].centroid);
for (uint i = 1; i < k; i++)
{
double distance = cal_distance(trainX[j], clusters[i].centroid);
if (distance < min_distance)
{
min_distance = distance;
c = i;
}
}
clusters[c].samples.push_back(j);
}


/*更新聚类中心*/
for (uint i = 0; i < k; i++)
{
vector<double> val(col_num, 0.0);
for (uint j = 0; j < clusters[i].samples.size(); j++)
{
uint sample = clusters[i].samples[j];
for (uint d = 0; d < col_num; d++)
{
val[d] += trainX[sample][d];
if (j == clusters[i].samples.size() - 1)
clusters[i].centroid[d] = val[d] / clusters[i].samples.size();
}
}
}
}
return clusters;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值