最近上模式识别课涉及到聚类算法,主要讲了 K-means 和 ISODATA。这里是我用 MATLAB 实现的这两个算法的粗略版,还有很大的改进空间,先把原始版放在这里。
function Kmeans(x,k)
% KMEANS  Cluster the rows of x into k groups and print each group.
%
%   Kmeans(x,k) treats every row of x as one sample. The k cluster centers
%   are seeded with the first k rows of x. The function then alternates
%   between assigning each sample to its nearest center (via Belong2) and
%   recomputing each center as the mean of its members. When the centers no
%   longer change between two passes, the members of every cluster are
%   printed and the function returns.
%
%   x : n-by-d numeric matrix, one sample per row
%   k : number of clusters, an integer with 1 <= k <= n
n = size(x,1);
% The first k rows are used as seeds, so k must not exceed the row count.
if ~isscalar(k) || k < 1 || k ~= floor(k) || k > n
    error('Kmeans:invalidK', ...
        'k must be an integer between 1 and the number of rows of x (%d).', n);
end

% Seed the centers with the first k samples.
centers = cell(k,1);
for i = 1:k
    centers{i} = x(i,:);
end

while true
    % Rebuild the membership lists from scratch on every pass.
    members = cell(k,1);
    for i = 1:n
        idx = Belong2(x(i,:), centers);
        members{idx} = [members{idx}; x(i,:)];
    end

    % Recompute the centers.
    prevCenters = centers;
    for i = 1:k
        % An empty cluster keeps its previous center: 0/0 would produce
        % NaN, and isequal never reports NaN as equal, so the loop would
        % never terminate.
        if ~isempty(members{i})
            % Sum explicitly along dimension 1; without it a one-row
            % cluster would be summed across its columns into a scalar.
            centers{i} = sum(members{i},1) ./ size(members{i},1);
        end
    end

    % Converged once no center moved during this pass.
    if isequal(prevCenters, centers)
        for j = 1:k
            fprintf('第%d类:\n',j)
            disp(members{j});
        end
        break;
    end
end
end
function number = Belong2(x_i,means)
% BELONG2  Index of the cluster center nearest to a sample.
%
%   number = Belong2(x_i, means) returns the index i for which the
%   Euclidean distance norm(x_i - means{i}) is smallest. Ties go to the
%   lowest index.
%
%   x_i    : sample vector
%   means  : cell array of center vectors
%   number : index into means; stays 1 when no distance compares below Inf
%            (for example when every distance is NaN, or means is empty)
best = Inf;      % true infinity, so arbitrarily large distances still compare
number = 1;
for i = 1:numel(means)   % numel works for both row and column cell arrays
    d = norm(x_i - means{i});
    if d < best
        best = d;
        number = i;
    end
end
end
isodata
function ISODATA(x,K,theta_N,theta_S,theta_c,L,I)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%input parameters%%%%%%
% x : data
% K : 预期的聚类中心数
% theta_N : 每一聚类中心中最少的样本数,少于此数就不作为一个独立的聚类