Java 聚类算法

本文介绍两种常见的聚类算法:K-Means和层次聚类,并提供了这两种算法的Java实现代码示例。K-Means算法通过迭代划分数据集来形成聚类,而层次聚类则通过不断合并最相似的类簇逐步构建树状结构。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、K-Means 算法的 Java 实现:

 

/**
 * Minimal K-Means clustering for one-dimensional data.
 *
 * <p>The first {@code k} input values are used as the initial cluster centers. Points are
 * repeatedly assigned to their nearest center and the centers are recomputed as the mean of
 * their members until the centers stop changing.
 *
 * <p>Note: the result of K-Means generally depends on the initial centers; different seeds can
 * lead to different final partitions.
 */
public class BasicKMeans {

    /**
     * Demo entry point: clusters a fixed sample into five groups and prints one group per line,
     * members separated by tabs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        double[] p = { 1, 2, 3, 5, 6, 7, 9, 10, 11, 100, 150, 200, 1000 };
        int k = 5;
        double[][] g = cluster(p, k);
        for (int i = 0; i < g.length; i++) {
            for (int j = 0; j < g[i].length; j++) {
                System.out.print(g[i][j]);
                System.out.print("\t");
            }
            System.out.println();
        }
    }

    /**
     * Partitions {@code p} into {@code k} clusters.
     *
     * @param p the values to cluster; every value must be finite
     * @param k the number of clusters, {@code 1 <= k <= p.length}
     * @return {@code k} arrays, one per cluster, each holding its members in input order; a
     *         cluster may be empty (for example when several initial centers coincide)
     * @throws IllegalArgumentException if {@code p} is {@code null}, contains a NaN or infinite
     *         value, or {@code k} is outside {@code [1, p.length]}
     */
    public static double[][] cluster(double[] p, int k) {
        if (p == null) {
            throw new IllegalArgumentException("p must not be null");
        }
        if (k < 1 || k > p.length) {
            throw new IllegalArgumentException(
                    "k must be between 1 and p.length (" + p.length + "), got " + k);
        }
        for (int i = 0; i < p.length; i++) {
            // NaN/infinite values yield NaN centers, which never compare equal and would
            // keep the convergence loop running forever.
            if (Double.isNaN(p[i]) || Double.isInfinite(p[i])) {
                throw new IllegalArgumentException("p[" + i + "] is not finite: " + p[i]);
            }
        }

        // Current centers, seeded with the first k points.
        double[] c = new double[k];
        System.arraycopy(p, 0, c, 0, k);

        while (true) {
            // Assign every point to its nearest center.
            double[][] g = group(p, c);

            // Recompute each center as the mean of its members.
            double[] nc = new double[k];
            for (int i = 0; i < k; i++) {
                // An empty cluster has no mean (0/0 = NaN). Keep its previous center so the
                // convergence test below can still succeed.
                nc[i] = g[i].length == 0 ? c[i] : center(g[i]);
            }

            if (equal(nc, c)) {
                // Centers are stable: the current grouping is the final result.
                return g;
            }
            c = nc;
        }
    }

    /**
     * Returns the arithmetic mean of {@code p}.
     *
     * @param p the values to average
     * @return the mean; {@code NaN} when {@code p} is empty
     */
    public static double center(double[] p) {
        return sum(p) / p.length;
    }

    /**
     * Assigns each point to the nearest center.
     *
     * @param p the points
     * @param c the cluster centers
     * @return an array with one entry per center; entry {@code j} holds, in input order, the
     *         points whose nearest center is {@code c[j]} (ties go to the lowest index)
     */
    public static double[][] group(double[] p, double[] c) {
        // label[i] is the index of the center nearest to p[i]; size[j] counts members of j.
        int[] label = new int[p.length];
        int[] size = new int[c.length];
        for (int i = 0; i < p.length; i++) {
            double[] d = new double[c.length];
            for (int j = 0; j < c.length; j++) {
                d[j] = distance(p[i], c[j]);
            }
            label[i] = min(d);
            size[label[i]]++;
        }

        double[][] g = new double[c.length][];
        for (int j = 0; j < c.length; j++) {
            g[j] = new double[size[j]];
        }

        // Single pass over the points keeps each group's members in input order.
        int[] next = new int[c.length];
        for (int i = 0; i < p.length; i++) {
            int j = label[i];
            g[j][next[j]++] = p[i];
        }
        return g;
    }

    /**
     * Returns the absolute difference between two values.
     *
     * @param x first value
     * @param y second value
     * @return {@code |x - y|}
     */
    public static double distance(double x, double y) {
        return Math.abs(x - y);
    }

    /**
     * Returns the sum of all elements of {@code p}.
     *
     * @param p the values to add
     * @return the sum; {@code 0.0} when {@code p} is empty
     */
    public static double sum(double[] p) {
        double sum = 0.0;
        for (double v : p) {
            sum += v;
        }
        return sum;
    }

    /**
     * Returns the index of the smallest element of {@code p}.
     *
     * @param p a non-empty array
     * @return the index of the first minimum (strict comparison, so ties keep the lower index)
     */
    public static int min(double[] p) {
        int i = 0;
        double m = p[0];
        for (int j = 1; j < p.length; j++) {
            if (p[j] < m) {
                i = j;
                m = p[j];
            }
        }
        return i;
    }

    /**
     * Tests two arrays for exact element-wise equality.
     *
     * @param a first array
     * @param b second array
     * @return {@code true} when both arrays have the same length and every pair of elements
     *         compares equal with {@code ==} (so a NaN element is never equal to anything)
     */
    public static boolean equal(double[] a, double[] b) {
        if (a.length != b.length) {
            return false;
        }
        for (int i = 0; i < a.length; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }
}

 

 

2、层次聚类算法的 Java 实现:

(1)DataPoint.java

public class DataPoint {
    String dataPointName; // 样本点名
    Cluster cluster; // 样本点所属类簇
    private double dimensioin[]; // 样本点的维度

    public DataPoint(){

    }

    public DataPoint(double[] dimensioin,String dataPointName){
         this.dataPointName=dataPointName;
         this.dimensioin=dimensioin;
    }

    public double[] getDimensioin() {
        return dimensioin;
    }

    public void setDimensioin(double[] dimensioin) {
        this.dimensioin = dimensioin;
    }

    public Cluster getCluster() {
        return cluster;
    }

    public void setCluster(Cluster cluster) {
        this.cluster = cluster;
    }

    public String getDataPointName() {
        return dataPointName;
    }

    public void setDataPointName(String dataPointName) {
        this.dataPointName = dataPointName;
    }
}

 

(2)Cluster.java

import java.util.ArrayList;
import java.util.List;


public class Cluster {
    private List<DataPoint> dataPoints = new ArrayList<DataPoint>(); // 类簇中的样本点
    private String clusterName;

    public List<DataPoint> getDataPoints() {
        return dataPoints;
    }

    public void setDataPoints(List<DataPoint> dataPoints) {
        this.dataPoints = dataPoints;
    }

    public String getClusterName() {
        return clusterName;
    }

    public void setClusterName(String clusterName) {
        this.clusterName = clusterName;
    }

}

(3)ClusterAnalysis.java

//层次聚类分析,程序入口;
import java.util.ArrayList;
import java.util.List;


public class ClusterAnalysis {
   public List<Cluster> startAnalysis(List<DataPoint> dataPoints,int ClusterNum){
      List<Cluster> finalClusters=new ArrayList<Cluster>();
    
      List<Cluster> originalClusters=initialCluster(dataPoints);
      finalClusters=originalClusters;
      while(finalClusters.size()>ClusterNum){
          double min=Double.MAX_VALUE;
          int mergeIndexA=0;
          int mergeIndexB=0;
          for(int i=0;i<finalClusters.size();i++){
              for(int j=0;j<finalClusters.size();j++){
                  if(i!=j){
                      Cluster clusterA=finalClusters.get(i);
                      Cluster clusterB=finalClusters.get(j);

                      List<DataPoint> dataPointsA=clusterA.getDataPoints();
                      List<DataPoint> dataPointsB=clusterB.getDataPoints();

                      for(int m=0;m<dataPointsA.size();m++){
                          for(int n=0;n<dataPointsB.size();n++){
                              double tempDis=getDistance(dataPointsA.get(m),dataPointsB.get(n));
                              if(tempDis<min){
                                  min=tempDis;
                                  mergeIndexA=i;
                                  mergeIndexB=j;
                              }
                          }
                      }
                  }
              } //end for j
          }// end for i
          //合并cluster[mergeIndexA]和cluster[mergeIndexB]
          finalClusters=mergeCluster(finalClusters,mergeIndexA,mergeIndexB);
      }//end while

      return finalClusters;
   }
   private List<Cluster> mergeCluster(List<Cluster> clusters,int mergeIndexA,int mergeIndexB){
       if (mergeIndexA != mergeIndexB) {
           // 将cluster[mergeIndexB]中的DataPoint加入到 cluster[mergeIndexA]
           Cluster clusterA = clusters.get(mergeIndexA);
           Cluster clusterB = clusters.get(mergeIndexB);

           List<DataPoint> dpA = clusterA.getDataPoints();
           List<DataPoint> dpB = clusterB.getDataPoints();

           for (DataPoint dp : dpB) {
               DataPoint tempDp = new DataPoint();
               tempDp.setDataPointName(dp.getDataPointName());
               tempDp.setDimensioin(dp.getDimensioin());
               tempDp.setCluster(clusterA);
               dpA.add(tempDp);
           }

           clusterA.setDataPoints(dpA);

           // List<Cluster> clusters中移除cluster[mergeIndexB]
           clusters.remove(mergeIndexB);
       }

       return clusters;
  }

  // 初始化类簇
  private List<Cluster> initialCluster(List<DataPoint> dataPoints){
      List<Cluster> originalClusters=new ArrayList<Cluster>();
      for(int i=0;i<dataPoints.size();i++){
          DataPoint tempDataPoint=dataPoints.get(i);
          List<DataPoint> tempDataPoints=new ArrayList<DataPoint>();
          tempDataPoints.add(tempDataPoint);

          Cluster tempCluster=new Cluster();
          tempCluster.setClusterName("Cluster "+String.valueOf(i));
          tempCluster.setDataPoints(tempDataPoints);

          tempDataPoint.setCluster(tempCluster);
          originalClusters.add(tempCluster);
      }

      return originalClusters;
  }

  //计算两个样本点之间的欧几里得距离
  private double getDistance(DataPoint dpA,DataPoint dpB){
       double distance=0;
       double[] dimA = dpA.getDimensioin();
       double[] dimB = dpB.getDimensioin();

       if (dimA.length == dimB.length) {
           for (int i = 0; i < dimA.length; i++) {
                double temp=Math.pow((dimA[i]-dimB[i]),2);
                distance=distance+temp;
           }
           distance=Math.pow(distance, 0.5);
       }

      return distance;
  }

  public static void main(String[] args){
      ArrayList<DataPoint> dpoints = new ArrayList<DataPoint>();
    
      double[] a={2,3};
      double[] b={2,4};
      double[] c={1,4};
      double[] d={1,3};
      double[] e={2,2};
      double[] f={3,2};

      double[] g={8,7};
      double[] h={8,6};
      double[] i={7,7};
      double[] j={7,6};
      double[] k={8,5};

//      double[] l={100,2};//孤立点


      double[] m={8,20};
      double[] n={8,19};
      double[] o={7,18};
      double[] p={7,17};
      double[] q={8,20};

      dpoints.add(new DataPoint(a,"a"));
      dpoints.add(new DataPoint(b,"b"));
      dpoints.add(new DataPoint(c,"c"));
      dpoints.add(new DataPoint(d,"d"));
      dpoints.add(new DataPoint(e,"e"));
      dpoints.add(new DataPoint(f,"f"));

      dpoints.add(new DataPoint(g,"g"));
      dpoints.add(new DataPoint(h,"h"));
      dpoints.add(new DataPoint(i,"i"));
      dpoints.add(new DataPoint(j,"j"));
      dpoints.add(new DataPoint(k,"k"));

//      dataPoints.add(new DataPoint(l,"l"));

      dpoints.add(new DataPoint(m,"m"));
      dpoints.add(new DataPoint(n,"n"));
      dpoints.add(new DataPoint(o,"o"));
      dpoints.add(new DataPoint(p,"p"));
      dpoints.add(new DataPoint(q,"q"));

      int clusterNum=3; //类簇数

      ClusterAnalysis ca=new ClusterAnalysis();
      List<Cluster> clusters=ca.startAnalysis(dpoints, clusterNum);

      for(Cluster cl:clusters){
          System.out.println("------"+cl.getClusterName()+"------");
          List<DataPoint> tempDps=cl.getDataPoints();
          for(DataPoint tempdp:tempDps){
              System.out.println(tempdp.getDataPointName());
          }
      }

  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值