关于聚类的这六个evaluation metrics,参考evaluation of clustering,讲得很好了,我就不赘述了,直接上代码:
第一个:计算NMI的:
package clusters;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created: 16-6-18, 10:00 AM.
*
* <p>Reference: http://www-nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html
*/
public class NormalizedMutualInformation {
/**
 * Location of the data file read by {@link #loadData(List)}.
 * Callers may reassign it before calling {@code loadData} to read a different file.
 */
public static String path = "/home/fhqplzj/IdeaProjects/Vein/src/main/resources/nmi_data";

/**
 * Reads the file at {@link #path} and appends one list of integers per non-blank line to
 * {@code lists}.
 *
 * <p>Tokens on a line are separated by runs of whitespace. Leading and trailing whitespace on a
 * line is ignored and blank lines are skipped, so an empty token is never handed to
 * {@link Integer#parseInt(String)}.
 *
 * <p>I/O failures (including a missing file) are reported with {@code printStackTrace()} and are
 * not rethrown; {@code lists} then holds whatever lines were parsed before the failure.
 *
 * @param lists destination that receives one {@code List<Integer>} per parsed line
 * @throws NumberFormatException if a token is not a valid decimal integer
 */
public static void loadData(List<List<Integer>> lists) {
    // try-with-resources closes the reader even when reading or parsing fails part-way through.
    try (BufferedReader bufferedReader =
            new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            // Without trimming, split("\\s+") yields a leading "" token for blank or indented lines.
            String trimmed = line.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            List<Integer> integers = new ArrayList<>();
            for (String s : trimmed.split("\\s+")) {
                integers.add(Integer.parseInt(s));
            }
            lists.add(integers);
        }
    } catch (IOException e) {
        // FileNotFoundException is a subclass of IOException, so the missing-file case lands here too.
        e.printStackTrace();
    }
}
public static void main(String[] args) {
List<List<Integer>> lists = new ArrayList<>();
loadData(lists);
int K = lists.size();
int N = 0;
int[] clusters = new int[K];
for (int i = 0; i < K; i++) {
clusters[i] = lists.get(i).size