// KMeansMapper.java
/**
 * Map stage of one k-means iteration: assigns every input sample to its
 * nearest centroid.
 *
 * <p>Input values are comma-separated coordinate lines. The current centroids
 * are read once per task from the job configuration under {@code Main.FLAG},
 * as a tab-separated list of comma-separated coordinate strings. For each
 * sample the mapper emits {@code <nearest centroid string, original line>}.
 *
 * <p>NOTE(review): {@code culDistance} is not declared in this class; it has
 * to be supplied elsewhere (for example via a static import) — confirm.
 */
public class KMeansMapper extends Mapper<LongWritable, Text, Text, Text> {
    /** Centroids in their original comma-separated text form; reused verbatim as output keys. */
    String centerArray[] = null;
    /** Parsed centroid coordinates: {@code centers[i][j]} is dimension j of centroid i. Filled in setup. */
    double centers[][] = new double[0][];

    /**
     * Loads and parses the current centroids from the job configuration.
     *
     * @param context task context giving access to the job configuration
     * @throws IOException if no centroid string is present in the configuration
     * @throws NumberFormatException if a centroid coordinate is not a valid double
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        String centerSource = context.getConfiguration().get(Main.FLAG);
        if (centerSource == null || centerSource.isEmpty()) {
            throw new IOException(
                    "No centroids found in the job configuration under key " + Main.FLAG);
        }
        // One tab-separated entry per centroid.
        centerArray = centerSource.split("\t");
        // Size the table from the data actually supplied, so that every row
        // is populated and map() never sees a null or missing centroid.
        centers = new double[centerArray.length][];
        for (int i = 0; i < centerArray.length; i++) {
            String center[] = centerArray[i].split(",");
            centers[i] = new double[center.length];
            for (int j = 0; j < center.length; j++) {
                centers[i][j] = Double.parseDouble(center[j]);
            }
        }
    }

    /**
     * Parses one sample and emits it keyed by the text of its nearest centroid.
     * Blank lines are skipped instead of failing the task.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one sample as comma-separated coordinates
     * @param context sink for the {@code <centroid, sample>} pair
     * @throws NumberFormatException if a coordinate is not a valid double
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().isEmpty()) {
            return;
        }
        String vector[] = line.split(",");
        double sample[] = new double[vector.length];
        for (int i = 0; i < vector.length; i++) {
            sample[i] = Double.parseDouble(vector[i]);
        }

        double min = Double.MAX_VALUE; // smallest distance seen so far
        int index = 0;                 // index of the centroid at that distance
        for (int i = 0; i < centers.length; i++) {
            double distance = culDistance(sample, centers[i]);
            // Strict comparison: on ties the earliest centroid wins.
            if (distance < min) {
                min = distance;
                index = i;
            }
        }
        // <centroid, sample assigned to it>
        context.write(new Text(centerArray[index]), value);
    }
}
// KMeansCombiner.java
/**
 * Map-side pre-aggregation for k-means: collapses all samples that share a
 * centroid key into a single comma-separated mean vector.
 *
 * <p>NOTE(review): the emitted value is a plain mean with no sample count
 * attached. If a downstream reducer averages these values again, groups of
 * different sizes are weighted equally and the result is a mean of means,
 * not the true cluster mean — confirm this is acceptable or carry the count.
 */
public class KMeansCombiner extends Reducer<Text, Text, Text, Text> {

    /**
     * Averages, per dimension, every sample grouped under {@code key}.
     *
     * @param key     centroid as comma-separated coordinates; its field count
     *                determines how many dimensions are read from each sample
     * @param values  samples assigned to that centroid, comma-separated
     * @param context sink for the {@code <centroid, mean vector>} pair
     * @throws NumberFormatException if a coordinate is not a valid double
     * @throws ArrayIndexOutOfBoundsException if a sample has fewer fields than the key
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int length = key.toString().split(",").length;
        double center[] = new double[length];
        int size = 0;

        // Accumulate the per-dimension sums and count the samples.
        for (Text value : values) {
            String fields[] = value.toString().split(",");
            for (int i = 0; i < length; i++) {
                center[i] += Double.parseDouble(fields[i]);
            }
            size++;
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < center.length; i++) {
            center[i] /= size;
            if (i > 0) {
                joined.append(',');
            }
            // Append the coordinate itself; appending the array would emit
            // its object identity string instead of a number.
            joined.append(center[i]);
        }
        context.write(key, new Text(joined.toString()));
    }
}
// KMeansReducer.java
/**
 * Reduce stage of one k-means iteration: recomputes each centroid as the
 * per-dimension mean of the vectors grouped under it, and counts the
 * centroids that have converged.
 *
 * <p>NOTE(review): {@code isConverged} is not declared in this class; it has
 * to be supplied elsewhere (for example via a static import) — confirm.
 */
public class KMeansReducer extends Reducer<Text, Text, Text, NullWritable> {
    /** Counter handle; obtained from the context when a centroid is found to have converged. */
    Counter counter = null;

    /**
     * Computes the new centroid for one cluster and writes it out as a
     * comma-separated coordinate string with a null value.
     *
     * <p>When {@code isConverged(old, new)} returns true, the counter
     * {@code myCounter / kmeansCounter} is incremented by one.
     *
     * @param key     previous centroid as comma-separated coordinates
     * @param values  vectors assigned to that centroid, comma-separated
     * @param context sink for the new centroid and source of the counter
     * @throws NumberFormatException if a coordinate is not a valid double
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String oldCenterStr[] = key.toString().split(",");
        int length = oldCenterStr.length;

        // Per-dimension running sum, turned into the mean below.
        double newCenter[] = new double[length];
        int size = 0; // number of vectors in this cluster
        for (Text value : values) {
            String centers[] = value.toString().split(",");
            for (int i = 0; i < centers.length; i++) {
                newCenter[i] += Double.parseDouble(centers[i]);
            }
            size++;
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < newCenter.length; i++) {
            newCenter[i] /= size; // mean
            if (i > 0) {
                joined.append(',');
            }
            joined.append(newCenter[i]);
        }

        // Parse the previous centroid so it can be compared with the new one.
        double oldCenter[] = new double[length];
        for (int i = 0; i < length; i++) {
            oldCenter[i] = Double.parseDouble(oldCenterStr[i]);
        }

        // Record each converged centroid in the job counter.
        if (isConverged(oldCenter, newCenter)) {
            counter = context.getCounter("myCounter", "kmeansCounter");
            counter.increment(1L);
        }
        context.write(new Text(joined.toString()), NullWritable.get());
    }
}