需求: 数据库中存有每小时的综合室外温度数据, 需要根据数据的聚集情况将一天划分为若干个时间段。采用 Java 实现, 代码与现有平台代码存在耦合, 但算法思想符合 k-means 的定义。具体实现如下:
// Clusters the hourly outdoor-temperature samples into `group` clusters with k-means
// and returns the final centroids through OperationResult#setExtra.

// Number of clusters (k).
int group = 3;
// Upper bound on the number of iterations, in case the convergence test never fires.
int maxTryTime = 50;
// Two consecutive iterations whose total distance differs by less than this count as converged.
double epsilon = 0.00000001;

// 1. Data preparation: load the records to cluster.
List<OpeHourTempr> totalAfterFilter = getOpeHourTemprs();
if (totalAfterFilter == null || totalAfterFilter.isEmpty()) {
    logger.error("没有历史数据可供计算, 使用系统默认的分段时间");
    return OperationResult.fail("没有历史数据可供计算");
}

// 2. Build the sample matrix: one row per record, laid out as
//    [getTime().getHours(), outdoor resultant temperature].
// NOTE(review): getTime() presumably returns java.util.Date, whose getHours() is deprecated;
// consider migrating to java.time once the entity type is confirmed.
int row = totalAfterFilter.size();
double[][] data = new double[row][];
for (int i = 0; i < row; i++) {
    OpeHourTempr opeHourTempr = totalAfterFilter.get(i);
    data[i] = new double[] {
        opeHourTempr.getTime().getHours(),
        opeHourTempr.getOutdoorResultantTempr().doubleValue()
    };
    // Debug level through the logger instead of dumping every sample to stdout.
    logger.debug(data[i][0] + " >>> " + data[i][1]);
}

// 3. Choose the initial centroids.
//    Preferred seeds are the first samples found at hours 0, 8 and 16. Any slot still empty
//    afterwards is filled with a random sample whose hour is not used by another centroid.
double[][] chosenData = new double[group][];
// Preferred seed hours that have not been matched to a sample yet.
Set<Integer> uniSet = new HashSet<>();
uniSet.add(0);
uniSet.add(8);
uniSet.add(16);
// Hours that already back a centroid. Only hours actually found in the data are recorded here,
// so a missing preferred hour correctly triggers the random fallback below.
Set<Integer> xSet = new HashSet<>();
int index = 0;
for (double[] perData : data) {
    // Stop once every slot is filled (guards group < 3) or every preferred hour is matched.
    if (index >= group || uniSet.isEmpty()) {
        break;
    }
    int x = (int) perData[0];
    if (uniSet.remove(x)) {
        xSet.add(x);
        chosenData[index] = perData;
        index++;
    }
}
while (index < group) {
    // Collect the samples whose hour is still unused, then draw uniformly from them.
    // This always terminates, unlike rejection sampling over the whole data set.
    List<double[]> candidates = new ArrayList<>();
    for (double[] perData : data) {
        if (!xSet.contains((int) perData[0])) {
            candidates.add(perData);
        }
    }
    if (candidates.isEmpty()) {
        // Fewer distinct hours than clusters: no valid set of initial centroids exists.
        logger.error("历史数据覆盖的小时数不足, 无法分为" + group + "组");
        return OperationResult.fail("历史数据覆盖的小时数不足, 无法分为" + group + "组");
    }
    double[] picked = candidates.get(RandomUtils.nextInt(0, candidates.size()));
    xSet.add((int) picked[0]);
    chosenData[index] = picked;
    index++;
}
logger.info("初始聚簇点位:");
for (double[] initialCenter : chosenData) {
    logger.info(initialCenter[0] + " ===> " + initialCenter[1]);
}
logger.info("=======================================");

// 4. Iterate: assign every sample to its nearest centroid, recompute the centroids,
//    and stop when the total distance stabilises or the iteration cap is reached.
int tryTime = 0;
double minValue = Double.MAX_VALUE;
while (tryTime < maxTryTime) {
    tryTime++;

    // Assignment step: cluster index -> samples assigned to that cluster.
    Map<Integer, List<double[]>> pointsMap = new HashMap<>();
    for (double[] datum : data) {
        int clusterIndex = 0;
        double minDistance = Double.MAX_VALUE;
        for (int i = 0; i < chosenData.length; i++) {
            double candidateDistance = getDistance(datum, chosenData[i]);
            if (candidateDistance < minDistance) {
                minDistance = candidateDistance;
                clusterIndex = i;
            }
        }
        pointsMap.computeIfAbsent(clusterIndex, k -> new ArrayList<>()).add(datum);
    }

    // Update step: move each centroid to the centre of its cluster and accumulate the total distance.
    double totalDistance = 0;
    for (int i = 0; i < group; i++) {
        List<double[]> points = pointsMap.get(i);
        if (points == null) {
            // No sample was assigned to this cluster: keep its previous centroid
            // rather than replacing it with null.
            continue;
        }
        double[] center = getCenter(points);
        chosenData[i] = center;
        totalDistance += getDistanceTotal(center, points);
    }

    logger.info("第" + tryTime + "次迭代, 得到的distance是" + totalDistance);
    logger.info("聚簇点位:");
    for (double[] center : chosenData) {
        logger.info(center[0] + " ===> " + center[1]);
    }
    logger.info("=======================================");

    // The centroids were already updated above, so on convergence the returned centroids
    // are the ones the reported distance was measured against.
    double delta = Math.abs(totalDistance - minValue);
    logger.debug(String.valueOf(delta));
    if (delta < epsilon) {
        break;
    }
    minValue = totalDistance;
}

// 5. Report the final centroids and hand them back to the caller.
for (double[] chosenDatum : chosenData) {
    logger.debug(chosenDatum[0] + "=====> " + chosenDatum[1]);
}
OperationResult result = OperationResult.success();
result.setExtra(chosenData);
return result;