#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <limits>
#include <time.h>
#include <fstream>
#include <sstream>
#include <string>
#include <cuda_runtime.h>
#define USE_GPU 1
using namespace std;
extern "C" void launch_kernel_DPC_computeRho(float* h_data, int* h_NL, int N, int M, int dc);
extern "C" void launch_kernel_DPC_computeDeltaAndNearestHigherD(float* h_delta, float* h_nearest_higher, int N, int M, int* h_indices, float* h_data);
extern "C" void launch_thrust_findClusterCenters(float* h_rho_delta, int* ord_clusters, int N);
extern "C" void launch_thrust_assignClusters(float* h_ord_rho, int* h_ord_indices, int N);
extern "C" void launch_kernel_DPC_computeRhoR(float* h_data, int* h_NL, int N, int M, int dc);
extern "C" void launch_kernel_DPC_computeRhoM(float* h_data, int* h_NL, int N, int M, int dc, int* h_max_min);
struct DataPoint {
vector<double> coordinates;
double rho = 0.0; // 局部密度
double delta = 0.0; // 到更高密度点的最小距离
int nearest_higher = -1; // 最近更高密度点的索引
int cluster = -1; // 聚类标签,-1表示未分类
};
float* h_NN;
int* h_NL;
int* h_row;
int* h_col;
float* dc_2;
float* h_data;
float DC;
float* h_min_dist;
int* h_min_idx;
// 计算欧氏距离
double euclideanDistance(const DataPoint& a, const DataPoint& b) {
double dist = 0.0;
for (size_t i = 0; i < a.coordinates.size(); ++i) {
dist += pow(a.coordinates[i] - b.coordinates[i], 2);
}
return sqrt(dist);
}
// 计算距离矩阵
float* computeDistanceMatrix(const vector<DataPoint>& points) {
size_t n = points.size();
float* distMatrix;
cudaMallocHost((void**)&distMatrix, n * n * sizeof(float));
for (size_t i = 0; i < n; ++i) {
for (size_t j = i + 1; j < n; ++j) {
double dist = euclideanDistance(points[i], points[j]);
distMatrix[i * n + j] = dist;
distMatrix[j * n + i] = dist;
}
}
return distMatrix;
}
// 计算距离矩阵
float* computeDistanceMatrix_gpu(const vector<DataPoint>& points) {
size_t n = points.size();
size_t m = points[0].coordinates.size();
float max[4] = { -9999.9,-9999.9 ,-9999.9 ,-9999.9 };
float min[4] = { 9999.9 ,9999.9 ,9999.9 ,9999.9 };
int max_min[4];
if (0)
{
for (size_t i = 0; i < n; ++i) {
for (size_t j = 0; j < m; ++j)
{
h_data[i * m + j] = points[i].coordinates[j];
if (max[j] < points[i].coordinates[j])
{
max[j] = points[i].coordinates[j];
}
if (min[j] > points[i].coordinates[j])
{
min[j] = points[i].coordinates[j];
}
}
}
max_min[0] = int((max[0] - min[0]) / DC) + 1;
max_min[1] = int((max[1] - min[1]) / DC) + 1;
max_min[2] = int((max[2] - min[2]) / DC) + 1;
max_min[3] = int((max[3] - min[3]) / DC) + 1;
}
launch_kernel_DPC_computeRho(h_data, h_NL, n, m, DC);
//launch_kernel_DPC_computeRhoR(h_data, h_NL, n, m, DC);
//launch_kernel_DPC_computeRhoM(h_data, h_NL, n, m, DC, max_min);
return h_NN;
}
// 使用截断距离法计算局部密度
void computeRho(vector<DataPoint>& points, float* distMatrix, double dc) {
size_t n = points.size();
for (size_t i = 0; i < n; ++i) {
double rho = 0.0;
for (size_t j = 0; j < n; ++j) {
if (i != j && distMatrix[i * n + j] < dc) {
rho += 1.0;
}
}
points[i].rho = rho;
}
}
// 使用截断距离法计算局部密度
void computeRho_gpu(vector<DataPoint>& points, float* distMatrix, double dc) {
size_t n = points.size();
for (size_t i = 0; i < n; ++i) {
points[i].rho = h_NL[i];
}
}
// 计算delta和最近更高密度点
void computeDeltaAndNearestHigher(vector<DataPoint>& points, float* distMatrix) {
size_t n = points.size();
vector<size_t> indices(n);
iota(indices.begin(), indices.end(), 0);
// 按密度降序排序索引
sort(indices.begin(), indices.end(), [&points](size_t a, size_t b) {
return points[a].rho > points[b].rho;
});
// 找到每个点的最小距离和最近更高密度点
for (size_t i = 0; i < n; ++i) {
size_t current_idx = indices[i];
double min_dist = numeric_limits<double>::max();
int nearest = -1;
// 检查所有密度更高的点
for (size_t j = 0; j < i; ++j) {
size_t higher_idx = indices[j];
double dist = distMatrix[current_idx * n + higher_idx];
if (dist < min_dist) {
min_dist = dist;
nearest = higher_idx;
}
}
if (nearest == -1) {
// 处理密度最高的点
double max_dist = 0.0;
for (size_t k = 0; k < n; ++k) {
if (k != current_idx && distMatrix[current_idx * n + k] > max_dist) {
max_dist = distMatrix[current_idx * n + k];
}
}
points[current_idx].delta = max_dist;
points[current_idx].nearest_higher = -1;
}
else {
points[current_idx].delta = min_dist;
points[current_idx].nearest_higher = nearest;
}
}
}
// 计算delta和最近更高密度点
void computeDeltaAndNearestHigher_gpu(vector<DataPoint>& points, float* distMatrix) {
int m = points[0].coordinates.size();
//vector<size_t> indices(n);
//iota(indices.begin(), indices.end(), 0);
//// 按密度降序排序索引
//sort(indices.begin(), indices.end(), [&points](size_t a, size_t b) {
// return points[a].rho > points[b].rho;
// });
int n = points.size();
int* ord_indices;
cudaMallocHost((void**)&ord_indices, n * sizeof(int));
for (int i = 0; i < n; i++)
{
ord_indices[i] = i;
}
float* ord_rho;
cudaMallocHost((void**)&ord_rho, n * sizeof(float));
vector<float> ordrho;
for (size_t i = 0; i < points.size(); ++i) {
ord_rho[i] = points[i].rho;
}
launch_thrust_assignClusters(ord_rho, ord_indices, n);
int* h_indices;
float* h_delta;
float* h_nearest_higher;
cudaMallocHost((void**)&h_indices, n * sizeof(int));
cudaMallocHost((void**)&h_delta, n * sizeof(float));
cudaMallocHost((void**)&h_nearest_higher, n * sizeof(float));
for (int i = 0; i < n; i++)
{
h_indices[i] = ord_indices[i];;
h_nearest_higher[i] = -1;
h_delta[i] = 0.0;
}
launch_kernel_DPC_computeDeltaAndNearestHigherD(h_delta, h_nearest_higher, n, m, h_indices, h_data);
//launch_kernel_DPC_computeDeltaAndNearestHigherR(h_delta, h_nearest_higher, n, m, h_indices, h_data);
for (int i = 0; i < n; i++)
{
points[i].delta = h_delta[i];
points[i].nearest_higher = h_nearest_higher[i];
}
cudaFreeHost(h_indices);
cudaFreeHost(h_delta);
cudaFreeHost(h_nearest_higher);
}
// 查找聚类中心(按rho*delta乘积排序)
vector<int> findClusterCenters(const vector<DataPoint>& points, int n_clusters) {
vector<pair<double, int>> products;
for (size_t i = 0; i < points.size(); ++i) {
products.emplace_back(points[i].rho * points[i].delta, i);
}
sort(products.rbegin(), products.rend());
vector<int> centers;
for (int i = 0; i < n_clusters; ++i) {
centers.push_back(products[i].second);
}
return centers;
}
// 查找聚类中心(按rho*delta乘积排序)
vector<int> findClusterCenters_gpu(const vector<DataPoint>& points, int n_clusters) {
int n = points.size();
float* rho_delta;
int* ord_clusters;
cudaMallocHost((void**)&rho_delta, n * sizeof(float));
cudaMallocHost((void**)&ord_clusters, n * sizeof(int));
for (size_t i = 0; i < points.size(); ++i) {
rho_delta[i] = points[i].rho * points[i].delta;
}
launch_thrust_findClusterCenters(rho_delta, ord_clusters, n);
vector<int> centers;
for (int i = 0; i < n_clusters; ++i) {
centers.push_back(ord_clusters[i]);
}
return centers;
cudaFreeHost(rho_delta);
cudaFreeHost(ord_clusters);
}
// 分配聚类标签
void assignClusters(vector<DataPoint>& points, const vector<int>& centers) {
// 标记聚类中心
for (size_t i = 0; i < centers.size(); ++i) {
points[centers[i]].cluster = i + 1;
}
// 按密度降序处理所有点
vector<size_t> indices(points.size());
iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&points](size_t a, size_t b) {
return points[a].rho > points[b].rho;
});
// 分配聚类标签
for (size_t idx : indices) {
if (points[idx].cluster != -1) continue;
if (points[idx].nearest_higher == -1) {
points[idx].cluster = 0; // 异常点
}
else {
points[idx].cluster = points[points[idx].nearest_higher].cluster;
}
}
}
// 分配聚类标签
void assignClusters_gpu(vector<DataPoint>& points, const vector<int>& centers) {
// 标记聚类中心
for (size_t i = 0; i < centers.size(); ++i) {
points[centers[i]].cluster = i + 1;
}
int n = points.size();
int* ord_indices;
cudaMallocHost((void**)&ord_indices, n * sizeof(int));
for (int i = 0; i < n; i++)
{
ord_indices[i] = i;
}
float* ord_rho;
cudaMallocHost((void**)&ord_rho, n * sizeof(float));
for (size_t i = 0; i < points.size(); ++i) {
ord_rho[i] = points[i].rho;
}
launch_thrust_assignClusters(ord_rho, ord_indices, n);
vector<size_t> indices(points.size());
for (int i = 0; i < n; i++) {
indices[i] = ord_indices[i];
}
// 分配聚类标签
for (size_t idx : indices) {
if (points[idx].cluster != -1) continue;
if (points[idx].nearest_higher == -1) {
points[idx].cluster = 0; // 异常点
}
else {
points[idx].cluster = points[points[idx].nearest_higher].cluster;
}
}
}
vector<DataPoint> GetData(const string& file_path)
{
vector<DataPoint> dataPoints;
ifstream file(file_path);
if (!file.is_open()) {
throw runtime_error("无法打开文件: " + file_path);
}
string line;
while (getline(file, line))
{
// 跳过空行
if (line.find_first_not_of(" \t\n") == string::npos) continue;
// 替换所有逗号为空格(兼容逗号分隔)
replace(line.begin(), line.end(), ',', ' ');
vector<double> row;
DataPoint p;
stringstream ss(line);
double value;
// 解析每行数据
while (ss >> value) {
row.push_back(value);
}
if (!row.empty()) {
row.pop_back();
p.coordinates = row;
dataPoints.push_back(p);
}
}
return dataPoints;
}
int main() {
clock_t start, end;
start = clock();
end = clock();
//cout << "Run time computeDistanceMatrix: " << (double)(end - start) << "(ms)" << endl;
vector<DataPoint> points = GetData("C:/Users/admin/AppData/Roaming/feiq/Recv Files/dataset/data_500000_4.txt");
int n = points.size();
int m = points[0].coordinates.size();
cout << "数据量: " << n << endl;
cout << "数据维度: " << m << endl;
cudaMallocHost((void**)&h_NN, n * n * sizeof(float));
cudaMallocHost((void**)&h_NL, n * sizeof(int));
cudaMallocHost((void**)&h_data, m * n * sizeof(float));
cudaMallocHost((void**)&h_min_dist, n * sizeof(float));
cudaMallocHost((void**)&h_min_idx, n * sizeof(int));
// 参数设置
double dc = 0.1; // 截断距离
int n_clusters = 3; // 聚类数量
DC = dc;
#if USE_GPU == 1
cout << "使用GPU"<< endl;
#else
cout << "没用GPU" << endl;
#endif
clock_t start_all, end_all;
start_all = clock();
start = clock();
// 计算距离矩阵
#if USE_GPU == 1
auto distMatrix = computeDistanceMatrix_gpu(points);
#else
auto distMatrix = computeDistanceMatrix(points);
#endif
end = clock();
cout << "Run time computeDistanceMatrix: " << (double)(end - start) << "(ms)" << endl;
start = clock();
// 计算局部密度
#if USE_GPU == 1
computeRho_gpu(points, distMatrix, dc);
#else
computeRho(points, distMatrix, dc);
#endif
end = clock();
cout << "Run time computeRho: " << (double)(end - start) << "(ms)" << endl;
start = clock();
// 计算delta和最近更高密度点
#if USE_GPU == 1
computeDeltaAndNearestHigher_gpu(points, distMatrix);
#else
computeDeltaAndNearestHigher(points, distMatrix);
#endif
end = clock();
cout << "Run time computeDeltaAndNearestHigher: " << (double)(end - start) << "(ms)" << endl;
start = clock();
// 查找聚类中心
#if USE_GPU == 1
auto centers = findClusterCenters_gpu(points, n_clusters);
#else
auto centers = findClusterCenters(points, n_clusters);
#endif
end = clock();
cout << "Run time findClusterCenters: " << (double)(end - start) << "(ms)" << endl;
start = clock();
// 分配聚类标签
#if USE_GPU == 1
assignClusters_gpu(points, centers);
#else
assignClusters(points, centers);
#endif
end = clock();
cout << "Run time assignClusters: " << (double)(end - start)<< "(ms)" << endl;
end_all = clock();
cout << "Run time all: " << (double)(end_all - start_all) << "(ms)" << endl;
// 输出结果
cout << "聚类结果(显示前10个点):" << endl;
for (size_t i = 0; i < 10; ++i) {
cout << "点" << i << " (";
for (auto coord : points[i].coordinates)
cout << coord << " ";
cout << "): 聚类" << points[i].cluster << endl;
}
cudaFreeHost(h_NN);
cudaFreeHost(h_data);
return 0;
}逐行分析代码
最新发布