二维ca-cfar cuda加速实现,完整代码如下,
导入输入数据即可,后期完善后补发更详细代码
#include<stdio.h>
#include <cuda.h>
#include"iostream"
#include"cuda_runtime_api.h"
#include"device_launch_parameters.h"
#include"cufft.h"
#include<math.h>
#include <string.h>
#include <cuda_runtime.h>
#include <windows.h>
__global__ void cfar_2d_kernel2(float* PC_data_ifft_CA_abs, int* location, float* threshold_F, int M_row, int N_point, int N_prot_F, int N_ref_F, float alpha) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < M_row && j < N_point) {
if (location[i * N_point + j] == 1) {
if (i <= (N_prot_F + N_ref_F)) {
float sum_F = 0;
for (int k = i + N_prot_F + 1; k <= i + N_prot_F + N_ref_F; k++) {
sum_F += PC_data_ifft_CA_abs[k * N_point + j];
}
threshold_F[i * N_point + j] = sum_F / N_ref_F;
}
else if (i >= (M_row - N_prot_F - N_ref_F )) {
float sum_F = 0;
for (int k = i - N_prot_F - N_ref_F; k <= i - N_prot_F - 1; k++) {
sum_F += PC_data_ifft_CA_abs[k * N_point + j];
}
threshold_F[i * N_point + j] = sum_F / N_ref_F;
}
else {
float sum_F_left = 0;
float sum_F_right = 0;
for (int k = i - N_prot_F - N_ref_F; k <= i - N_prot_F - 1; k++) {
sum_F_left += PC_data_ifft_CA_abs[k * N_point + j];
}
for (int k = i + N_prot_F + 1; k <= i + N_prot_F + N_ref_F; k++) {
sum_F_right += PC_data_ifft_CA_abs[k * N_point + j];
}
threshold_F[i * N_point + j] = fmax(sum_F_left, sum_F_right) / N_ref_F;
}
}
else {
threshold_F[i * N_point + j] = 0;
}
}
}
__global__ void cfar_2d_kernel(float* PC_data_ifft_CA_abs, int* location, float* threshold_R, int M_row, int N_point, int N_prot_R, int N_ref_R, float alpha) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < M_row && j < N_point) {
if (location[i * N_point + j] == 1) {
if (j <= (N_prot_R + N_ref_R)) {
float sum_R = 0;
for (int k = j + N_prot_R + 1; k <= j + N_prot_R + N_ref_R; k++) {
sum_R += PC_data_ifft_CA_abs[i * N_point + k];
}
threshold_R[i * N_point + j] = sum_R / N_ref_R * alpha;
}
else if (j >= (N_point - N_prot_R - N_ref_R)) {
float sum_R = 0;
for (int k = j - N_prot_R - N_ref_R; k <= j - N_prot_R - 1; k++) {
sum_R += PC_data_ifft_CA_abs[i * N_point + k];
}
threshold_R[i * N_point + j] = (sum_R / N_ref_R) * alpha;
}
else {
float sum_R_left = 0;
float sum_R_right = 0;
for (int k = j - N_prot_R - N_ref_R; k <= j - N_prot_R - 1; k++) {
sum_R_left += PC_data_ifft_CA_abs[i * N_point + k];
}
for (int k = j + N_prot_R + 1; k <= j + N_prot_R + N_ref_R; k++) {
sum_R_right += PC_data_ifft_CA_abs[i * N_point + k];
}
threshold_R[i * N_point + j] = (sum_R_left + sum_R_right) / (N_ref_R * 2) * alpha;
}
}
else {
threshold_R[i * N_point + j] = 0;
}
}
}
__global__ void calc_sum_ref_2D(float* d_PC_data_ifft_CA_abs, float* d_sum_ref_2D, const int N_point, const int N_ref_2D, const int M) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx == 0) {
d_sum_ref_2D[0] = 0;
d_sum_ref_2D[1] = 0;
d_sum_ref_2D[2] = 0;
d_sum_ref_2D[3] = 0;
}
if (idx < N_ref_2D * N_ref_2D) {
int i = idx / N_ref_2D;
int j = idx % N_ref_2D;
float sum1 = d_PC_data_ifft_CA_abs[i * N_point + j];
atomicAdd(&d_sum_ref_2D[0], sum1);
}
if (idx < N_ref_2D * N_ref_2D) {
int i = idx / N_ref_2D;
int j = idx % N_ref_2D;
float sum2 = d_PC_data_ifft_CA_abs[i * N_point + N_point - N_ref_2D + j];
atomicAdd(&d_sum_ref_2D[1], sum2);
}
if (idx < N_ref_2D * N_ref_2D) {
int i = M / 2 - N_ref_2D + idx / N_ref_2D;
int j = idx % N_ref_2D;
float sum3 = d_PC_data_ifft_CA_abs[i * N_point + j];
atomicAdd(&d_sum_ref_2D[2], sum3);
}
if (idx < N_ref_2D * N_ref_2D) {
int i = M / 2 - N_ref_2D + idx / N_ref_2D;
int j = idx % N_ref_2D;
float sum4 = d_PC_data_ifft_CA_abs[i * N_point + N_point - N_ref_2D + j];
atomicAdd(&d_sum_ref_2D[3], sum4);
}
}
int main() {
LARGE_INTEGER frequency1, frequency2, frequency3, start_time1, end_time1, start_time2, end_time2, start_time3, end_time3, start_time4, end_time4, start_time5, end_time5;
double elapsed_time1, elapsed_time2, elapsed_time3, elapsed_time4, elapsed_time5, elapsed_time;
int const M = 64, N = 128;
int const N_ref_2D = 8;
int const N_point = 1080;
float* PC_data_ifft_CA_abs = NULL;
cudaMallocHost((void**)&PC_data_ifft_CA_abs,32 * 1080 * sizeof(float));
int size = 0;
int i = 0;
FILE* fp = fopen("F:/A-雷达信号处理/MATLAB的雷达数字信号处理/基于MATLAB的雷达数字信号处理/b.txt", "r");
if (fp == NULL) {
printf("文件打开失败!\n");
return 1;
}
while (fscanf(fp, "%*f") != EOF) {
size++;
}
PC_data_ifft_CA_abs = (float*)malloc(size * sizeof(float));
if (PC_data_ifft_CA_abs == NULL) {
printf("内存分配失败!\n");
return 1;
}
rewind(fp);
while (fscanf(fp, "%f", &PC_data_ifft_CA_abs[i]) != EOF && i < size) {
i++;
}
fclose(fp);
float sum_ref_2D[1][4] = { 0 };
float *d_PC_data_ifft_CA_abs, *d_sum_ref_2D;
cudaMalloc((void**)&d_PC_data_ifft_CA_abs, 32 * N_point * sizeof(float));
cudaMemcpy(d_PC_data_ifft_CA_abs, PC_data_ifft_CA_abs, 32 * N_point * sizeof(float), cudaMemcpyHostToDevice);
cudaMalloc((void**)&d_sum_ref_2D, 4 * sizeof(float));
calc_sum_ref_2D << <1, N_ref_2D* N_ref_2D >> > (d_PC_data_ifft_CA_abs, d_sum_ref_2D, N_point, N_ref_2D, M);
cudaMemcpy(sum_ref_2D, d_sum_ref_2D, 4 * sizeof(float), cudaMemcpyDeviceToHost);
cudaFree(d_sum_ref_2D);
printf("\n%f + %f + %f + %f\n", sum_ref_2D[0][0], sum_ref_2D[0][1], sum_ref_2D[0][2], sum_ref_2D[0][3]);
float SNR_Threshold = 15;
float Threshold = 0;
float sum = 0;
for (int i = 0; i < 4; i++) {
sum += sum_ref_2D[0][i];
}
printf("sum = %f\n", sum);
printf("min = %f\n", fminf(fminf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fminf(sum_ref_2D[0][2], sum_ref_2D[0][3])));
printf("min1 = %f\n", ((sum - fminf(fminf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fminf(sum_ref_2D[0][2], sum_ref_2D[0][3]))) / (N_ref_2D * N_ref_2D * 3)));
printf("min2= %f\n", powf(10, SNR_Threshold / 10));
if (SNR_Threshold >= 15) {
Threshold = ((sum - fminf(fminf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fminf(sum_ref_2D[0][2], sum_ref_2D[0][3]))) / (N_ref_2D * N_ref_2D * 3)) * powf(10, SNR_Threshold / 10);
}
else if (SNR_Threshold <= 3) {
Threshold = ((sum - fmaxf(fmaxf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fmaxf(sum_ref_2D[0][2], sum_ref_2D[0][3]))) / (N_ref_2D * N_ref_2D * 3)) * powf(10, SNR_Threshold / 10);
}
else {
float max_sum = fmaxf(fmaxf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fmaxf(sum_ref_2D[0][2], sum_ref_2D[0][3]));
float min_sum = fminf(fminf(sum_ref_2D[0][0], sum_ref_2D[0][1]), fminf(sum_ref_2D[0][2], sum_ref_2D[0][3]));
Threshold = ((sum - max_sum - min_sum) / (N_ref_2D * N_ref_2D * 2)) * powf(10, SNR_Threshold / 10);
}
printf("Threshold = %f\n", Threshold);
int* location = NULL;
cudaMallocHost((void**)&location, 32 * 1080 * sizeof(float));
int const M_row = M / 2;
for (int i = 0; i < M_row; i++) {
for (int j = 0; j < N_point; j++) {
if (PC_data_ifft_CA_abs[i * N_point + j] >= (1 * Threshold)) {
location[i * N_point + j] = 1;
printf("i=%d\t j=%d\n", i, j);
printf("%f\n", PC_data_ifft_CA_abs[i * N_point + j]);
}
else {
location[i * N_point + j] = 0;
}
}
}
int N_prot_R = 30;
int N_ref_R = 135;
float alpha = 3;
float* threshold_R = NULL;
cudaMallocHost((void**)&threshold_R, 32 * 1080 * sizeof(float));
float* dev_threshold_R;
int* dev_location;
cudaMalloc((void**)&dev_location, M_row * N_point * sizeof(int));
cudaMalloc((void**)&dev_threshold_R, M_row * N_point * sizeof(float));
cudaMemcpy(dev_location, location, M_row * N_point * sizeof(int), cudaMemcpyHostToDevice);
int block_size = 16;
dim3 threadsPerBlock(block_size, block_size);
dim3 numBlocks((M_row + threadsPerBlock.x - 1) / threadsPerBlock.x, (N_point + threadsPerBlock.y - 1) / threadsPerBlock.y);
QueryPerformanceFrequency(&frequency1);
QueryPerformanceCounter(&start_time1);
cfar_2d_kernel << <numBlocks, threadsPerBlock >> > (d_PC_data_ifft_CA_abs, dev_location, dev_threshold_R, M_row, N_point, N_prot_R, N_ref_R, alpha);
QueryPerformanceCounter(&end_time1);
elapsed_time1 = (double)(end_time1.QuadPart - start_time1.QuadPart) / frequency1.QuadPart;
cudaMemcpy(threshold_R, dev_threshold_R, M_row* N_point * sizeof(float), cudaMemcpyDeviceToHost);
printf("\nthreshold_R[i][j]: \n");
for (int i = 0; i < M_row; i++) {
for (int j = 0; j < N_point; j++) {
if (location[i * N_point + j] == 1) {
printf("%f\t", threshold_R[i * N_point + j]);
printf("%d+%d\n", i, j);
}
}
}
int* location_R = NULL;
cudaMallocHost((void**)&location_R, 32 * 1080 * sizeof(float));
for (int i = 0; i < M_row; i++) {
for (int j = 0; j < N_point; j++) {
if (location[i * N_point + j] == 1) {
if (PC_data_ifft_CA_abs[j + i * N_point] >= (1 * threshold_R[i * N_point + j])) {
location_R[i * N_point + j] = 1;
}
else {
location_R[i * N_point + j] = 0;
}
}
else {
location_R[i * N_point + j] = 0;
}
}
}
int N_prot_F = 5;
int N_ref_F = 10;
float* threshold_F = NULL;
cudaMallocHost((void**)&threshold_F, 32 * 1080 * sizeof(float));
int* dev_location_R;
float* dev_threshold_F;
cudaMalloc((void**)&dev_location_R, M_row * N_point * sizeof(int));
cudaMalloc((void**)&dev_threshold_F, M_row * N_point * sizeof(float));
cudaMemcpy(dev_location_R, location_R, M_row * N_point * sizeof(int), cudaMemcpyHostToDevice);
int block_size2 = 16;
dim3 threadsPerBlock2(block_size2, block_size2);
dim3 numBlocks2((M_row + threadsPerBlock.x - 1) / threadsPerBlock.x, (N_point + threadsPerBlock.y - 1) / threadsPerBlock.y);
QueryPerformanceFrequency(&frequency2);
QueryPerformanceCounter(&start_time2);
cfar_2d_kernel2 << <numBlocks2, threadsPerBlock2 >> > (d_PC_data_ifft_CA_abs, dev_location_R, dev_threshold_F, M_row, N_point, N_prot_F, N_ref_F, alpha);
QueryPerformanceCounter(&end_time2);
elapsed_time2 = (double)(end_time2.QuadPart - start_time2.QuadPart) / frequency2.QuadPart;
cudaMemcpy(threshold_F, dev_threshold_F, M_row* N_point * sizeof(float), cudaMemcpyDeviceToHost);
cudaFree(d_PC_data_ifft_CA_abs);
elapsed_time = elapsed_time1 + elapsed_time2 ;
printf("elapsed_time1: %f ms elapsed_time2: %f ms \n",(elapsed_time1 * 1000), (elapsed_time2 * 1000));
printf("GPU运行时间: %f ms\n", (elapsed_time * 1000));
int location_R_F[M_row][N_point] = { 0 };
for (int i = 0; i < M_row; i++) {
for (int j = 0; j < N_point; j++) {
if (location_R[i * N_point + j] == 1) {
if (PC_data_ifft_CA_abs[j + i * N_point] >= (1 * threshold_F[i * N_point + j])) {
location_R_F[i][j] = 1;
printf("i=%d\t j=%d\n", i, j);
printf("%f\n", PC_data_ifft_CA_abs[i * N_point + j]);
}
else {
location_R_F[i][j] = 0;
}
}
else {
location_R_F[i][j] = 0;
}
}
}
free(PC_data_ifft_CA_abs);
return 0;
}