cuda教程里有一个Julia图像生成案例,自己用cuda和opencv模仿写了一下,发现结构体cuComplex的一处写法错了。
原文写法是:
// Complex number as printed in the tutorial (the version that fails when used from device code).
struct cuComplex
{
float r; // real part
float i; // imaginary part
// No execution-space qualifier: this constructor is a host-only function,
// so it cannot be called from __device__ / __global__ code.
cuComplex(float a, float b):r(a), i(b) {}
};
现在正确写法是:
// Complex number whose constructor can be called from device code.
struct cuComplex
{
float r; // real part
float i; // imaginary part
// __device__ makes the constructor callable from kernels and other device functions.
__device__ cuComplex(float a, float b) {
r = a;
i = b;
}
};
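不同之处在于构造函数前要加__device__,指明它是要在显卡(设备端)上运行的;不加的话它只是主机端函数,无法在核函数里调用。成员的初始化这里改成了在函数体内赋值,不过真正必需的只是__device__限定符,保留原来的初始化列表写法(__device__ cuComplex(float a, float b) : r(a), i(b) {})同样可以编译通过。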
此外,bitmap自己改为opencv的Mat来代替,并添加了随机数,实时生成不同效果。这里不建议使用cuda的随机数生成器,太麻烦了。自己偷了懒,在cpu上生成随机数后,送到gpu里进行处理。
完整代码如下:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2/opencv.hpp>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#define DIM 512
struct cuComplex
{
float r;
float i;
__device__ cuComplex(float a, float b) {
r = a;
i = b;
}
__device__ float magnitude2(void) {
return r*r + i*i;
}
__device__ cuComplex operator*(const cuComplex&a) {
return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i);
}
__device__ cuComplex operator+(const cuComplex&a) {
return cuComplex(r+a.r, i+a.i);
}
};
// Decides whether pixel (x, y) of the DIM x DIM image is treated as part of the Julia set.
//   x, y : pixel coordinates
//   pd   : two doubles, the real (pd[0]) and imaginary (pd[1]) part of the constant c
// Returns 1 if z = z*z + c stays within |z|^2 <= 1000 for 200 iterations, otherwise 0.
__device__ int julia(int x, int y, double* pd) {
    const float scale = 1.5f;
    const int half = DIM / 2;

    // Map the pixel onto the complex plane, centred on the image and scaled by `scale`.
    const float jx = scale * (float)(half - x) / half;
    const float jy = scale * (float)(half - y) / half;

    // A fixed constant such as cuComplex c(-0.8, 0.156) can be used here instead.
    const cuComplex c((float)pd[0], (float)pd[1]);
    cuComplex z(jx, jy);

    int step = 0;
    while (step < 200) {
        z = z * z + c;
        if (z.magnitude2() > 1000.0f) {
            return 0;  // escaped: the point diverges
        }
        ++step;
    }
    return 1;
}
// Fills one 4-byte pixel of a DIM x DIM image per thread.
//   ptr   : device buffer of DIM*DIM*4 bytes, row-major, 4 bytes per pixel
//   pd    : device pointer to two doubles forwarded to julia() as the constant c
//   pixel : device pointer to three bytes copied into bytes 1..3 of every pixel
// Launch layout: any 2D grid/block combination that covers DIM x DIM threads;
// threads outside the image return without writing. The one-thread-per-block
// launch <<<dim3(DIM, DIM), 1>>> produces the same result as before.
__global__ void kernel(uchar *ptr, double* pd, uchar* pixel) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;  // x coordinate
    const int y = blockIdx.y * blockDim.y + threadIdx.y;  // y coordinate
    if (x >= DIM || y >= DIM) {
        return;  // guard: the grid may overshoot the image
    }

    const int offset = x + y * DIM;  // linear pixel index (row stride is the image width)
    const int juliaValue = julia(x, y, pd);

    uchar* px = ptr + offset * 4;
    px[0] = 255 * juliaValue;  // 255 inside the set, 0 outside
    px[1] = pixel[0];
    px[2] = pixel[1];
    px[3] = pixel[2];
}
int main() {
std::srand((int)time(0));
cv::Mat bitmap(DIM, DIM, CV_8UC4);
cv::namedWindow("", cv::WINDOW_NORMAL);
dim3 grid(DIM, DIM);
while (true) {
uchar* data = (uchar*)malloc(DIM*DIM * 4 * sizeof(uchar));
uchar* dev_bitmap;
double* pd, *dev_pd;
uchar* pixel, *dev_pixel;
pd = (double*)malloc(2 * sizeof(double));
pd[0] = -rand() / double(RAND_MAX);
pd[1] = rand() / double(RAND_MAX);
pixel = (uchar*)malloc(3 * sizeof(uchar));
pixel[0] = (rand() % 255);
pixel[1] = (rand() % 255);
pixel[2] = (rand() % 255);
cudaMalloc((void**)&dev_bitmap, DIM*DIM * 4 * sizeof(uchar));
cudaMalloc((void**)&dev_pd, 2 * sizeof(double));
cudaMalloc((void**)&dev_pixel, 3 * sizeof(uchar));
cudaMemcpy(dev_pd, pd, 2 * sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(dev_pixel, pixel, 3 * sizeof(uchar), cudaMemcpyHostToDevice);
kernel << <grid, 1 >> > (dev_bitmap, dev_pd, dev_pixel);
cudaMemcpy(data, dev_bitmap, DIM*DIM * 4 * sizeof(uchar), cudaMemcpyDeviceToHost);
bitmap.data = data;
cv::imshow("", bitmap);
free(pd); pd = NULL;
free(pixel); pixel = NULL;
free(data); data = NULL;
cudaFree(dev_pd);
cudaFree(dev_pixel);
cudaFree(dev_bitmap);
cv::waitKey(2000);
}
return 0;
}
效果: