// Key topics: constant memory and the use of CUDA events.
#include "C://Users//XX//Desktop//CUDA//common//cpu_bitmap.h"
#include <stdio.h>
#include <stdlib.h>
// Width and height of the rendered image, in pixels.
#define DIM 1024
// Large sentinel value; its negation is used as the "no hit" depth.
#define INF 2e10f
// Pseudo-random value scaled into [0, x] using rand().
// The argument is parenthesised so that expressions such as rnd(a - b)
// are scaled as a whole instead of only their last term.
#define rnd(x) ((x) * rand() / RAND_MAX)
// Number of spheres in the scene (20).
#define SPHERES 20
// A sphere described by its colour, radius and centre position.
struct Sphere {
    float r, g, b;   // colour components
    float radius;    // sphere radius
    float x, y, z;   // centre coordinates

    // Tests whether the point (ox, oy) lies strictly inside the circle that
    // the sphere projects onto the xy-plane.
    //
    // On a hit: stores dz / sqrtf(radius * radius) in *n, where dz is the
    // height of the sphere surface above its centre at (ox, oy), and returns
    // the z coordinate of that surface point (dz + z).
    // On a miss: returns -INF and leaves *n untouched.
    __device__ float hit(float ox, float oy, float *n) {
        const float dx = ox - x;
        const float dy = oy - y;

        // Guard clause: outside (or exactly on) the projected circle.
        if (!(dx*dx+dy*dy < radius*radius)) {
            return -INF;
        }

        const float dz = sqrtf(radius*radius-dx*dx-dy*dy);
        *n = dz / sqrtf(radius*radius);
        return dz + z;
    }
};
// Table of the scene's spheres, declared in constant memory. It is filled
// from the host with cudaMemcpyToSymbol and read by `kernel`.
__constant__ Sphere s[SPHERES];
// Computes one RGBA pixel per thread.
//
// Each thread derives its pixel position from a 2D grid of 2D blocks, tests
// that position against every sphere in the constant-memory table `s`, and
// keeps the colour of the hit with the largest z value. Pixels with no hit
// stay black. Alpha is always 255.
//
// ptr: output buffer with 4 bytes per pixel; a row is
//      blockDim.x * gridDim.x pixels wide. There is no bounds guard, so the
//      launch grid must match the buffer dimensions exactly.
__global__ void kernel( unsigned char *ptr ) {
    // Pixel position of this thread and its linear index in the image.
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int offset = x + y * blockDim.x * gridDim.x;

    // Shift coordinates so that the image centre maps to (0, 0).
    const float ox = (x - DIM/2);
    const float oy = (y - DIM/2);

    float red = 0, green = 0, blue = 0;
    float bestZ = -INF;
    for (int idx = 0; idx < SPHERES; ++idx) {
        float shade;
        const float depth = s[idx].hit( ox, oy, &shade );
        // A miss returns -INF, which never exceeds bestZ, so `shade` is
        // only read after hit() has written it.
        if (!(depth > bestZ)) {
            continue;
        }
        red   = s[idx].r * shade;
        green = s[idx].g * shade;
        blue  = s[idx].b * shade;
        bestZ = depth;
    }

    // Store the pixel as four consecutive bytes: R, G, B, A.
    unsigned char *pixel = ptr + offset*4;
    pixel[0] = (int)(red * 255);
    pixel[1] = (int)(green * 255);
    pixel[2] = (int)(blue * 255);
    pixel[3] = 255;
}
// Prints a diagnostic and terminates the program when a CUDA runtime call
// did not return cudaSuccess.
static void checkCuda(cudaError_t status, const char *file, int line)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error at %s:%d: %s\n",
                file, line, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}
#define CUDA_CHECK(call) checkCuda((call), __FILE__, __LINE__)

// Renders SPHERES random spheres into a DIM x DIM bitmap on the GPU, reports
// the elapsed time measured with CUDA events, and displays the image.
int main(void)
{
    // Start timing; the interval covers allocation, scene setup, the kernel
    // and the copy of the image back to the host.
    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));
    CUDA_CHECK(cudaEventRecord(start, 0));

    CPUBitmap bitmap(DIM, DIM);
    unsigned char *dev_bitmap;
    CUDA_CHECK(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));

    // Build the scene on the host. A fixed-size array replaces the previous
    // unchecked malloc/free pair.
    Sphere temp_s[SPHERES];
    for (int i = 0; i < SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // Copy the spheres into constant memory. The copy direction is left at
    // its default, cudaMemcpyHostToDevice.
    CUDA_CHECK(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere) * SPHERES));

    // One thread per pixel in 16x16 blocks. The kernel has no bounds guard,
    // so DIM must be a multiple of 16.
    dim3 grids(DIM/16, DIM/16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);
    // A kernel launch returns no status; launch errors are fetched here.
    CUDA_CHECK(cudaGetLastError());

    // Copy the image back; errors raised while the kernel was running
    // surface through this call's return code.
    CUDA_CHECK(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                          bitmap.image_size(),
                          cudaMemcpyDeviceToHost));

    // Stop timing and report.
    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));
    float elapsedTime;
    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    printf("Time to generate: %3.1f ms\n", elapsedTime);
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));

    // Free the device buffer before displaying: the host copy in `bitmap` is
    // already complete, and display_and_exit(), judging by its name, may
    // never return (NOTE(review): confirm against cpu_bitmap.h).
    CUDA_CHECK(cudaFree(dev_bitmap));

    bitmap.display_and_exit();
    return 0;
}