c++和cuda混合编程记录（一）

最新推荐文章于 2024-10-05 19:54:19 发布

原创最新推荐文章于 2024-10-05 19:54:19 发布 · 6.9k 阅读

13 ·

CC 4.0 BY-SA版权

文章标签：

#并行编程 #C++ #CUDA

CUDA 专栏收录该内容

17 篇文章

订阅专栏

本文探讨了CUDA与C++混编的可能性及挑战。通过实验对比使用类和struct的不同表现，验证了在某些情况下使用类并不会导致严重问题。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

最近在用cuda将RT（Ray Tracing，光线追踪）并行化。

很多人都说，最好使用c来写cuda，不要用类，尽量用struct代替，因为在cuda里面套用oo的写法目前会有很多问题。

我也试过，确实有很多问题：之前用cpp写的光线追踪在用cuda并行化的过程中遇到了很多问题，我都快到崩溃的边缘了！后来把类改成struct之后（估计我还改了其他东西-_-），居然奇迹般地好了。所以心里一直有疑问：cuda真的不能和cpp一起使用吗？不能在cuda中使用oo的思想吗？我也找过一些资料，说是可能和nvcc编译器有关，很多人吐槽nvcc各种难用-_-（深表同意）。

所以我决定做做实验，将oo编程思想运用到cuda中（c++和cuda混合编程），看能否复现之前遇到的那些难以解决的bug。

// CUDA-C includes
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>

#define N ( 3 * 1024)


/*
 * Abstract device-side base class for things that can be "hit".
 *
 * All members are __device__ only, so instances can be created and used
 * from device code but not from host code.
 *
 * hit() is pure virtual: every concrete subclass supplies its own value.
 */
class object
{
public :
    __device__ object();
    // Virtual because the class is used polymorphically: destroying a derived
    // object through an object* must run the derived destructor. With a
    // non-virtual destructor that would be undefined behaviour.
    __device__ virtual ~object();
    __device__ virtual int hit()=0;
};
// Device-side default constructor. The base class holds no data members, so
// there is nothing to initialise. (The original body only declared a local
// variable that was never read; it has been removed.)
__device__ object::object()
{
}
// Device-side destructor of the base class; it has no work to do.
__device__ object::~object() {}
/*
 * Concrete object whose hit() implementation overrides object::hit().
 * Every member is __device__ only.
 */
class sphere : public object
{
public:
    __device__ sphere();
    __device__ ~sphere();

    // Overrides the pure virtual object::hit(); the keyword is spelled out
    // here only for readability, the function is virtual either way.
    __device__ virtual int hit();
};
// Device-side default constructor; a sphere has no members to set up.
__device__ sphere::sphere() {}
// Returns the fixed value that identifies a sphere in this experiment.
__device__ int sphere::hit()
{
    const int sphereHitValue = 45;
    return sphereHitValue;
}
// Device-side destructor. Prints a marker line with device printf so that a
// run shows whether (and how many times) a sphere was destroyed.
__device__ sphere::~sphere()
{
    // The message text is emitted at runtime and is left exactly as written.
    printf("deconstrust\n");
}

/*
 * Second concrete object; overrides object::hit() with its own value.
 * Every member is __device__ only.
 */
class cube : public object
{
public:
    __device__ cube();
    __device__ ~cube();

    // Overrides the pure virtual object::hit(); the keyword is spelled out
    // here only for readability, the function is virtual either way.
    __device__ virtual int hit();
};
// Device-side default constructor; a cube has no members to set up.
__device__ cube::cube() {}
// Device-side destructor; a cube owns nothing that needs releasing.
__device__ cube::~cube() {}
// Returns the fixed value that identifies a cube in this experiment.
__device__ int cube::hit()
{
    const int cubeHitValue = 55;
    return cubeHitValue;
}

// Calls hit() on both objects through their base-class pointers (virtual
// dispatch) and returns the sum of the two results.
// Both pointers are dereferenced unconditionally, so neither may be null.
__device__ int AddHit(object* o, object* t)
{
    const int firstHit = o->hit();
    const int secondHit = t->hit();
    return firstHit + secondHit;
}

/*
 * Kernel: each thread builds one sphere and one cube, adds their hit()
 * values through base-class pointers (AddHit), and stores that sum into
 * every element of c it visits.
 *
 * Parameters
 *   a, b, d  accepted to keep the launch signature unchanged; never accessed.
 *   c        device buffer that must hold at least N ints; fully overwritten.
 *
 * Launch: one-dimensional grid and block. The grid-stride loop below covers
 * all N elements for any grid/block size. No shared memory is used.
 *
 * The objects are automatic (per-thread) variables rather than device-heap
 * allocations. The original code called `new` twice per thread and never
 * called `delete`, which leaked device heap memory on every launch and
 * dereferenced the result without checking that the allocation succeeded.
 *
 * Side effect: both objects are destroyed when the kernel returns, so
 * ~sphere() runs once per thread and prints its marker line via device printf.
 */
__global__ void  add(int *a , int *b , int* c , int* d)
{
    sphere s;
    cube cu;

    // Virtual dispatch still happens: AddHit only sees object* pointers.
    const int x = AddHit(&s, &cu);

    const int stride = blockDim.x * gridDim.x;
    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < N; tid += stride)
    {
        c[ tid ] = x;
    }
}
// Main cuda function

// File-scope buffers. runCudaPart() below declares local arrays with the same
// names, so these globals are not touched by it.
int a[N] , b[N] , c[N];

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host driver for the experiment:
 *   1. allocates four device buffers of N ints,
 *   2. fills host arrays a and b and copies them to the device,
 *   3. launches the add kernel with 128 blocks of 128 threads,
 *   4. copies the result buffer back and prints every element,
 *   5. releases all device buffers.
 *
 * Every CUDA runtime call is checked. On the first failure a message is
 * written to stderr, the remaining steps are skipped (the result is not
 * printed, because it would be uninitialised), and the buffers allocated so
 * far are still freed.
 */
void runCudaPart()
{
    int a[N] , b[N] , c[N];

    // Start as NULL so the cleanup at the end is valid on every path;
    // cudaFree(NULL) is a no-op.
    int *dev_a = NULL;
    int *dev_b = NULL;
    int *dev_c = NULL;
    int *d = NULL;

    const size_t bytes = N * sizeof(int);

    for(int i = 0; i < N; i++)
    {
        a[i] = -i;
        b[i] = i + 1;
    }

    // && short-circuits, so nothing runs after the first failing call.
    bool ok = cudaCallSucceeded(cudaMalloc( (void**)&dev_a , bytes ), "cudaMalloc(dev_a)")
           && cudaCallSucceeded(cudaMalloc( (void**)&dev_b , bytes ), "cudaMalloc(dev_b)")
           && cudaCallSucceeded(cudaMalloc( (void**)&dev_c , bytes ), "cudaMalloc(dev_c)")
           && cudaCallSucceeded(cudaMalloc( (void**)&d , bytes ), "cudaMalloc(d)")
           && cudaCallSucceeded(cudaMemcpy( dev_a , a , bytes , cudaMemcpyHostToDevice), "cudaMemcpy(a -> dev_a)")
           && cudaCallSucceeded(cudaMemcpy( dev_b , b , bytes , cudaMemcpyHostToDevice), "cudaMemcpy(b -> dev_b)");

    if (ok)
    {
        add<<<128,128>>>( dev_a , dev_b , dev_c, d);

        // A kernel launch returns nothing: configuration errors are reported
        // by cudaGetLastError(), and errors raised while the kernel runs
        // surface at the blocking cudaMemcpy that follows.
        ok = cudaCallSucceeded(cudaGetLastError(), "add kernel launch")
          && cudaCallSucceeded(cudaMemcpy( c , dev_c , bytes , cudaMemcpyDeviceToHost), "cudaMemcpy(dev_c -> c)");
    }

    if (ok)
    {
        // Print the result.
        for(int  i = 0; i < N ; i++)
        {
            printf(" %d \n",  c[i]);
        }
    }

    // Release every device buffer, including d, which the original never freed.
    cudaCallSucceeded(cudaFree( dev_a ), "cudaFree(dev_a)");
    cudaCallSucceeded(cudaFree( dev_b ), "cudaFree(dev_b)");
    cudaCallSucceeded(cudaFree( dev_c ), "cudaFree(dev_c)");
    cudaCallSucceeded(cudaFree( d ), "cudaFree(d)");
}


// Program entry point: runs the CUDA experiment once and exits with status 0.
// The command-line arguments are not used.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    runCudaPart();
    return 0;
}

这份代码跑起来还是没问题的。

奇怪，居然没问题-_-。

先记录（mark）一下，之后继续修改代码，做其他实验。