hello_world.cu
#include <stdio.h>
// CUDA kernel: runs on the GPU.
// Every thread of the launch executes this body once and prints a greeting
// tagged with its own thread index and block index (x dimension only).
__global__ void helloFromGPU()
{
    // Capture the built-in indices under descriptive names before printing.
    const unsigned int threadInBlock = threadIdx.x;
    const unsigned int blockInGrid = blockIdx.x;

    printf("Hello World from GPU! Thread %d in block %d\n", threadInBlock, blockInGrid);
}
// Host entry point.
// Prints a greeting from the CPU, launches helloFromGPU on 2 blocks of
// 5 threads each, then waits for the device to finish.
//
// Returns 0 on success, or 1 if the kernel launch or the synchronization
// reports a CUDA error (the error description is written to stderr).
int main()
{
    // Print from the host (CPU).
    printf("Hello World from CPU!\n");

    // Launch the kernel.
    // Launch parameters: number of blocks, number of threads per block.
    helloFromGPU<<<2, 5>>>();

    // A kernel launch yields no status value of its own; launch errors
    // (for example an invalid configuration) are retrieved with cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Wait for the GPU to finish; errors raised while the kernel was
    // executing are reported by this synchronizing call.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
编译运行:
nvcc hello_world.cu -o hello_world
./hello_world
输出如下(不同线程块之间的打印顺序不固定,每次运行可能不同):
Hello World from CPU!
Hello World from GPU! Thread 0 in block 1
Hello World from GPU! Thread 1 in block 1
Hello World from GPU! Thread 2 in block 1
Hello World from GPU! Thread 3 in block 1
Hello World from GPU! Thread 4 in block 1
Hello World from GPU! Thread 0 in block 0
Hello World from GPU! Thread 1 in block 0
Hello World from GPU! Thread 2 in block 0
Hello World from GPU! Thread 3 in block 0
Hello World from GPU! Thread 4 in block 0