网格、块、线程的索引计算(二维)
一个线程在整个网格中的全局二维索引为 ix = threadIdx.x + blockIdx.x * blockDim.x 和 iy = threadIdx.y + blockIdx.y * blockDim.y;对于按行存储、每行 nx 个元素的矩阵,该线程对应的一维线性索引为 idx = iy * nx + ix。
代码
/******************************************************************
* Author: Da Liu
* Date: 2024-07-25
* File: grid2D_block2D.cu
* Description: 组织线程模型:二维网格二维线程块计算二维矩阵加法.
*****************************************************************/
#include <stdio.h>
#include "../cudalearn/tools/common.cuh"
/**
 * Element-wise integer matrix addition: c = a + b.
 *
 * The matrices are addressed as flat arrays with row length nx, i.e. element
 * (row, col) lives at offset row * nx + col. Each thread derives its column
 * from the x launch dimension and its row from the y launch dimension, and
 * handles at most one element.
 *
 * @param a   first input matrix (nx * ny ints)
 * @param b   second input matrix (nx * ny ints)
 * @param c   output matrix (nx * ny ints)
 * @param nx  number of columns (row length)
 * @param ny  number of rows
 */
__global__ void add_matrix(int *a, int *b, int *c, const int nx, const int ny)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so threads past the matrix
    // edge have nothing to do.
    if (col >= nx || row >= ny) {
        return;
    }

    const unsigned int offset = row * nx + col;
    c[offset] = a[offset] + b[offset];
}
// Releases every host and device buffer used by main().
// free(NULL) and cudaFree(NULL) are no-ops, so this is safe to call on any
// partially-allocated state as long as unallocated pointers are NULL.
static void releaseMatrixBuffers(int *hostA, int *hostB, int *hostC,
                                 int *devA, int *devB, int *devC)
{
    free(hostA);
    free(hostB);
    free(hostC);
    ErrorCheck(cudaFree(devA), __FILE__, __LINE__);
    ErrorCheck(cudaFree(devB), __FILE__, __LINE__);
    ErrorCheck(cudaFree(devC), __FILE__, __LINE__);
}

/**
 * Host driver: adds two nx-by-ny int matrices on the GPU using a 2D grid of
 * 2D thread blocks, copies the result back and prints the first entries.
 *
 * Returns 0 on success. Exits with -1 if host allocation fails and with 1 if
 * any device allocation, copy, or the kernel launch/execution fails.
 */
int main()
{
    setGPU(); // select the GPU device

    int nx = 16, ny = 8;                     // matrix dimensions
    int nxy = nx * ny;                       // number of matrix elements
    size_t stBytesCount = nxy * sizeof(int); // size of one matrix in bytes

    // Host buffers.
    int *ipHost_A = (int *)malloc(stBytesCount);
    int *ipHost_B = (int *)malloc(stBytesCount);
    int *ipHost_C = (int *)malloc(stBytesCount);
    if (ipHost_A == NULL || ipHost_B == NULL || ipHost_C == NULL)
    {
        printf("Memory allocation failed!\n");
        free(ipHost_A);
        free(ipHost_B);
        free(ipHost_C);
        exit(-1);
    }
    for (int i = 0; i < nxy; i++)
    {
        ipHost_A[i] = i;     // A holds 0 .. nxy-1
        ipHost_B[i] = i + 1; // B holds A's value + 1
    }
    memset(ipHost_C, 0, stBytesCount); // C starts at zero

    // Device buffers. They start as NULL so that a failed cudaMalloc leaves a
    // well-defined value behind and cleanup can free them unconditionally.
    int *ipDevice_A = NULL, *ipDevice_B = NULL, *ipDevice_C = NULL;
    cudaError_t allocA = ErrorCheck(cudaMalloc((void **)&ipDevice_A, stBytesCount), __FILE__, __LINE__);
    cudaError_t allocB = ErrorCheck(cudaMalloc((void **)&ipDevice_B, stBytesCount), __FILE__, __LINE__);
    cudaError_t allocC = ErrorCheck(cudaMalloc((void **)&ipDevice_C, stBytesCount), __FILE__, __LINE__);
    if (allocA != cudaSuccess || allocB != cudaSuccess || allocC != cudaSuccess)
    {
        printf("Device memory allocation failed!\n");
        releaseMatrixBuffers(ipHost_A, ipHost_B, ipHost_C, ipDevice_A, ipDevice_B, ipDevice_C);
        exit(1);
    }

    // Upload the inputs (and the zeroed output) to the device.
    cudaError_t copyA = ErrorCheck(cudaMemcpy(ipDevice_A, ipHost_A, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__);
    cudaError_t copyB = ErrorCheck(cudaMemcpy(ipDevice_B, ipHost_B, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__);
    cudaError_t copyC = ErrorCheck(cudaMemcpy(ipDevice_C, ipHost_C, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__);
    if (copyA != cudaSuccess || copyB != cudaSuccess || copyC != cudaSuccess)
    {
        printf("Host-to-device memory copy failed!\n");
        releaseMatrixBuffers(ipHost_A, ipHost_B, ipHost_C, ipDevice_A, ipDevice_B, ipDevice_C);
        exit(1);
    }

    // Launch configuration: 4x4 threads per block, grid rounded up so that it
    // covers the whole matrix. Named block/grid to avoid confusion with the
    // blockDim/gridDim built-ins used inside kernels.
    dim3 block(4, 4);
    dim3 grid((nx + block.x - 1) / block.x, (ny + block.y - 1) / block.y);
    printf("Grid Dim: %u, %u\n", grid.x, grid.y);
    printf("Block Dim: %u, %u\n", block.x, block.y);

    add_matrix<<<grid, block>>>(ipDevice_A, ipDevice_B, ipDevice_C, nx, ny);
    // A launch does not return an error itself: cudaGetLastError() reports an
    // invalid launch, cudaDeviceSynchronize() reports faults during execution.
    cudaError_t launchStatus = ErrorCheck(cudaGetLastError(), __FILE__, __LINE__);
    cudaError_t syncStatus = ErrorCheck(cudaDeviceSynchronize(), __FILE__, __LINE__);
    if (launchStatus != cudaSuccess || syncStatus != cudaSuccess)
    {
        printf("Kernel execution failed!\n");
        releaseMatrixBuffers(ipHost_A, ipHost_B, ipHost_C, ipDevice_A, ipDevice_B, ipDevice_C);
        exit(1);
    }

    cudaError_t copyBack = ErrorCheck(cudaMemcpy(ipHost_C, ipDevice_C, stBytesCount, cudaMemcpyDeviceToHost), __FILE__, __LINE__);
    if (copyBack != cudaSuccess)
    {
        printf("Device-to-host memory copy failed!\n");
        releaseMatrixBuffers(ipHost_A, ipHost_B, ipHost_C, ipDevice_A, ipDevice_B, ipDevice_C);
        exit(1);
    }

    // Print at most the first 20 elements; clamp so that smaller matrices are
    // not read out of bounds.
    const int iPrintCount = (nxy < 20) ? nxy : 20;
    for (int i = 0; i < iPrintCount; i++)
    {
        printf("id = %d, matrix_A = %d, matrix_B = %d, matrix_C = %d\n", i + 1, ipHost_A[i], ipHost_B[i], ipHost_C[i]);
    }

    releaseMatrixBuffers(ipHost_A, ipHost_B, ipHost_C, ipDevice_A, ipDevice_B, ipDevice_C);
    return 0;
}
上面代码中包含的common.cuh的代码是:
/******************************************************************
* Author: Da Liu
* Date: 2024-07-10
* File: common.cuh
*****************************************************************/
#pragma once
#include<iostream>
#include<stdlib.h>
#include<stdio.h>
#include<cuda_runtime.h>
cudaError_t ErrorCheck(cudaError_t error_code, const char* filename, int lineNumber);
// Queries how many CUDA devices are available and selects device 0.
// Prints the outcome of each step to stdout. Calls exit(-1) when the device
// query fails, when no device is reported, or when device 0 cannot be set.
void setGPU(){
    int deviceCount = 0;
    const cudaError_t countStatus = ErrorCheck(cudaGetDeviceCount(&deviceCount), __FILE__, __LINE__);
    if (countStatus != cudaSuccess || deviceCount == 0)
    {
        std::cout << "No CUDA-capable device found." << std::endl;
        exit(-1);
    }
    std::cout << "Number of CUDA-capable devices found: " << deviceCount << std::endl;

    const int targetDevice = 0;
    const cudaError_t selectStatus = ErrorCheck(cudaSetDevice(targetDevice), __FILE__, __LINE__);
    if (selectStatus != cudaSuccess)
    {
        std::cout << "Failed to set device " << targetDevice << std::endl;
        exit(-1);
    }
    std::cout << "Device " << targetDevice << " set successfully." << std::endl;
}
// Error-check helper for CUDA runtime calls.
//
// If error_code is not cudaSuccess, prints the numeric code, the error name,
// the error description and the reporting file/line to stdout. Nothing is
// printed on success. The function never aborts; it always returns
// error_code unchanged so the caller can decide how to react.
//
//   error_code  status returned by a CUDA runtime call
//   filename    source file of the call site (typically __FILE__); may be NULL
//   lineNumber  source line of the call site (typically __LINE__)
cudaError_t ErrorCheck(cudaError_t error_code, const char* filename, int lineNumber)
{
    if(error_code != cudaSuccess)
    {
        // Stream each field explicitly: printf-style placeholders such as
        // %d / %s are not interpreted by std::cout and would be printed
        // verbatim, with the values run together.
        std::cout << "CUDA error:\r\n"
                  << "code=" << static_cast<int>(error_code)
                  << ",name=" << cudaGetErrorName(error_code)
                  << ",description=" << cudaGetErrorString(error_code) << "\r\n"
                  // Streaming a null const char* is undefined behaviour.
                  << "file=" << (filename != NULL ? filename : "(unknown)")
                  << ",line=" << lineNumber << "\r\n"
                  << std::flush;
    }
    return error_code;
}
此时需要将grid2D_block2D.cu编译成可执行文件:在代码所在目录的终端中依次输入下面两条命令(其中name需替换为实际文件名,例如grid2D_block2D)
nvcc name.cu -o name
./name.exe
这样就可以输出结果。(.exe后缀仅适用于Windows;在Linux下生成的可执行文件没有后缀,应运行 ./name。)