__global__ void matrix_mul_gpu(int *M, int* N, int* P, int width)
{
int i = threadIdx.x + blockDim.x * blockIdx.x;
int j = threadIdx.y + blockDim.y * blockIdx.y;
int sum = 0;
for(int k=0;k<width;k++)
{
int a = M[j*width+k];
int b = N[k*width+i];
sum += a*b;
}
P[j*width+i] = sum;
}
cuda矩阵运算
最新推荐文章于 2024-04-29 13:34:21 发布