在这篇博客中，我将指导您如何编写用于 2D 卷积的 CUDA 内核。
所需头文件：
#include <stdio.h>
#include <cuda_runtime.h>
内核：
// Compile-time size constants.
// NOTE(review): presumably IS is the input side length and KS the kernel side
// length used by host code elsewhere in the file. The part of the kernel
// visible here does not reference either macro. TODO confirm.
#define IS 5
#define KS 3
__global__
void convolution2DKernel(const float *input, const float *kernel, float *output,
int inputWidth, int inputHeight,
int kernelWidth, int kernelHeight) {
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;
if (col < inputWidth && row < inputHeight) {
int halfKernelWidth = kernelWidth / 2;
int halfKernelHeight = kernelHeight / 2;
float result =