CUDA图像处理 | Sobel算子边缘提取
CUDA数组与纹理操作
- CUDA数组与设备内存从相同的物理内存池中分配,但前者拥有一个不透明(opaque)的内存布局:该布局针对2D和3D的访问局部性做了优化。
- CUDA数组不消耗CUDA地址空间
- CUDA数组只能驻留在设备内存中,GPU在总线上传输数据时可以在线性内存与CUDA数组这两种表示形式之间转换。
- 纹理存储器是一种只读存储器。
- kernel 函数通过纹理参照系从纹理内存中读取数据(Texture Fetching,纹理拾取)。
- 纹理参照系必须通过 cudaBindTexture 或者 cudaBindTextureToArray 进行纹理绑定(Texture Binding)。
- 用 cudaUnbindTexture 来解除绑定。
使用纹理存储器处理图像一般包括几个步骤:
声明纹理参照系:
texture<Type, Dim, ReadMode> tex;
声明CUDA数组,设置绑定参数,初始化CUDA数组:
cudaArray *array = NULL; cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>(); cudaMallocArray(&array, &desc, nWidth, nHeight); cudaMemcpyToArray(array, 0, 0, h_pData, sizeof(unsigned char)*nWidth*nHeight, cudaMemcpyHostToDevice);
纹理绑定:
cudaBindTextureToArray(tex,array);
kernel纹理拾取与其他图像处理操作
d_pData[y*width+x] = tex2D(tex,tu,tv); // ...
解除绑定:
cudaUnbindTexture(tex);
释放CUDA数组
cudaFreeArray(array);
Sobel算子原理
Sobel 算子的主要作用是边缘检测。它是一种离散型差分算子,用来计算图像亮度函数梯度的近似值。
该算子包含横向和纵向两组 3×3 的模板,将它们分别与图像做平面卷积,即可得到横向及纵向的亮度差分近似值。
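横向模板 (Gx) 与纵向模板 (Gy):
$$G_x = \begin{bmatrix} -1 & 0 & +1 \\ -2 & 0 & +2 \\ -1 & 0 & +1 \end{bmatrix}, \qquad G_y = \begin{bmatrix} +1 & +2 & +1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix}$$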
G = sqrt(Gx^2+Gy^2)
为了提高计算效率,也常使用不开平方的近似值:|G| ≈ |Gx| + |Gy|
如果梯度G大于某一个阈值,则认为该点(x,y)为边缘点。
梯度方向为:
θ = arctan(Gy/Gx)
GPU代码实现
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
// cuda
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h" // 此头文件包含 __syncthreads ()函数
// cuda纹理
#ifndef __CUDACC__
#define __CUDACC__
#include "cuda_texture_types.h"
#include "texture_indirect_functions.h"
#include "texture_fetch_functions.h"
#endif
#include <iostream>
using namespace std;
using namespace cv;
// File-scope texture reference sampled by the SobelTex kernel: 2D, unsigned char texels.
// NOTE(review): the texture-reference API (texture<>, cudaBindTextureToArray,
// cudaUnbindTexture) is deprecated in recent CUDA toolkits and reportedly removed
// in CUDA 12 — confirm against the toolkit in use before upgrading.
texture<unsigned char, 2> tex;
// CUDA array allocated and filled by setupTexture(); it is what `tex` gets bound to.
cudaArray *array;
// Channel format selected by setupTexture() and passed to cudaBindTextureToArray().
cudaChannelFormatDesc desc;
// Allocates the file-scope CUDA array `array` and uploads a host image into it.
//
// Parameters:
//   iw, ih : image width and height in pixels (must be > 0).
//   data   : host pointer to tightly packed pixel rows (iw * Bpp bytes per row).
//   Bpp    : bytes per pixel; 1 selects an unsigned char channel format,
//            4 selects uchar4. Other values are rejected because the channel
//            format would not match the amount of data copied per pixel.
//
// Effects:
//   Overwrites the globals `desc` and `array` (any previous value of `array` is
//   NOT freed here; release it with cudaFreeArray() before calling again).
//   On success the caller owns `array` and must eventually cudaFreeArray() it.
//   On failure the error is reported on stderr and `array` is left NULL.
void setupTexture(int iw, int ih, unsigned char* data, int Bpp) {
    // `::array` is qualified so the global cannot be confused with std::array,
    // which `using namespace std` may make visible.
    ::array = NULL;

    if (data == NULL || iw <= 0 || ih <= 0 || (Bpp != 1 && Bpp != 4)) {
        std::cerr << "setupTexture: invalid argument (iw=" << iw
                  << ", ih=" << ih << ", Bpp=" << Bpp << ")" << std::endl;
        return;
    }

    desc = (Bpp == 1) ? cudaCreateChannelDesc<unsigned char>()
                      : cudaCreateChannelDesc<uchar4>();

    cudaError_t err = cudaMallocArray(&::array, &desc, iw, ih);
    if (err != cudaSuccess) {
        std::cerr << "setupTexture: cudaMallocArray failed: "
                  << cudaGetErrorString(err) << std::endl;
        ::array = NULL;
        return;
    }

    // cudaMemcpyToArray is deprecated; the 2D variant takes the row size in
    // bytes explicitly. Source rows are tightly packed, so pitch == row size.
    const size_t rowBytes = (size_t)iw * (size_t)Bpp;
    err = cudaMemcpy2DToArray(::array, 0, 0, data, rowBytes, rowBytes,
                              (size_t)ih, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        std::cerr << "setupTexture: cudaMemcpy2DToArray failed: "
                  << cudaGetErrorString(err) << std::endl;
        // Do not leave a half-initialised array behind.
        cudaFreeArray(::array);
        ::array = NULL;
    }
}
// Computes the Sobel response for one pixel from its 3x3 neighbourhood.
//
// Parameters are the nine neighbourhood samples, named by position:
//   ul um ur   (upper row:  left, middle, right)
//   ml mm mr   (middle row; mm is the centre pixel and does not affect the result)
//   ll lm lr   (lower row)
//
// Returns |Gx| + |Gy| saturated to the range [0, 255], where
//   Gx = (ur + 2*mr + lr) - (ul + 2*ml + ll)
//   Gy = (ul + 2*um + ur) - (ll + 2*lm + lr)
//
// Callable from both host and device so the same code can act as a CPU reference.
__host__ __device__ unsigned char ComputeSobel(unsigned char ul, unsigned char um, unsigned char ur,
                                               unsigned char ml, unsigned char mm, unsigned char mr,
                                               unsigned char ll, unsigned char lm, unsigned char lr) {
    // Each gradient lies in [-1020, 1020], so plain int arithmetic cannot overflow.
    const int horz = ur + 2 * mr + lr - ul - 2 * ml - ll;
    const int vert = ul + 2 * um + ur - ll - 2 * lm - lr;

    // A sum of absolute values is never negative, so only the upper bound needs clamping.
    const int magnitude = (horz < 0 ? -horz : horz) + (vert < 0 ? -vert : vert);
    return (unsigned char)(magnitude > 0xff ? 0xff : magnitude);
}
// Sobel edge-detection kernel; reads the source image through the texture
// reference `tex` and writes one output byte per pixel.
//
// Launch layout: one block per image row (blockIdx.x is the row index). The
// threads of a block walk across that row's columns in steps of blockDim.x, so
// any block size works. No shared memory is used.
//
// Parameters:
//   pSobelOrignal : output buffer.
//   Pitch         : distance in bytes between consecutive output rows.
//   w, h          : image width and height in pixels.
//   fScale        : currently unused (ComputeSobel takes no scale factor);
//                   kept so existing launches keep compiling.
//
// Neighbour fetches at the image border use coordinates outside [0, w) x [0, h);
// their value depends on the addressing mode configured on `tex`.
__global__ void SobelTex(unsigned char*pSobelOrignal, unsigned int Pitch, int w, int h, float fScale) {
    const int row = (int)blockIdx.x;
    if (row >= h) {
        return;  // grid taller than the image: nothing to write for this block
    }

    unsigned char* pSobel = (unsigned char*)(((char*)pSobelOrignal) + (size_t)row * Pitch);
    const float y = (float)row;

    // The increment must be `+=`: the original `i + blockDim.x` never advanced
    // the index, so every thread with threadIdx.x < w spun forever.
    for (int col = threadIdx.x; col < w; col += blockDim.x) {
        const float x = (float)col;

        unsigned char pix00 = tex2D(tex, x - 1, y - 1);
        unsigned char pix01 = tex2D(tex, x,     y - 1);
        unsigned char pix02 = tex2D(tex, x + 1, y - 1);
        unsigned char pix10 = tex2D(tex, x - 1, y);
        unsigned char pix11 = tex2D(tex, x,     y);
        unsigned char pix12 = tex2D(tex, x + 1, y);
        unsigned char pix20 = tex2D(tex, x - 1, y + 1);
        unsigned char pix21 = tex2D(tex, x,     y + 1);
        unsigned char pix22 = tex2D(tex, x + 1, y + 1);

        pSobel[col] = ComputeSobel(pix00, pix01, pix02,
                                   pix10, pix11, pix12,
                                   pix20, pix21, pix22);
    }
}
// Host entry point: runs the Sobel edge filter on an 8-bit single-channel image.
//
// Parameters:
//   pDestGPU : HOST buffer receiving nWidth * nHeight output bytes (despite the
//              name, it is the destination of a device-to-host copy).
//   pSrcGPU  : HOST buffer holding nWidth * nHeight tightly packed input bytes.
//   nWidth, nHeight : image size in pixels (must be > 0).
//
// Returns 0 on success. On any failure a message is written to stderr and -1 is
// returned; the contents of pDestGPU are then unspecified.
//
// Uses the default stream; the final cudaMemcpy blocks until the kernel is done.
extern "C"
double cudaSobelFilter(unsigned char* pDestGPU, unsigned char*pSrcGPU, int nWidth, int nHeight) {
    if (pDestGPU == NULL || pSrcGPU == NULL || nWidth <= 0 || nHeight <= 0) {
        std::cerr << "cudaSobelFilter: invalid argument" << std::endl;
        return -1;
    }

    // size_t arithmetic: nWidth * nHeight can overflow int for large images.
    const size_t nBytes = (size_t)nWidth * (size_t)nHeight * sizeof(unsigned char);
    const float imgScale = 1.f;

    unsigned char* d_pDestGPU = NULL;
    bool textureBound = false;
    const char* step = "cudaMalloc";

    // The source image reaches the GPU only through the texture array, so no
    // separate linear device copy of the input is needed.
    cudaError_t err = cudaMalloc((void**)&d_pDestGPU, nBytes);

    if (err == cudaSuccess) {
        step = "cudaMemset";
        err = cudaMemset(d_pDestGPU, 0, nBytes);
    }

    if (err == cudaSuccess) {
        step = "setupTexture";
        // setupTexture returns void, so clear any stale error first and then
        // read back whatever its CUDA calls recorded.
        cudaGetLastError();
        setupTexture(nWidth, nHeight, pSrcGPU, 1);
        err = cudaGetLastError();
        // `::array` is qualified to avoid any clash with std::array.
        if (err == cudaSuccess && ::array == NULL) {
            err = cudaErrorUnknown;
        }
    }

    if (err == cudaSuccess) {
        step = "cudaBindTextureToArray";
        err = cudaBindTextureToArray(&tex, ::array, &desc);
        textureBound = (err == cudaSuccess);
    }

    if (err == cudaSuccess) {
        step = "SobelTex launch";
        // One block per row; the output is a plain cudaMalloc buffer, so its
        // row pitch in bytes equals the image width.
        SobelTex<<<nHeight, 256>>>(d_pDestGPU, (unsigned int)nWidth, nWidth, nHeight, imgScale);
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        step = "cudaMemcpy (device to host)";
        err = cudaMemcpy(pDestGPU, d_pDestGPU, nBytes, cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        std::cerr << "cudaSobelFilter: " << step << " failed: "
                  << cudaGetErrorString(err) << std::endl;
    }

    // Cleanup runs on both the success and the failure path.
    if (textureBound) {
        cudaUnbindTexture(&tex);
    }
    if (::array != NULL) {
        // Previously the array allocated by setupTexture leaked on every call.
        cudaFreeArray(::array);
        ::array = NULL;
    }
    cudaFree(d_pDestGPU);  // cudaFree(NULL) is a no-op

    return (err == cudaSuccess) ? 0 : -1;
}
// ---------------------------------------------------------------------------
// Test driver: loads a grayscale image, runs the GPU Sobel filter on it and
// displays the result. Returns 0 on success, 1 if the image cannot be loaded
// or the filter reports a failure.
int main() {
    Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);
    if (image_source.empty()) {
        // imread returns an empty Mat on failure; its data pointer would be NULL.
        std::cerr << "main: failed to load input image" << std::endl;
        return 1;
    }

    // cudaSobelFilter expects tightly packed rows; clone() always yields a
    // continuous buffer.
    if (!image_source.isContinuous()) {
        image_source = image_source.clone();
    }

    // Output buffer with the same size and type as the input.
    Mat img = image_source.clone();

    if (cudaSobelFilter(img.data, image_source.data, image_source.cols, image_source.rows) != 0) {
        std::cerr << "main: cudaSobelFilter failed" << std::endl;
        return 1;
    }

    imshow("sobel", img);
    cv::waitKey(0);
    system("pause");
    return 0;
}