<2022-05-05 周四>
如何写ScaleImage()的硬件加速函数(十)
难道就这么被我轻松实现了?
“如何写ScaleImage()的硬件加速函数(九)”是在“如何写ScaleImage()的硬件加速函数(八)”的基础上处理了图片放大变亮的问题,但是它们都只处理了X方向,没有实现原始函数ScaleImage()的Y方向缩放。
目前,先处理Y方向、再处理X方向的代码已经写好,如下:
static MagickBooleanType scaleFilter(MagickCLDevice device,
cl_command_queue queue, const Image* image, Image* filteredImage,
cl_mem imageBuffer, cl_uint matte_or_cmyk, cl_uint columns, cl_uint rows,
cl_mem scaledImageBuffer, cl_uint scaledColumns, cl_uint scaledRows,
ExceptionInfo* exception)
{
cl_kernel
scaleKernel;
cl_int
status;
const unsigned int
workgroupSize = 256;
float
scale;
int
numCachedPixels;
MagickBooleanType
outputReady;
size_t
gammaAccumulatorLocalMemorySize,
gsize[2],
i,
imageCacheLocalMemorySize,
pixelAccumulatorLocalMemorySize,
pixelAccumulatorLocalMemorySize2,
lsize[2],
totalLocalMemorySize,
weightAccumulatorLocalMemorySize;
unsigned int
chunkSize,
pixelPerWorkgroup;
scaleKernel = NULL;
outputReady = MagickFalse;
scale = (float)scaledColumns / columns; // TODO(ocl)
unsigned int stop = 0;
unsigned int next_row = 1;
float y_span = 1.0;
float y_scale = (float)scaledRows / rows;
if (scaledRows == rows)
stop++;
else {
while (y_scale < y_span) {
if (next_row) {
stop++;
}
y_span -= y_scale;
y_scale = (float)scaledRows / rows;
next_row = 1;
}
if (next_row) {
stop++;
next_row = 0;
}
}
if (scaledColumns < workgroupSize)
{
chunkSize = 32;
pixelPerWorkgroup = 32;
}
else
{
chunkSize = workgroupSize;
pixelPerWorkgroup = workgroupSize;
}
DisableMSCWarning(4127)
while (1)
RestoreMSCWarning
{
/* calculate the local memory size needed per workgroup */
numCachedPixels=(int) ceil((pixelPerWorkgroup-1)/scale+2*(0.5+MagickEpsilon)); // TODO(ocl)
imageCacheLocalMemorySize = numCachedPixels * sizeof(CLQuantum) * 4 * stop;
totalLocalMemorySize = imageCacheLocalMemorySize;
/* local size for the pixel accumulator */
pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
totalLocalMemorySize += pixelAccumulatorLocalMemorySize;
pixelAccumulatorLocalMemorySize2 = pixelAccumulatorLocalMemorySize;
totalLocalMemorySize += pixelAccumulatorLocalMemorySize2;
/* local memory size for the weight accumulator */
weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
totalLocalMemorySize += weightAccumulatorLocalMemorySize;
/* local memory size for the gamma accumulator */
gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
totalLocalMemorySize += gammaAccumulatorLocalMemorySize;
if (totalLocalMemorySize <= device->local_memory_size)
break;
else
{
pixelPerWorkgroup = pixelPerWorkgroup / 2;
chunkSize = chunkSize / 2;
if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
{
/* quit, fallback to CPU */
goto cleanup;
}
}
}
scaleKernel = AcquireOpenCLKernel(device, "ScaleFilter");
if (scaleKernel == (cl_kernel)NULL)
{
(void)OpenCLThrowMagickException(device, exception, GetMagickModule(),
ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
goto cleanup;
}
i = 0;
status = SetOpenCLKernelArg(scaleKernel, i++, sizeof(cl_mem), (void*)&imageBuffer);
status |= SetOpenCLKernelArg(scaleKernel, i++, sizeof(cl_uint

本文详细描述了如何使用OpenCL在硬件上优化ScaleImage函数以进行图像缩放,包括Y方向的处理和工作流程。然而,作者提到在将代码从Linux移植到Windows时遇到了问题,涉及跨平台兼容性以及输出显示错误。
最低0.47元/天 解锁文章
1221

被折叠的 条评论
为什么被折叠?



