很多图像处理算法,在编写完毕、调试正确之后,接下来就是漫长的算法优化。有些算法处理效果很棒,但是运算非常耗时,仍然无法集成进产品给用户使用。本文介绍的基于多线程和图像分块实现加速运算,只是众多加速方法中的一种,比较容易编写和实现。还有更底层的加速方法,如使用指令集加速运算(Intel芯片的SSE指令集或者ARM芯片的NEON指令集),即SIMD(Single Instruction Multiple Data,单指令多数据)技术,不过如果if条件判断比较多的话,编写起来会很头疼,本人目前在这方面的代码量还不够,还写不出太多心得。
对于一幅图像,比如1000*800分辨率,我们在处理时,通常的思路是从第1个像素开始,一直计算到最后一个像素。其实,目前不论手机还是个人电脑,处理器都是多核的。那么完全可以将整幅图像分成若干块,比如cpu为4核处理器,那么可以分成4块,每块图像大小为1000*200,这样程序可以创建4个线程,每个处理器核心执行一个线程,每个线程处理一个图像块。虽然这样操作后,运算速度并不会真的提升4倍,因为线程的创建、释放、上下文切换都要耗些时间,但运算速度还是会明显提升,一般4核 vs 1核,运算时间将降低一半。下面为完整代码,基本可以通用。
#include "stdafx.h"
#include <cv.h>
#include <highgui.h>
#include "pthread.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
using namespace cv;
using namespace std;
// Shorthand for an 8-bit unsigned value. The guard only detects an existing
// *macro* named uchar; it does not see typedefs declared by included headers.
#ifndef uchar
#define uchar unsigned char
#endif
// Larger of x and y. Function-like macro: the selected argument is evaluated
// twice, so do not pass expressions with side effects.
#ifndef MAX
#define MAX(x, y) (((x) >= (y)) ? (x) : (y))
#endif
// Smaller of x and y. Same double-evaluation caveat as MAX.
#ifndef MIN
#define MIN(x, y) (((x) >= (y)) ? (y) : (x))
#endif
// Clamp x to the range [0, y]. x may be evaluated up to three times.
#define CLAMP_XY(x, y) (((x) < 0) ? 0 : ((x) > (y) ? (y) : (x)))
// Byte offsets of the blue, green and red channels inside one 4-byte pixel.
#define AXJ_BLUE 0
#define AXJ_GREEN 1
#define AXJ_RED 2
// Pixel-format tag stored in BMPINFO::dwPixelFormat for 4-byte-per-pixel data.
// NOTE(review): the name suggests R,G,B,A byte order, while the AXJ_* offsets
// above put blue at byte 0 — confirm which order is intended.
#define BMPFORMAT_RGB32_R8G8B8A8 4
// Lightweight, non-owning description of a bitmap. The code in this file only
// ever uses index 0 of lPitch / pPlane; the remaining slots are left zeroed.
typedef struct __tag_bmpinfo
{
// Pixel-format tag (this file always stores BMPFORMAT_RGB32_R8G8B8A8).
unsigned int dwPixelFormat;
// Image width in pixels.
int lWidth;
// Image height in rows.
int lHeight;
// Bytes per row for each plane (this file sets lPitch[0] = lWidth * 4).
int lPitch[3];
// Pointer to the first byte of each plane; memory is managed by the caller.
unsigned char* pPlane[3];
}BMPINFO, *LPBMPINFO;
// Argument bundle handed to ImageFilterThread through pthread_create's void*.
typedef struct __filter_info
{
// Bitmap (or block of a bitmap) the thread filters in place.
BMPINFO *pSrcBitmap;
// Blend weight of the filtered result; the thread scales it by 256 and clamps
// it to [0, 256], so useful values are 0.0 (original only) .. 1.0 (filtered only).
float intensity;
// Not read anywhere in this file — presumably reserved for filter-specific
// parameters; confirm before relying on them.
int param0;
int param1;
} FilterInfo, *PFilterInfo;
// Convert a 4-byte-per-pixel bitmap to grayscale in place.
//
// For every pixel the blue, green and red bytes are replaced by the lightness
// value (max + min + 1) / 2 of those three channels; the fourth byte is left
// untouched.
//
// Rows are addressed through lPitch[0], so bitmaps with padded rows are handled
// correctly. A pitch smaller than one packed row (including an unset pitch of 0)
// is treated as "tightly packed", which matches the contiguous traversal the
// function used before it honoured the pitch.
//
// pSrcBitmap: bitmap whose pPlane[0] is modified in place. A NULL bitmap, a NULL
//             plane, or a non-positive width/height makes the call a no-op.
void ConvertToBlackWhite(BMPINFO *pSrcBitmap)
{
    if (pSrcBitmap == NULL || pSrcBitmap->pPlane[0] == NULL)
    {
        return;
    }

    const int width = pSrcBitmap->lWidth;
    const int height = pSrcBitmap->lHeight;
    if (width <= 0 || height <= 0)
    {
        return;
    }

    // size_t arithmetic avoids int overflow on very large images.
    const size_t row_bytes = static_cast<size_t>(width) * 4;
    const size_t pitch =
        (pSrcBitmap->lPitch[0] > 0 && static_cast<size_t>(pSrcBitmap->lPitch[0]) >= row_bytes)
            ? static_cast<size_t>(pSrcBitmap->lPitch[0])
            : row_bytes;

    uchar *base = pSrcBitmap->pPlane[0];
    for (int y = 0; y < height; y++)
    {
        uchar *px = base + static_cast<size_t>(y) * pitch;
        for (int x = 0; x < width; x++, px += 4)
        {
            const uchar max_val = MAX(MAX(px[AXJ_BLUE], px[AXJ_GREEN]), px[AXJ_RED]);
            const uchar min_val = MIN(MIN(px[AXJ_BLUE], px[AXJ_GREEN]), px[AXJ_RED]);
            // Operands are promoted to int, so the sum cannot overflow; the
            // result is at most 255 and fits back into one byte.
            const uchar lightness = static_cast<uchar>((max_val + min_val + 1) / 2);
            px[AXJ_BLUE] = px[AXJ_GREEN] = px[AXJ_RED] = lightness;
        }
    }
}
// 计算线程
void* ImageFilterThread(void *arg)
{
FilterInfo *filter_info = (FilterInfo *)arg;
BMPINFO *pSrcBitmap = filter_info->pSrcBitmap;
int intensity = (int)CLAMP_XY(filter_info->intensity * 256, 256);
uchar *dataCopy = (uchar *)malloc(pSrcBitmap->lPitch[0] * pSrcBitmap->lHeight);
memcpy(dataCopy, pSrcBitmap->pPlane[0], pSrcBitmap->lPitch[0] * pSrcBitmap->lHeight);
// 以彩色图像转黑白图像为例(印象里ps3就是这个算法)
ConvertToBlackWhite(pSrcBitmap);
// 输出结果
uchar *src_data = dataCopy;
uchar *dst_data = pSrcBitmap->pPlane[0];
int size = pSrcBitmap->lWidth * pSrcBitmap->lHeight;
for (int i = 0; i < size; i++, src_data += 4, dst_data += 4)
{
dst_data[0] = (src_data[0] * (256 - intensity) + dst_data[0] * intensity) >> 8;
dst_data[1] = (src_data[1] * (256 - intensity) + dst_data[1] * intensity) >> 8;
dst_data[2] = (src_data[2] * (256 - intensity) + dst_data[2] * intensity) >> 8;
}
free(dataCopy);
dataCopy = NULL;
return NULL;
}
// Split a bitmap into horizontal bands and filter each band on its own thread.
//
// The image is cut into block_count bands of lHeight / block_count rows; the
// last band also receives the remainder rows. Each band is described by its own
// BMPINFO that points into the caller's pixel buffer (no pixels are copied
// here), and is processed by ImageFilterThread with a blend intensity of 0.8.
// The call blocks until every band has been processed.
//
// pSrcBitmap:  bitmap to filter in place. NULL, a NULL plane, or a non-positive
//              width/height makes the call a no-op.
// block_count: requested number of bands/threads. Values below 1 are treated as
//              1, and values above the image height are reduced to the height
//              so no thread is started for an empty band.
//
// Failure handling: if the bookkeeping arrays cannot be allocated the whole
// image is filtered on the calling thread; if a single thread cannot be
// created, that band is filtered on the calling thread instead.
void ImageFilterCommon(BMPINFO *pSrcBitmap, int block_count)
{
    if (pSrcBitmap == NULL || pSrcBitmap->pPlane[0] == NULL ||
        pSrcBitmap->lWidth <= 0 || pSrcBitmap->lHeight <= 0)
    {
        return;
    }

    const int width = pSrcBitmap->lWidth;
    const int height = pSrcBitmap->lHeight;
    const float intensity = 0.8f;

    if (block_count < 1)
    {
        block_count = 1;  // also protects the division below
    }
    if (block_count > height)
    {
        block_count = height;
    }

    // Block parameters. A pitch smaller than one packed row (e.g. unset / 0)
    // means the rows are tightly packed.
    const size_t row_bytes = static_cast<size_t>(width) * 4;
    const size_t pitch =
        (pSrcBitmap->lPitch[0] > 0 && static_cast<size_t>(pSrcBitmap->lPitch[0]) >= row_bytes)
            ? static_cast<size_t>(pSrcBitmap->lPitch[0])
            : row_bytes;
    const int block_src_height = height / block_count;
    // size_t keeps the per-block byte offset from overflowing int.
    const size_t block_src_size = pitch * static_cast<size_t>(block_src_height);

    pthread_t *block_thread = static_cast<pthread_t *>(malloc(block_count * sizeof(pthread_t)));
    BMPINFO *block_src_bmp = static_cast<BMPINFO *>(malloc(block_count * sizeof(BMPINFO)));
    FilterInfo *filter_info_array = static_cast<FilterInfo *>(malloc(block_count * sizeof(FilterInfo)));
    // thread_started[i] != 0  <=>  block_thread[i] is a joinable thread.
    unsigned char *thread_started = static_cast<unsigned char *>(calloc(block_count, 1));

    if (block_thread == NULL || block_src_bmp == NULL ||
        filter_info_array == NULL || thread_started == NULL)
    {
        free(thread_started);
        free(filter_info_array);
        free(block_src_bmp);
        free(block_thread);

        // Degrade gracefully: process the whole image on this thread.
        FilterInfo whole_image;
        memset(&whole_image, 0, sizeof(FilterInfo));
        whole_image.pSrcBitmap = pSrcBitmap;
        whole_image.intensity = intensity;
        ImageFilterThread(&whole_image);
        return;
    }

    for (int i = 0; i < block_count; i++)
    {
        const bool is_last = (i == block_count - 1);

        memset(&block_src_bmp[i], 0, sizeof(BMPINFO));
        memset(&filter_info_array[i], 0, sizeof(FilterInfo));

        block_src_bmp[i].dwPixelFormat = BMPFORMAT_RGB32_R8G8B8A8;
        block_src_bmp[i].lWidth = width;
        // The last band takes whatever rows are left after the even split.
        block_src_bmp[i].lHeight = is_last ? height - block_src_height * i : block_src_height;
        block_src_bmp[i].lPitch[0] = static_cast<int>(pitch);
        block_src_bmp[i].pPlane[0] = pSrcBitmap->pPlane[0] + block_src_size * static_cast<size_t>(i);

        filter_info_array[i].pSrcBitmap = &block_src_bmp[i];
        filter_info_array[i].intensity = intensity;

        if (pthread_create(&block_thread[i], NULL, ImageFilterThread, &filter_info_array[i]) == 0)
        {
            thread_started[i] = 1;
        }
        else
        {
            // Could not start a thread: do this band's work right here so the
            // image is still fully processed, and never join an invalid handle.
            ImageFilterThread(&filter_info_array[i]);
        }
    }

    // Block the calling thread until every band has been processed.
    for (int i = 0; i < block_count; i++)
    {
        if (thread_started[i])
        {
            pthread_join(block_thread[i], NULL);
        }
    }

    // TODO: merge results.
    // Per-pixel operations such as colour adjustment need no merge step. Filters
    // that look at neighbouring pixels need overlapping bands, and a merge
    // routine written for the specific algorithm.

    // Release bookkeeping memory.
    free(thread_started);
    free(filter_info_array);
    free(block_src_bmp);
    free(block_thread);
}
int main()
{
const char* fileName = "test.png";
Mat src = imread(fileName);
imshow("src", src);
BMPINFO srcbmp = { 0 }, texbmp = { 0 }, texbmp2 = { 0 }, lutbmp = { 0 };
srcbmp.dwPixelFormat = BMPFORMAT_RGB32_R8G8B8A8;
srcbmp.lWidth = src.cols;
srcbmp.lHeight = src.rows;
srcbmp.lPitch[0] = srcbmp.lWidth * 4;
srcbmp.pPlane[0] = (unsigned char*)malloc(srcbmp.lPitch[0] * srcbmp.lHeight);
uchar *pTempData = src.data;
uchar *pdata = srcbmp.pPlane[0];
for (int i = 0; i < src.rows * src.cols; i++, pdata += 4, pTempData += 3)
{
pdata[0] = pTempData[0];
pdata[1] = pTempData[1];
pdata[2] = pTempData[2];
pdata[3] = 255;
}
///
clock_t startTime = clock();
ImageFilterCommon(&srcbmp, 4);
printf("the time is.... %d ms\n", clock() - startTime);
///
Mat dst(cv::Size(srcbmp.lWidth, srcbmp.lHeight), src.type());
pTempData = dst.data;
pdata = srcbmp.pPlane[0];
for (int i = 0; i < dst.rows * dst.cols; i++, pdata += 4, pTempData += 3)
{
pTempData[0] = pdata[0];
pTempData[1] = pdata[1];
pTempData[2] = pdata[2];
}
imshow("dst", dst);
cv::waitKey();
return 0;
}
完整工程下载链接:http://download.youkuaiyun.com/detail/u013085897/9831338 。第一次上传三个多小时后都没有显示,以为失败了,于是又上传了一次,结果今天早晨两份都显示出来了,然后想删掉一份,却又无法删除。csdn的这个审核机制有点坑,而且无法及时删除资源,不知道是基于什么考虑。