基于连通域和投影结合的汉字切割

最新推荐文章于 2021-03-16 16:03:03 发布
原创最新推荐文章于 2021-03-16 16:03:03 发布 · 1.6k 阅读
6 ·
CC 4.0 BY-SA版权
文章标签：
#ocr #算法 #汉字切割
本文介绍了一种基于连通域和投影算法的图像切割方法，该方法首先利用连通域找出图像中的一行文字，然后使用投影算法进一步切割出单个汉字。文章详细描述了具体的实现过程和技术细节。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
#include<io.h>  
#include<math.h>  
#include<vector>  
#include "cv.h"    
#include "highgui.h"    
#include "cxcore.h"    
#include <iostream>   
#include <string>  
#include "opencv2/opencv.hpp"  
#include "FreeImage.h"    
#pragma comment(lib,"FreeImage.lib")  


using namespace std;
using namespace cv;
#include <opencv2/opencv.hpp>    
// Running number embedded in the file name of every character image written by
// qiege() ("wnd_<idx>.jpg"); it is incremented once per saved character and is
// never reset, so numbering continues across input files.
int idx = 1;
// Mean bounding-box width and height of the contours that pass qiege()'s size
// filter (10..500 px per side). qiege() resets and recomputes both for each image
// and uses them as thresholds when grouping contours into text lines.
double avgWidth = 0;
double avgHeight = 0;
#include <cassert>  

/**
 * Decodes a GIF file with FreeImage and copies it into a newly allocated
 * 8-bit, 3-channel (BGR) IplImage.
 *
 * Every page of the GIF is written into the same buffer in turn, so the returned
 * image holds the pixels of the last page that could be locked.
 *
 * @param filename  Path of the GIF file to decode.
 * @return A new image the caller must free with cvReleaseImage(), or NULL when
 *         the file cannot be opened as a GIF or has a zero dimension.
 */
IplImage*  gif2ipl(const char* filename)
{
	FreeImage_Initialise();
	const FREE_IMAGE_FORMAT fif = FIF_GIF;

	// The single-bitmap handle is only used to learn the image dimensions; the
	// multi-bitmap handle gives access to the individual pages.
	FIBITMAP* fiBmp = FreeImage_Load(fif, filename, GIF_DEFAULT);
	FIMULTIBITMAP* pGIF = FreeImage_OpenMultiBitmap(fif, filename, 0, 1, 0, GIF_PLAYBACK);
	if (fiBmp == NULL || pGIF == NULL)
	{
		// Not a readable GIF: release whatever was opened and report failure
		// instead of going on with NULL handles.
		if (fiBmp != NULL)
		{
			FreeImage_Unload(fiBmp);
		}
		if (pGIF != NULL)
		{
			FreeImage_CloseMultiBitmap(pGIF, GIF_DEFAULT);
		}
		FreeImage_DeInitialise();
		return NULL;
	}

	const int gifImgCnt = FreeImage_GetPageCount(pGIF);
	const int width = static_cast<int>(FreeImage_GetWidth(fiBmp));
	const int height = static_cast<int>(FreeImage_GetHeight(fiBmp));
	FreeImage_Unload(fiBmp);

	if (width <= 0 || height <= 0)
	{
		FreeImage_CloseMultiBitmap(pGIF, GIF_DEFAULT);
		FreeImage_DeInitialise();
		return NULL;
	}

	IplImage* iplImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 3);
	// origin = 1 marks the buffer as bottom-left origin (Windows bitmap style);
	// FreeImage pixel coordinates are copied row for row without flipping.
	iplImg->origin = 1;

	// Colour of the pixel currently being copied. Kept on the stack: the
	// original heap-allocated RGBQUAD was never freed.
	RGBQUAD pixel = {};

	for (int curFrame = 0; curFrame < gifImgCnt; curFrame++)
	{
		FIBITMAP* pFrame = FreeImage_LockPage(pGIF, curFrame);
		if (pFrame == NULL)
		{
			continue; // page could not be decoded; keep what we have so far
		}

		for (int y = 0; y < height; y++)
		{
			char* dstRow = iplImg->imageData + y * iplImg->widthStep;
			for (int x = 0; x < width; x++)
			{
				FreeImage_GetPixelColor(pFrame, x, y, &pixel);
				// IplImage stores the channels in B, G, R order.
				dstRow[3 * x] = pixel.rgbBlue;
				dstRow[3 * x + 1] = pixel.rgbGreen;
				dstRow[3 * x + 2] = pixel.rgbRed;
			}
		}

		// The page was only read, so it is unlocked as "not changed".
		FreeImage_UnlockPage(pGIF, pFrame, 0);
	}

	// Close the multi-page handle before shutting the library down.
	FreeImage_CloseMultiBitmap(pGIF, GIF_DEFAULT);
	FreeImage_DeInitialise();
	return iplImg;
}


/**
 * Horizontal projection: splits an image into horizontal bands (text rows).
 *
 * The image is blurred (3x3) and binarised with Otsu's threshold; for every row
 * the number of zero-valued (dark) pixels is counted. Consecutive rows with a
 * non-zero count form one band, and each band is returned as a view into srcImg
 * (no pixel data is copied).
 *
 * Assumes srcImg is an 8-bit single-channel image, since pixels are read as
 * uchar - the caller in this file passes a grayscale crop.
 *
 * @param srcImg  Image to split (dark text on a light background).
 * @return The bands in top-to-bottom order; empty when srcImg is empty or no row
 *         contains a dark pixel.
 */
vector<Mat> horizontalProjectionMat(Mat srcImg)//horizontal projection
{
	vector<Mat> roiList; // one entry per detected band
	if (srcImg.empty())
	{
		return roiList;
	}

	Mat binImg;
	blur(srcImg, binImg, Size(3, 3));
	threshold(binImg, binImg, 0, 255, CV_THRESH_OTSU);

	const int width = srcImg.cols;
	const int height = srcImg.rows;

	// Number of dark pixels in every row. A std::vector replaces the manual
	// new[]/memset(height * 4)/delete[] so the size of int is not hard-coded and
	// the buffer is released even if an OpenCV call throws.
	std::vector<int> darkPerRow(height, 0);
	for (int row = 0; row < height; row++)
	{
		const uchar* line = binImg.ptr<uchar>(row);
		for (int col = 0; col < width; col++)
		{
			if (line[col] == 0) // dark text on a light background
			{
				darkPerRow[row]++;
			}
		}
	}

	int startIndex = 0;   // first row of the band being scanned
	bool inBlock = false; // true while the scan is inside a band
	for (int row = 0; row < height; row++)
	{
		if (!inBlock && darkPerRow[row] != 0) // entering a band
		{
			inBlock = true;
			startIndex = row;
		}
		else if (inBlock && darkPerRow[row] == 0) // first blank row after a band
		{
			inBlock = false;
			// The range is half-open, so the band includes this blank row.
			roiList.push_back(srcImg(Range(startIndex, row + 1), Range(0, width)));
		}
	}

	// A band that reaches the bottom edge has no blank row to close it; emit it
	// here, as verticalProjectionMat does for the last column, so that text
	// touching the last row is not silently dropped.
	if (inBlock)
	{
		roiList.push_back(srcImg(Range(startIndex, height), Range(0, width)));
	}
	return roiList;
}
/**
 * Vertical projection: splits a text row into individual characters.
 *
 * The image is blurred (3x3) and binarised with Otsu's threshold; for every
 * column the number of zero-valued (dark) pixels is counted. Consecutive columns
 * with a non-zero count form one character, and each character is returned as a
 * view into srcImg (no pixel data is copied).
 *
 * Assumes srcImg is an 8-bit single-channel image, since pixels are read as
 * uchar - the caller in this file passes a grayscale band.
 *
 * @param srcImg  Image to split (dark text on a light background).
 * @return The character regions in left-to-right order; empty when srcImg is
 *         empty or no column contains a dark pixel.
 */
vector<Mat> verticalProjectionMat(Mat srcImg)//vertical projection
{
	vector<Mat> roiList; // one entry per detected character
	if (srcImg.empty())
	{
		return roiList;
	}

	Mat binImg;
	blur(srcImg, binImg, Size(3, 3));
	threshold(binImg, binImg, 0, 255, CV_THRESH_OTSU);

	const int width = srcImg.cols;
	const int height = srcImg.rows;

	// Number of dark pixels in every column, i.e. the projection histogram. A
	// std::vector replaces the manual new[]/memset(width * 4)/delete[] so the
	// size of int is not hard-coded and the buffer cannot leak.
	std::vector<int> darkPerCol(width, 0);
	for (int row = 0; row < height; row++)
	{
		const uchar* line = binImg.ptr<uchar>(row);
		for (int col = 0; col < width; col++)
		{
			if (line[col] == 0) // dark text on a light background
			{
				darkPerCol[col]++;
			}
		}
	}

	int startIndex = 0;   // first column of the character being scanned
	bool inBlock = false; // true while the scan is inside a character
	for (int col = 0; col < width; col++)
	{
		const bool hasInk = darkPerCol[col] > 0;
		if (!inBlock && hasInk) // entering a character
		{
			inBlock = true;
			startIndex = col;
		}
		// Close the character at the first blank column, or at the right edge of
		// the image. The right-edge rule applies only while inside a character:
		// previously it fired unconditionally and emitted an extra region that
		// started at the stale startIndex of the previous character.
		else if (inBlock && (!hasInk || col == width - 1))
		{
			inBlock = false;
			// The range is half-open, so the region includes column `col`.
			roiList.push_back(srcImg(Range(0, height), Range(startIndex, col + 1)));
		}
	}
	return roiList;
}

int  qiege(char* path)
{
	//int idx = 0;  
	IplImage* imgSrc = cvLoadImage(path, CV_LOAD_IMAGE_COLOR);
	if (imgSrc == NULL){
		imgSrc = gif2ipl(path);
	}
  
  
	IplImage* img_gray = cvCreateImage(cvGetSize(imgSrc), IPL_DEPTH_8U, 1);
	cvCvtColor(imgSrc, img_gray, CV_BGR2GRAY);//转成灰色  
	IplImage* img_gray_clone = cvCreateImage(cvGetSize(imgSrc), IPL_DEPTH_8U, 1);
	cvCopy(img_gray, img_gray_clone);
	cvErode(img_gray, img_gray, NULL, 1);//腐蚀处理，并显示    

	cvAdaptiveThreshold(img_gray, img_gray, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY_INV, 21, 3);//局部阈值  
	cvAdaptiveThreshold(img_gray_clone, img_gray_clone, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY_INV, 21, 3);//局部阈值  



	CvSeq* contours = NULL;
	CvSeq* contours2 = NULL;
	CvMemStorage* storage = cvCreateMemStorage(0);
	cvFindContours(img_gray, storage, &contours, sizeof(CvContour), CV_RETR_EXTERNAL);
	cvFindContours(img_gray_clone, storage, &contours2, sizeof(CvContour), CV_RETR_EXTERNAL);
	int count1 = 0;
	int count2 = 0;
	//腐蚀后  
	for (CvSeq* c = contours; c != NULL; c = c->h_next) {
		CvRect rc = cvBoundingRect(c, 0);
		if (rc.width<10 || rc.height<10 || rc.width>500 || rc.height>500)
		{
			continue;
		}
		count1++;
	}

	//没有腐蚀  
	for (CvSeq* c = contours2; c != NULL; c = c->h_next) {
		CvRect rc = cvBoundingRect(c, 0);
		if (rc.width<10 || rc.height<10 || rc.width>500 || rc.height>500)
		{
			continue;
		}
		count2++;
	}
	cout << "count1：" << count1 << " count2:" << count2;
	if (count2 > count1){
		contours = contours2;
	}

	char szName[56] = { 0 };
	char xzName[56] = { 0 };
	avgWidth = 0;
	avgHeight = 0;
	int countAvg = 0;
	int id = 0;
	//计算均值  

	for (CvSeq* c = contours; c != NULL; c = c->h_next) {
		CvRect rc = cvBoundingRect(c, 0);
		if (rc.width<10 || rc.height<10 || rc.width>500 || rc.height>500)
		{
			continue;    // 这里可以根据轮廓的大小进行筛选  
		}
		countAvg++;
		avgWidth += rc.width;
		avgHeight += rc.height;
	}
	avgWidth = avgWidth / countAvg;
	avgHeight = avgHeight / countAvg;
	//cout << "平均宽：" << avgWidth << "平均高：" << avgHeight << endl;  
	CvSeq* q = contours;
	CvRect cr = cvBoundingRect(q, 0);
	//这个里面就相当于存了当前的矩阵  
	int max_X = 0;
	int max_Y = 0;
	for (CvSeq* cc = contours->h_next; cc != NULL; cc = cc->h_next) {
		CvRect rc = cvBoundingRect(cc, 0);
		  
		if (rc.width<avgWidth / 3 || rc.height<avgHeight / 2 || rc.width>500 || rc.height>500)
		{
			continue;    // 这里可以根据轮廓的大小进行筛选  
		}
		
		if (max_Y - rc.y - rc.height > avgHeight && (max_Y - rc.y)>avgHeight){
			cr.height = max_Y - cr.y;
			cr.width = max_X - cr.x;
			// cvDrawRect(imgSrc, cvPoint(cr.x, cr.y), cvPoint(cr.x+cr.width, cr.y+cr.height), CV_RGB(255, 0, 0));//CV_RGB(255, 0, 0)红  
			IplImage* imgNo = cvCreateImage(cvSize(cr.width, cr.height), IPL_DEPTH_8U, 3);

			cvSetImageROI(imgSrc, cr);
			//基于给定的矩形设置图像的ROI（感兴趣区域  
			//imgSrc只显示rc标识的一部分  
			cvCopy(imgSrc, imgNo);
			

			IplImage* img_ = cvCreateImage(cvGetSize(imgNo), IPL_DEPTH_8U, 1);
			cvFlip(imgNo, NULL, 0);
			cvCvtColor(imgNo, img_, CV_BGR2GRAY);
			

			Mat M = cvarrToMat(img_);
			//cvtColor(M, M, CV_BGR2GRAY); // 转为灰度图像    
			char szName[1000] = { 0 };
			vector<Mat> a = horizontalProjectionMat(M);
			if (a.size() < 1){
				a.push_back(M);
			}
			if (a.size() == 3){
				a.erase(a.begin());
				a.erase(a.begin() + 1);
			}
			 
			for (int i = 0; i < a.size(); i++)
			{
				vector<Mat> b = verticalProjectionMat(a[i]);
				
				
				for (int j = 0; j<b.size(); j++)
				{
					//getchar();  
					sprintf_s(szName, "c:/Users/asus-pc/Desktop/test/wnd_%d.jpg", idx++);
					//imshow(szName, b[j]);  
					
					resize(b[j], b[j], Size(50, 50), 0, 0, CV_INTER_LINEAR);
					imwrite(szName, b[j]);
					//直接使用b[j]不知道为啥贴不上去。只能先存为图片，再读取图片了
					Mat iii = imread(szName);
					
					
					Mat imageROI;
					Mat black = imread("c:/Users/asus-pc/Desktop/q.png");
					imageROI = black(Rect(10, 10, iii.cols, iii.rows));
					iii.copyTo(imageROI);
					IplImage imgTmp = black;
				
					IplImage *input = cvCloneImage(&imgTmp);
					IplImage* img_gray_s = cvCreateImage(cvGetSize(input), IPL_DEPTH_8U, 1);
					cvCvtColor(input, img_gray_s, CV_BGR2GRAY);
					
					
					cvAdaptiveThreshold(img_gray_s, img_gray_s, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY_INV, 21, 3);//局部阈值
					
					cvSaveImage(szName, img_gray_s);//保存切分的结果   

					cvReleaseImage(&input);
					cvReleaseImage(&imgNo);
				}
			}

			

			cvResetImageROI(imgSrc);

			
			cr = rc;
			max_X = rc.x + rc.width;
			max_Y = rc.y + rc.height;
			cvReleaseImage(&imgNo);
			//到投影处理  

		}


		else{
			//如果在一行或者一块就进行合并，再处理 cr外，rc内  
			if (cr.x > rc.x){
				cr.x = rc.x;//记录最高点  
			}


			if (cr.y > rc.y){
				cr.y = rc.y;//记录最高点  
			}
			if (rc.x + rc.width > max_X){
				max_X = rc.x + rc.width;//记录最低点  
			}

			if (rc.y + rc.height>max_Y){
				max_Y = rc.y + rc.height;//记录最低点  
			}
		}
		
	}
	cvFlip(imgSrc, NULL, 0);
	Mat T = cvarrToMat(imgSrc);
	resize(T, T, Size(900, 900), 0, 0, CV_INTER_LINEAR);
	imshow("name", T);


	cvShowImage("原图", imgSrc);//显示原图  
	cvWaitKey(0);
	cvReleaseMemStorage(&storage);
	cvReleaseImage(&imgSrc); //cvReleaseImage函数只是将IplImage*型的变量值赋为NULL，  
	// 而这个变量本身还是存在的并且在内存中的存储位置不变。  
	cvReleaseImage(&img_gray);
	cvReleaseImage(&img_gray_clone);
	//  
	cvDestroyAllWindows();
	//销毁所有HighGUI窗口  
	cvWaitKey(0);
	return 0;
}


int main()
{//"c:/Users/asus-pc/Desktop/1234.jpg"C:\Users\as、s-pc\Desktop\jpg-t  

	//要切割图的存储路径
	string path = "c:\\Users\\asus-pc\\Desktop\\11";
	intptr_t hFile;
	vector<string> files;
	//文件信息     struct _finddata_t fileinfo;  
	struct _finddata_t fileinfo;  //很少用的文件信息读取结构  
	string p;
	char px[100] = { 0 };
	if ((hFile = _findfirst(p.assign(path).append("\\*").c_str(), &fileinfo)) != -1) {
		do {
			if ((fileinfo.attrib & _A_SUBDIR)) {

			}
			else {
				//files.push_back(fileinfo.name);  
				//files.push_back(p.assign(path).append("\\").append(fileinfo.name));  
				sprintf_s(px, "c:/Users/asus-pc/Desktop/11/%s", fileinfo.name);
				cout << fileinfo.name << endl;
				qiege(px);
			}
		} while (_findnext(hFile, &fileinfo) == 0);
		_findclose(hFile);
	}
	getchar();
	return 0;

}
这段代码先使用基于连通域的方法找出每一行文字，再通过基于投影的算法切割出每个汉字。当然还有需要优化的地方：对于不太清晰的图片，还需要调整腐蚀次数等参数，整体做得还不是很完善，仅供大家参考。代码也比较乱，其中有些是在网上找到的代码基础上自己加工而成的。