图片文字切割垂直投影算法

本文介绍了一种基于垂直和水平投影的图像切割方法,用于分割图像中的字符。通过优化算法,实现对文本行的精确切割,并讨论了如何处理长型和窄边图片的特殊情况,确保字符分割的准确性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

参考了网上的一些切割方法,稍微优化了一下,并加了些文字合并的方法,仍需改进。有时间再来详细说说流程。

原始图:

切割后如图:

代码如下:(希望能给些建议)

import os

import cv2
import numpy as np

class fontcut():
    """Cut a text image into lines and characters using projection profiles.

    A projection profile counts, for every row or every column, how many
    pixels have a value in ``[0, 245]``.  Consecutive bins whose count reaches
    a threshold form a segment; segments of the row profile are text lines and
    segments of a line's column profile are characters.

    Segments are plain dicts ``{"begin": int, "end": int}`` where ``end`` is
    exclusive (it is used directly as a slice bound).
    """

    def __init__(self):
        self.V_PROJECT = 1  # vertical projection: one count per column
        self.H_PROJECT = 2  # horizontal projection: one count per row

    def GetTextProjection(self, src, pos, mode):
        """Accumulate the projection profile of ``src`` into ``pos``.

        Args:
            src: 2-D image array.
            pos: mutable sequence of counters that is updated in place; it
                needs one entry per column (``V_PROJECT``) or per row
                (``H_PROJECT``).
            mode: ``self.V_PROJECT`` or ``self.H_PROJECT``.  Any other value
                leaves ``pos`` untouched.

        Returns:
            The same ``pos`` object, with the per-bin counts added.
        """
        rows, cols = src.shape
        if mode == self.V_PROJECT:
            print("垂直2", rows, cols)
            axis = 0
        elif mode == self.H_PROJECT:
            print("水平2", rows, cols)
            axis = 1
        else:
            return pos

        # One mask + reduction replaces the per-pixel Python double loop while
        # producing exactly the same counts.
        pixels = np.asarray(src)
        counts = ((pixels >= 0) & (pixels <= 245)).sum(axis=axis)
        for index, count in enumerate(counts):
            pos[index] += int(count)
        return pos

    def GetPeekRange(self, img, vertical_pos, min_thresh=2, min_range=2, is_check=False):
        """Turn a projection profile into a list of segments.

        Args:
            img: 2-D image the profile was computed from; only its row count
                is used (as the reference height for the optional checks).
            vertical_pos: the projection profile (one count per bin).
            min_thresh: a bin belongs to a segment when its count is at least
                this value.
            min_range: segments shorter than this many bins are discarded.
            is_check: when true, post-process the segments with
                ``check_peek`` (split wide ones) and ``check_small`` (merge
                narrow ones).

        Returns:
            List of ``{"begin", "end"}`` dicts in profile order.
        """
        rows, _ = img.shape
        peek_range = []
        begin = -1  # -1 means "not inside a segment"
        for i, value in enumerate(vertical_pos):
            if begin == -1:
                if value >= min_thresh:
                    begin = i
            elif value < min_thresh:
                if i - begin >= min_range:
                    peek_range.append({"begin": begin, "end": i})
                begin = -1

        # A segment that is still open here reaches the last bin (text touching
        # the image border).  Close it instead of silently dropping it.
        if begin != -1:
            end = len(vertical_pos)
            if end - begin >= min_range:
                peek_range.append({"begin": begin, "end": end})

        if is_check:
            peek_range = self.check_peek(peek_range, rows)
            peek_range = self.check_small(peek_range, rows)
        return peek_range

    def check_peek(self, peeks, height, cut_ratio=1.3):
        """Split segments that are too wide to be a single character.

        A segment whose width/height ratio is at least ``cut_ratio`` is
        replaced by ``round(ratio)`` pieces (at least two).  The pieces are
        ``height`` wide when there are more than two of them and the even
        split would differ from ``height`` by more than 2; otherwise the
        segment is split evenly.  The last piece always ends at the original
        segment end.

        Args:
            peeks: list of segments; modified in place.
            height: reference character height (the line height).
            cut_ratio: minimum width/height ratio that triggers a split.

        Returns:
            The same ``peeks`` list.
        """
        if height <= 0:
            # No meaningful reference height; avoid dividing by zero.
            return peeks

        i = 0
        while i < len(peeks):
            pxy = peeks[i]
            wd = pxy["end"] - pxy["begin"]
            pvalue = wd / height
            if pvalue >= cut_ratio:
                print("pvalue", pvalue)
                gustxt = round(pvalue)  # estimated number of characters
                if gustxt == 1:
                    gustxt += 1
                print("gustxt", gustxt)
                wdre = wd / gustxt
                print("wdre", wdre)

                if (wdre > height + 2 or wdre < height - 2) and gustxt > 2:
                    step = height
                else:
                    step = wdre

                pieces = []
                for j in range(gustxt):
                    piece_begin = int(pxy["begin"] + j * step)
                    if j == gustxt - 1:
                        piece_end = pxy["end"]
                    else:
                        piece_end = int(pxy["begin"] + (j + 1) * step)
                    pieces.append({"begin": piece_begin, "end": piece_end})
                peeks[i:i + 1] = pieces
            # Advance by one only: the pieces after the first are visited next,
            # so an oversized remainder piece is split again.
            i += 1
        return peeks

    def check_small(self, peeks, height):
        """Merge a narrow segment into its predecessor when they form one character.

        A segment narrower than ``height / 2 + 3`` is merged with the segment
        before it when their combined width is within 4 of ``height`` and the
        merged span is shorter than ``height + 2``.

        Args:
            peeks: list of segments; modified in place.
            height: reference character height (the line height).

        Returns:
            The same ``peeks`` list.
        """
        merged = []
        for pxy in peeks:
            wd = pxy["end"] - pxy["begin"]
            # The first segment has no predecessor; never compare it with the
            # last one via a negative index.
            if merged and wd < height / 2 + 3:
                prev = merged[-1]
                prev_wd = prev["end"] - prev["begin"]
                pz = wd + prev_wd - height
                if -4 < pz < 4:
                    print("huoqu", wd + prev_wd, height, pz)
                    if pxy["end"] - prev["begin"] < height + 2:
                        merged[-1] = {"begin": prev["begin"], "end": pxy["end"]}
                        continue
            merged.append(pxy)
        # Keep the in-place contract of the original implementation.
        peeks[:] = merged
        return peeks

    def cut_one_line(self, src, begin, end):
        """Return rows ``begin:end`` of ``src`` across its full width."""
        rows, cols = src.shape
        crop_img = src[begin:end, 0:cols]
        return crop_img

    def save_cut(self, img, id):
        """Placeholder; currently does nothing."""
        pass

    def CutChar(self, img, v_peek_range, pid):
        """Write every character segment of a line image to ``test/``.

        Files are named ``<pid><index as 3 digits>.png``.

        Args:
            img: 2-D line image.
            v_peek_range: column segments of the characters in ``img``.
            pid: prefix for the file names (the caller passes the line index).

        Returns:
            Always 0.
        """
        rows, _ = img.shape
        out_dir = "test"
        # cv2.imwrite does not create missing directories, so make sure the
        # target exists; os.path.join keeps the path portable.
        os.makedirs(out_dir, exist_ok=True)
        for index, vpeek in enumerate(v_peek_range):
            crop_img = img[0:rows, vpeek["begin"]:vpeek["end"]]
            cv2.imwrite(os.path.join(out_dir, "%s%0.3d.png" % (pid, index)), crop_img)
        return 0

    def CutSingleChar(self, imgpath):
        """Segment the image at ``imgpath`` into characters and save them.

        The image is read as grayscale, binarised with Otsu's threshold to
        find the text lines, and every line is then cut into characters that
        ``CutChar`` writes to disk.

        Args:
            imgpath: path of the image file.

        Raises:
            OSError: if the image cannot be read.
        """
        img = cv2.imread(imgpath, 0)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError("cannot read image: %r" % (imgpath,))
        _, image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        rows, _ = image.shape

        row_profile = self.GetTextProjection(image, [0] * rows, self.H_PROJECT)
        h_peek_range = self.GetPeekRange(image, row_profile, 10, 10)
        print(h_peek_range)

        for line_id, line_span in enumerate(h_peek_range):
            # NOTE(review): lines are cut from the grayscale image, not the
            # binarised one, so the column profile below is computed on gray
            # values. Kept as in the original - confirm this is intended.
            line = self.cut_one_line(img, line_span["begin"], line_span["end"])
            line_rows, line_cols = line.shape
            print("一行", line_rows, line_cols)
            col_profile = self.GetTextProjection(line, [0] * line_cols, self.V_PROJECT)
            print("chuizhi", col_profile)
            v_peek_range = self.GetPeekRange(line, col_profile, is_check=True)
            print(v_peek_range)
            self.CutChar(line, v_peek_range, line_id)
        # NOTE(review): no window is opened in this method; this call is kept
        # from the original and presumably only matters with debug imshow.
        cv2.waitKey(0)

# Path of the demo image to segment.
imgpath = '111.png'

# Run the demo only when executed as a script, so that importing this module
# does not read an image or write files.
if __name__ == "__main__":
    mq = fontcut()
    mq.CutSingleChar(imgpath)

 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值