参考了网上的一些切割法,稍微优化了一下,加了些文字合并的方法,还需要改进。有时间再来详细说说流程。
原始图:
切割后如图:
代码如下:(希望能给些建议)
import os

import cv2
import numpy as np
class fontcut:
    """Split a scanned text image into lines and then into single characters.

    The image is cut with projection profiles: a horizontal projection finds
    the text lines, and a vertical projection of each line finds the
    characters. Ranges are plain dicts of the form
    ``{"begin": int, "end": int}`` with ``end`` exclusive.
    """

    def __init__(self):
        self.V_PROJECT = 1  # vertical projection: one counter per column
        self.H_PROJECT = 2  # horizontal projection: one counter per row

    def GetTextProjection(self, src, pos, mode):
        """Accumulate the ink-pixel projection of ``src`` into ``pos``.

        A pixel counts as ink when its value lies in ``[0, 245]``.

        Args:
            src: 2-D numpy array holding a single-channel image.
            pos: mutable sequence of counters, updated in place. It needs at
                least one entry per column for ``V_PROJECT`` and one per row
                for ``H_PROJECT``.
            mode: ``self.V_PROJECT`` or ``self.H_PROJECT``. Any other value
                leaves ``pos`` untouched.

        Returns:
            The same ``pos`` object that was passed in.
        """
        rows, cols = src.shape
        ink = (src >= 0) & (src <= 245)
        if mode == self.V_PROJECT:
            print("垂直2", rows, cols)
            counts = np.count_nonzero(ink, axis=0)
        elif mode == self.H_PROJECT:
            print("水平2", rows, cols)
            counts = np.count_nonzero(ink, axis=1)
        else:
            return pos
        # Add to the existing counters so that pre-filled values are kept,
        # and convert to plain ints so the list does not hold numpy scalars.
        for idx, count in enumerate(counts):
            pos[idx] += int(count)
        return pos

    def GetPeekRange(self, img, vertical_pos, min_thresh=2, min_range=2, is_check=False):
        """Find the runs of a projection profile that contain text.

        A run opens at the first index whose value is ``>= min_thresh`` and
        closes at the first later index whose value is ``< min_thresh`` once
        the run is at least ``min_range`` wide. A run that dips below the
        threshold earlier than that stays open until it is wide enough.

        Args:
            img: image the profile was computed from; only its height
                (``img.shape[0]``) is used, and only when ``is_check`` is set.
            vertical_pos: projection profile (sequence of counts).
            min_thresh: minimum count for an index to belong to a run.
            min_range: minimum width of a reported run.
            is_check: when true, post-process the runs with ``check_peek``
                and ``check_small``.

        Returns:
            List of ``{"begin": int, "end": int}`` dicts in profile order.
        """
        rows = img.shape[0]
        peek_range = []
        begin = -1
        for i, value in enumerate(vertical_pos):
            if begin == -1:
                if value >= min_thresh:
                    begin = i
                continue
            if value < min_thresh and i - begin >= min_range:
                peek_range.append({"begin": begin, "end": i})
                begin = -1

        # A run that reaches the end of the profile (text touching the image
        # border) never sees a closing below-threshold value; close it here
        # so it is not silently dropped.
        end = len(vertical_pos)
        if begin != -1 and end - begin >= min_range:
            peek_range.append({"begin": begin, "end": end})

        if is_check:
            peek_range = self.check_peek(peek_range, rows)
            peek_range = self.check_small(peek_range, rows)
        return peek_range

    def check_peek(self, peeks, height, cut_ratio=1.3):
        """Split ranges that are too wide to be a single character.

        A range whose width is at least ``cut_ratio * height`` is replaced by
        ``max(round(width / height), 2)`` pieces. The pieces are ``height``
        wide when there are more than two of them and an even split would
        differ from ``height`` by more than 2; otherwise the range is split
        evenly. The last piece always ends at the original ``end``.

        Args:
            peeks: list of range dicts; modified in place.
            height: line height in pixels, used as the expected glyph width.
            cut_ratio: width/height ratio from which a range is split.

        Returns:
            The same ``peeks`` list.
        """
        if height <= 0:
            # No meaningful reference width; avoid dividing by zero.
            return peeks

        i = 0
        # Explicit index loop: the list is edited while walking it, and the
        # pieces inserted after position i are themselves examined later.
        while i < len(peeks):
            pxy = peeks[i]
            wd = pxy["end"] - pxy["begin"]
            pvalue = wd / height
            if pvalue >= cut_ratio:
                print("pvalue", pvalue)
                # Always cut into at least two pieces.
                gustxt = max(round(pvalue), 2)
                print("gustxt", gustxt)
                wdre = wd / gustxt
                print("wdre", wdre)
                uneven = wdre > height + 2 or wdre < height - 2
                step = height if (uneven and gustxt > 2) else wdre
                pieces = []
                for j in range(gustxt):
                    piece_begin = int(pxy["begin"] + j * step)
                    if j == gustxt - 1:
                        piece_end = pxy["end"]
                    else:
                        piece_end = int(pxy["begin"] + (j + 1) * step)
                    pieces.append({"begin": piece_begin, "end": piece_end})
                peeks[i:i + 1] = pieces
            i += 1
        return peeks

    def check_small(self, peeks, height):
        """Merge a narrow range into its left neighbour when they fit.

        A range narrower than ``height / 2 + 3`` is merged with the range
        before it when their summed widths are within 4 pixels of ``height``
        and the merged span is narrower than ``height + 2``.

        Args:
            peeks: list of range dicts; modified in place.
            height: line height in pixels, used as the expected glyph width.

        Returns:
            The same ``peeks`` list.
        """
        # Start at 1: the first range has no left neighbour, and indexing
        # peeks[-1] would wrap around to the last range.
        i = 1
        while i < len(peeks):
            cur = peeks[i]
            prev = peeks[i - 1]
            wd = cur["end"] - cur["begin"]
            if wd < height / 2 + 3:
                prev_wd = prev["end"] - prev["begin"]
                pz = wd + prev_wd - height
                if -4 < pz < 4:
                    print("huoqu", wd + prev_wd, height, pz)
                    if cur["end"] - prev["begin"] < height + 2:
                        merged = {"begin": prev["begin"], "end": cur["end"]}
                        peeks[i - 1:i + 1] = [merged]
                        # The next range has moved into slot i; look at it
                        # on the next pass instead of skipping it.
                        continue
            i += 1
        return peeks

    def cut_one_line(self, src, begin, end):
        """Return rows ``begin`` to ``end`` (exclusive) of ``src``, all columns."""
        return src[begin:end, :]

    def save_cut(self, img, id):
        """Placeholder; does nothing."""
        pass

    def CutChar(self, img, v_peek_range, pid):
        """Write one PNG per range of a text line into the ``test`` directory.

        Files are named ``<pid><index as three digits>.png``.

        Args:
            img: 2-D image of one text line.
            v_peek_range: column ranges, as returned by ``GetPeekRange``.
            pid: line identifier used as the file-name prefix.

        Returns:
            Always 0.
        """
        rows = img.shape[0]
        # Create the output directory up front; otherwise the writes below
        # cannot succeed when it is missing.
        os.makedirs("test", exist_ok=True)
        for idx, vpeek in enumerate(v_peek_range):
            crop_img = img[0:rows, vpeek["begin"]:vpeek["end"]]
            if crop_img.size == 0:
                # Nothing to write for an empty crop.
                continue
            out_path = os.path.join("test", "%s%0.3d.png" % (pid, idx))
            cv2.imwrite(out_path, crop_img)
        return 0

    def CutSingleChar(self, imgpath):
        """Cut the image at ``imgpath`` into characters and save them.

        Args:
            imgpath: path of the image to read (loaded as grayscale).

        Raises:
            OSError: if the image cannot be read.
        """
        img = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise OSError("cannot read image: %s" % imgpath)
        ret, image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        rows = image.shape[0]
        pos = self.GetTextProjection(image, [0] * rows, self.H_PROJECT)
        h_peek_range = self.GetPeekRange(image, pos, 10, 10)
        print(h_peek_range)

        # Cut every text line.
        # NOTE(review): lines are taken from the grayscale image, not from
        # the binarized one used for the row projection - confirm intended.
        lines_set = [
            self.cut_one_line(img, line_range["begin"], line_range["end"])
            for line_range in h_peek_range
        ]

        for line_id, line in enumerate(lines_set):
            line_rows, line_cols = line.shape
            print("一行", line_rows, line_cols)
            pos = self.GetTextProjection(line, [0] * line_cols, self.V_PROJECT)
            print("chuizhi", pos)
            v_peek_range = self.GetPeekRange(line, pos, is_check=True)
            print(v_peek_range)
            self.CutChar(line, v_peek_range, line_id)
# Driver: cut the sample image '111.png' into per-character crops.
# NOTE(review): these statements run whenever the module is executed or
# imported, and the image path is resolved relative to the current directory.
imgpath='111.png'
mq=fontcut()
mq.CutSingleChar(imgpath)