from __future__ import print_function import sys import pygame import random as pyrandom import glob import sys import os import re import codecs import traceback import argparse import numpy as np import matplotlib.pyplot as plt from PIL import Image from PIL import ImageFont,ImageDraw from scipy.ndimage import filters,measurements,interpolation from scipy.misc import imsave #<fontname> <italic> bold fixed serif fraktur parser = argparse.ArgumentParser(description = "Generate text line training data") parser.add_argument('-o','--base',default='linegen',help='output directory, default: %(default)s') parser.add_argument('-r','--distort',type=float,default=1.0) parser.add_argument('-R','--dsigma',type=float,default=20.0) #SysFont(name, size, bold=False, italic=False, constructor=None): #pygame.init() font='arial:bold' #font = pygame.font.SysFont("arial",48) parser.add_argument('-f','--fonts',default=font) parser.add_argument('-F','--fontlist',default=None) text='hello word 泸州四川' parser.add_argument('-t','--texts',default=text) parser.add_argument('-T','--textlist',default=None) parser.add_argument('-m','--maxlines',default=200,type=int, help='max # lines for each directory, default: %(default)s') parser.add_argument('-e','--degradations',default="lo", help="lo, med, or hi; or give a file, default: %(default)s") parser.add_argument('-j','--jitter',default=0.5) parser.add_argument('-s','--sizes',default="40-70") parser.add_argument('-d','--display',action="store_true") parser.add_argument('--numdir',action="store_true") parser.add_argument('-C','--cleanup',default='[_~#]') parser.add_argument('-D','--debug_show',default=None, help="select a class for stepping through") args = parser.parse_args() print(args) if "-" in args.sizes: lo,hi = args.sizes.split("-") sizes = range(int(lo),int(hi)+1) else: sizes = [int(x) for x in args.sizes.split(",")] if args.degradations=="lo": # sigma +/- threshold +/- deglist = """ 0.5 0.0 0.5 0.0 """ elif args.degradations=="med": deglist = """ 0.5 0.0 0.5 0.05 1.0 0.3 0.4 0.05 1.0 0.3 0.5 0.05 1.0 0.3 0.6 0.05 """ elif args.degradations=="hi": deglist = """ 0.5 0.0 0.5 0.0 1.0 0.3 0.4 0.1 1.0 0.3 0.5 0.1 1.0 0.3 0.6 0.1 1.3 0.3 0.4 0.1 1.3 0.3 0.5 0.1 1.3 0.3 0.6 0.1 """ elif args.degradations is not None: with open(args.degradations) as stream: deglist = stream.read() degradations = [] for deg in deglist.split("\n"): deg = deg.strip() if deg=="": continue deg = [float(x) for x in deg.split()] degradations.append(deg) if args.fonts is not None: fonts = [] for pat in args.fonts.split(':'): print(pat) if pat=="": continue fonts+='arial' # fonts += sorted(glob.glob('C://Windows//Fonts//'+pat)) elif args.fontlist is not None: with open(args.fontlist) as fh: lines = (line.strip() for line in fh) fonts = [line for line in lines if line] else: print("use -f or -F arguments to specify fonts") sys.exit(1) assert len(fonts)>0,"no fonts?" print("fonts", fonts) if args.texts is not None: texts = [] for pat in args.texts.split(':'): print('pat:'+glob.glob(pat)) if pat=="": continue texts += sorted(glob.glob(pat)) elif args.textlist is not None: texts = re.split(r'\s*\n\s*',open(args.textlist).read()) else: print("use -t or -T arguments to specify texts") sys.exit(1) assert len(texts)>0,"no texts?" lines = [] for text in texts: print("# reading", text) with codecs.open(text,'r','utf-8') as stream: for line in stream.readlines(): line = line.strip() line = re.sub(args.cleanup,'',line) if len(line)<1: continue lines.append(line) print("got", len(lines), "lines") assert len(lines)>0 lines = list(set(lines)) print("got", len(lines), "unique lines") def rgeometry(image,eps=0.03,delta=0.3): m = np.array([[1+eps*np.random.randn(),0.0],[eps*np.random.randn(),1.0+eps*np.random.randn()]]) w,h = image.shape c = np.array([w/2.0,h/2]) d = c-np.dot(m,c)+np.array([np.random.randn()*delta,np.random.randn()*delta]) return interpolation.affine_transform(image,m,offset=d,order=1,mode='constant',cval=image[0,0]) def rdistort(image,distort=3.0,dsigma=10.0,cval=0): h,w = image.shape hs = np.random.randn(h,w) ws = np.random.randn(h,w) hs = filters.gaussian_filter(hs,dsigma) ws = filters.gaussian_filter(ws,dsigma) hs *= distort/np.amax(hs) ws *= distort/np.amax(ws) def f(p): return (p[0]+hs[p[0],p[1]],p[1]+ws[p[0],p[1]]) return interpolation.geometric_transform(image,f,output_shape=(h,w), order=1,mode='constant',cval=cval) if args.debug_show: plt.ion() plt.gray() base = args.base print("base", base) os.system("rm -rf "+base) os.mkdir(base) def crop(image,pad=1): [[r,c]] = measurements.find_objects(np.array(image==0,'i')) r0 = r.start r1 = r.stop c0 = c.start c1 = c.stop image = image[r0-pad:r1+pad,c0-pad:c1+pad] return image last_font = None last_size = None last_fontfile = None def genline(text,fontfile=None,size=36,sigma=0.5,threshold=0.5): global image,draw,last_font,last_fontfile if last_fontfile!=fontfile or last_size!=size: last_font = ImageFont.truetype(fontfile,size) last_fontfile = fontfile font = last_font image = Image.new("L",(6000,200)) draw = ImageDraw.Draw(image) draw.rectangle((0,0,6000,6000),fill="white") # print("\t", size, font) draw.text((250,20),text,fill="black",font=font) a = np.asarray(image,'f') a = a*1.0/np.amax(a) if sigma>0.0: a = filters.gaussian_filter(a,sigma) a += np.clip(np.random.randn(*a.shape)*0.2,-0.25,0.25) a = rgeometry(a) a = np.array(a>threshold,'f') a = crop(a,pad=3) # FIXME add grid warping here # clf(); ion(); gray(); imshow(a); ginput(1,0.1) del draw del image return a lines_per_size = args.maxlines//len(sizes) for pageno,font in enumerate(fonts): if args.numdir: pagedir = "%s/%04d"%(base,pageno+1) else: fbase = re.sub(r'^[./]*','',font) fbase = re.sub(r'[.][^/]*$','',fbase) fbase = re.sub(r'[/]','_',fbase) pagedir = "%s/%s"%(base,fbase) os.mkdir(pagedir) print("===", pagedir, font) lineno = 0 while lineno<args.maxlines: (sigma,ssigma,threshold,sthreshold) = pyrandom.choice(degradations) sigma += (2*np.random.rand()-1)*ssigma threshold += (2*np.random.rand()-1)*sthreshold line = pyrandom.choice(lines) size = pyrandom.choice(sizes) with open(pagedir+".info","w") as stream: stream.write("%s\n"%font) try: image = genline(text=line,fontfile=font, size=size,sigma=sigma,threshold=threshold) except: traceback.print_exc() continue if np.amin(image.shape)<10: continue if np.amax(image)<0.5: continue if args.distort>0: image = rdistort(image,args.distort,args.dsigma,cval=np.amax(image)) if args.display: plt.gray() plt.clf() plt.imshow(image) plt.ginput(1,0.1) fname = pagedir+"/01%04d"%lineno imsave(fname+".bin.png",image) gt = ocrolib.normalize_text(line) with codecs.open(fname+".gt.txt","w",'utf-8') as stream: stream.write(gt+"\n") print("%5.2f %5.2f %3d\t%s" % (sigma, threshold, size, line)) lineno += 1
转载于:https://my.oschina.net/goudingcheng/blog/1607592