1、使用环境:
Ubuntu 18.04,4 块 GPU,NVIDIA 驱动 410.78,CUDA 9.0,cuDNN 7.3,Python 3.6
2、使用代码:
官方提供的ocr模型代码
https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/ocr_recognition
3、将代码运行起来
为了方便看到运行的效果,我修改了参数 save_model_period,这样可以更快地保存数据,便于确认运行是否有效
4、生成自己的数据
import random
import cv2
import numpy as np
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import os
from unit import segmentation
# Directory whose entries are listed into ``font_list`` (presumably font files).
path_font = '/home/zz/文字'
# Output directory; created on start-up if it does not exist yet.
path_out = '/media/zz/testtttt'
# exist_ok=True avoids the check-then-create race and also creates any
# missing parent directories.
os.makedirs(path_out, exist_ok=True)

# Character set returned verbatim by get_txt_test(): digits plus '.'.
dicters = '0123456789.'
# Alphabet that get_word() samples from.
CHARS = '0123456789'
# NOTE(review): usage of ``number`` is not visible in this part of the file.
number = 100
# Counter advanced by get_word().
font_index = 0
# Sorted so the entry order is deterministic across runs and platforms.
font_list = sorted(os.listdir(path_font))
# NOTE(review): usage of ``f_ind`` / ``f_size`` is not visible in this part
# of the file.
f_ind = 0
f_size = 0
def get_word(length):
    """Return a random string of ``length`` characters drawn from ``CHARS``.

    Also advances the module-level ``font_index`` counter.

    NOTE(review): the indentation of the original code was lost, so it is
    unclear whether ``font_index`` was meant to advance once per call or once
    per generated character. Once per call is assumed here -- confirm against
    the code that reads ``font_index``.

    Args:
        length: Number of characters to generate; values <= 0 give ''.

    Returns:
        The generated string.
    """
    # Only ``font_index`` is rebound, so only it needs a ``global`` declaration.
    global font_index
    # str.join avoids rebuilding the string on every iteration; the sequence
    # of random.choice() calls is the same as in a plain loop.
    word = ''.join(random.choice(CHARS) for _ in range(length))
    font_index += 1
    return word
def get_txt():
    """Return a random digit string, optionally containing a decimal point.

    A string of 1 to 8 random characters is produced by ``get_word``. In two
    out of three cases, and only when the string has at least 3 characters,
    a '.' is inserted before the last two characters
    (e.g. '12345' -> '123.45').

    Returns:
        The generated text.
    """
    # The flag is drawn before the length, matching the original draw order,
    # so seeded runs keep producing the same output.
    add_point = random.randint(0, 2) > 0  # true in 2 of 3 cases
    digits = get_word(random.randint(1, 8))
    if add_point and len(digits) >= 3:
        return digits[:-2] + '.' + digits[-2:]
    return digits
def get_txt_test():
    """Return the module-level character set ``dicters`` unchanged."""
    # Reading a module-level name does not require a ``global`` declaration.
    return dicters
def get_bg(color, w, h, w_l=50, w_r=50, h_t=20, h_b=20):
    """Create a single-channel uint8 canvas filled with ``color``.

    The canvas is ``w`` x ``h`` pixels plus a margin on each side. The margins
    default to the previously hard-coded values; pass other values (for
    example random ones) to vary the padding.

    Args:
        color: Fill value; expected to fit in uint8 (0-255).
        w: Width of the inner area in pixels.
        h: Height of the inner area in pixels.
        w_l: Left margin in pixels.
        w_r: Right margin in pixels.
        h_t: Top margin in pixels.
        h_b: Bottom margin in pixels.

    Returns:
        A tuple ``(bg, w_l, h_t)``: the 2-D uint8 array of shape
        ``(h + h_t + h_b, w + w_l + w_r)`` and the left and top margins.
    """
    # np.full pins the dtype to uint8 whatever scalar type ``color`` has;
    # ``zeros + color`` could be promoted to a wider dtype.
    bg = np.full((h + h_t + h_b, w + w_l + w_r), color, dtype='uint8')
    return bg, w_l, h_t
def oblique(bg,fc):
bg=np.array(bg)
imgh,imgw=bg.shape
new_bg=np.zeros((imgh,imgw+2*imgh),dtype='uint8')
new_bg[:,imgh:imgh+imgw]=bg
step=random.randint(15,25)
st=random.randint(0,step)
while st+imgh<=imgw+2*imgh:
pt1=(st,0)
pt2=(st&#