PaddlePaddle 使用笔记——使用自己的数据训练 OCR 模型

最新推荐文章于 2025-03-11 20:19:04 发布

夏颜汐

最新推荐文章于 2025-03-11 20:19:04 发布

阅读量1.1w

点赞数 5

本文链接：https://blog.youkuaiyun.com/Diana_Z/article/details/93195347

版权

1、使用环境：

Ubuntu 18.04，4 块 GPU，NVIDIA 驱动 410.78，CUDA 9.0，cuDNN 7.3，Python 3.6

2、使用代码：

官方提供的ocr模型代码

https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/ocr_recognition

3、将代码运行起来

为了方便看到运行的效果，我修改了参数 save_model_period，这样可以更快地保存模型，便于尽早确认运行是否有效。

4、生成自己的数据

import random
import cv2
import numpy as np
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import os
from unit import segmentation

# Directory whose entries are listed into font_list below
# (presumably font files -- confirm against the rendering code).
path_font='/home/zz/文字'
# Output directory; created here when it does not exist yet.
path_out='/media/zz/testtttt'
# EAFP instead of "exists() then mkdir()": avoids the window in which another
# process could create the directory between the check and the call.
# os.mkdir (not makedirs) is kept on purpose so a missing parent still fails.
try:
    os.mkdir(path_out)
except FileExistsError:
    pass

# String returned verbatim by get_txt_test(): every digit plus the dot.
dicters='0123456789.'
# Character pool that get_word() samples from.
CHARS='0123456789'
# NOTE(review): presumably the number of samples to generate; it is consumed
# outside the visible part of the script -- confirm.
number=100
# Counter advanced by get_word() once per generated character.
font_index=0
# Directory entries of path_font in sorted order.
font_list=sorted(os.listdir(path_font))
# NOTE(review): f_ind / f_size are initialised here but only used outside the
# visible part of the script.
f_ind=0
f_size=0

def get_word(length):
    """Return ``length`` characters picked at random from ``CHARS``.

    Side effect: the module-level ``font_index`` counter grows by one for
    every character that was produced.
    """
    global font_index
    picked = [random.choice(CHARS) for _ in range(length)]
    # Count what was actually generated so a non-positive length adds nothing.
    font_index += len(picked)
    return ''.join(picked)

def get_txt():
    """Build a random digit string, sometimes with a dot before the last two digits.

    A value in 0..2 is drawn first; any non-zero draw (2 out of 3) allows a
    dot. The string itself holds 1 to 8 characters from get_word(). The dot
    is only inserted when the string has at least three characters.
    """
    # Draw order (dot flag, then length) matches the original sequence of
    # random calls.
    allow_dot = random.randint(0, 2) > 0
    digits = get_word(random.randint(1, 8))
    if not allow_dot or len(digits) < 3:
        return digits
    return digits[:-2] + '.' + digits[-2:]


def get_txt_test():
    """Return the fixed module-level string ``dicters`` unchanged."""
    # Reading a module global needs no ``global`` declaration.
    return dicters

def get_bg(color, w, h, w_l=50, w_r=50, h_t=20, h_b=20):
    """Create a uniformly filled 2-D background with margins around a w x h area.

    Args:
        color: Value added to a zero-initialised uint8 array, i.e. the fill
            value of the background.
        w: Width of the inner area, without margins.
        h: Height of the inner area, without margins.
        w_l: Left margin in pixels (default 50).
        w_r: Right margin in pixels (default 50).
        h_t: Top margin in pixels (default 20).
        h_b: Bottom margin in pixels (default 20).

    Returns:
        A tuple ``(bg, w_l, h_t)``: the array of shape
        ``(h + h_t + h_b, w + w_l + w_r)`` and the left and top margins,
        which give the offset of the inner area inside the array.
    """
    # The margins used to be hard-coded; they are parameters now so callers
    # can vary them, and the defaults reproduce the previous fixed layout.
    canvas = np.zeros((h + h_t + h_b, w + w_l + w_r), dtype='uint8')
    # Kept as "zeros + color" (not np.full) so the result dtype follows the
    # same promotion rules as before.
    canvas = canvas + color
    return canvas, w_l, h_t

def oblique(bg,fc):
    bg=np.array(bg)
    imgh,imgw=bg.shape
    new_bg=np.zeros((imgh,imgw+2*imgh),dtype='uint8')
    new_bg[:,imgh:imgh+imgw]=bg
    step=random.randint(15,25)
    st=random.randint(0,step)

    while st+imgh<=imgw+2*imgh:
        pt1=(st,0)
        pt2=(st&#

最低0.47元/天解锁文章