paddle检测文本框斜率，矫正，paddleHub提取图片信息GPU加速

本文链接：https://blog.youkuaiyun.com/adminuis/article/details/116117371

paddle检测文本框斜率，矫正，paddleHub提取图片信息



import numpy as np
import cv2
import paddlehub as hub
import math
import datetime
import torch
import os


def rot2(x1,x2,y1,y2):
    """Return the inclination, in degrees, of the segment (x1, y1)-(x2, y2).

    The y difference is negated before the slope is taken (presumably because
    image y coordinates grow downwards), so a segment whose y decreases as x
    increases yields a positive angle. The result lies in [-90, 90].

    Args:
        x1, x2: x coordinates of the two end points (note the argument order:
            both x values come first, then both y values).
        y1, y2: y coordinates of the two end points.

    Returns:
        The angle as a built-in ``float``. A vertical segment gives +/-90 and
        a degenerate segment (both points equal) gives 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    dx = x2 - x1
    dy = y2 - y1
    if dx == 0:
        # The slope is undefined here. Use the limit of atan(-dy / dx) as dx
        # approaches 0 from the positive side; with no direction at all
        # (dy == 0) report "no tilt".
        result = 0.0 if dy == 0 else math.copysign(90.0, -dy)
    else:
        # math.degrees is exact, unlike the truncated constant 57.29577 which
        # turned a 45 degree line into 44.99999...
        result = math.degrees(math.atan(-dy / dx))
    print("直线倾斜角度为：" + str(result) + "度")
    return float(result)

# Rotate an image counter-clockwise by `degree` degrees, keeping its size.
def rotateImage(src, degree):
    """Return ``src`` rotated about its centre by ``degree`` degrees.

    The output canvas has the same width and height as the input, so corners
    that rotate outside it are cut off; uncovered areas are filled with white.

    Args:
        src: image array; only its first two ``shape`` entries (height,
            width) are read here.
        degree: rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D`` with a scale of 1.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    height, width = src.shape[:2]
    # Pivot of the rotation: the centre of the image, as (x, y).
    center = (width / 2.0, height / 2.0)
    # 2x3 affine matrix for a pure rotation (scale factor 1).
    affine = cv2.getRotationMatrix2D(center, degree, 1)
    # Apply the transform; the border is painted white.
    return cv2.warpAffine(src, affine, (width, height), borderValue=(255, 255, 255))

# Estimate the skew angle of an image from its detected text boxes.
def rotate_bound_white_bg(imageMat):
    """Estimate the tilt angle, in degrees, of the text in ``imageMat``.

    Runs the PaddleHub ``chinese_text_detection_db_mobile`` detector, picks
    the box whose first edge (point 0 -> point 1) is the longest and returns
    the inclination of that edge as computed by ``rot2``.

    Args:
        imageMat: the image, passed as the only element of ``images`` to
            ``detect_text``.

    Returns:
        The angle in degrees. ``90`` is returned when the measured angle is
        exactly 0 and fewer than 50 boxes were detected (the "image is lying
        on its side" heuristic). ``0.0`` is returned when no usable box was
        detected, because there is nothing to measure.

    Side effects:
        Prints the number of boxes per result and, with ``visualization=True``,
        lets the detector write its output under ``detection_result``.
    """
    text_detector = hub.Module(name="chinese_text_detection_db_mobile")
    # NOTE(review): re-running __init__ to switch on MKL-DNN is kept as in the
    # original; confirm against the PaddleHub module docs whether passing
    # enable_mkldnn to hub.Module(...) is the supported way.
    text_detector.__init__(enable_mkldnn=True)
    # Detect the text boxes.
    results = text_detector.detect_text(
                images=[imageMat],
                use_gpu=True,
                output_dir='detection_result',
                box_thresh=0.995,
                visualization=True)

    # Track the box with the longest first edge.
    x1a = x2a = y1a = y2a = 0.0
    jl = 0.0
    allcount = 0
    for result in results:
        data = result['data']
        # Number of text boxes in this result.
        print(len(data))
        # The original never updated this counter, which left the "< 50"
        # guard below without any effect.
        allcount += len(data)
        for infomation in data:
            # Only the first two points of a box are used; skip anything that
            # does not have them.
            if len(infomation) < 2:
                continue
            x1, y1 = infomation[0][0], infomation[0][1]
            x2, y2 = infomation[1][0], infomation[1][1]
            edge_length = math.hypot(x1 - x2, y1 - y2)
            if edge_length > jl:
                jl = edge_length
                x1a, x2a = x1, x2
                y1a, y2a = y1, y2

    # No box (or only zero-length edges): there is no edge to measure, and
    # calling rot2 with four zeros would divide by zero.
    if jl == 0:
        return 0.0

    # Inclination of the longest edge.
    jd = rot2(x1a, x2a, y1a, y2a)
    # Decide whether the image is lying on its side. The original wrote
    # `jd==0&allcount<50`, which Python parses as `jd == (0 & allcount) < 50`
    # because `&` binds tighter than the comparisons; `and` is what was meant.
    if jd == 0 and allcount < 50:
        jd = 90
    return jd



def ocr(imageMat):
    """Run OCR on ``imageMat`` and print every recognised text entry.

    Loads the PaddleHub ``chinese_ocr_db_crnn_mobile`` module, calls
    ``recognize_text`` on the single image and, for each result, prints the
    number of entries followed by each entry's text, confidence and box
    position. Nothing is returned.

    Args:
        imageMat: the image, passed as the only element of ``images``
            (per the original comment: an ndarray shaped [H, W, C] in BGR
            order).
    """
    # Mobile-sized pretrained recognition model.
    recognizer = hub.Module(name="chinese_ocr_db_crnn_mobile")
    pages = recognizer.recognize_text(
        images=[imageMat],
        # GPU inference; CUDA_VISIBLE_DEVICES must be set beforehand.
        use_gpu=True,
        # Where the visualised result image is saved.
        output_dir='out',
        # Save the recognition result as an image file.
        visualization=True,
        # Confidence threshold for detected text boxes.
        box_thresh=0.995,
        # Confidence threshold for recognised text.
        text_thresh=0.995)
    for page in pages:
        entries = page['data']
        print(len(entries))
        for entry in entries:
            print(
                'text: ', entry['text'],
                '\nconfidence: ', entry['confidence'],
                '\ntext_box_position: ', entry['text_box_position'])


def train_on_parameter(path="C:\\img\\5.jpg"):
    """Deskew the image at ``path`` and run OCR on the corrected image.

    Despite its name, this function does no training: it reads the image,
    estimates the text tilt with ``rotate_bound_white_bg``, rotates the image
    by the opposite angle with ``rotateImage`` and hands the result to ``ocr``.

    Args:
        path: image file to process. Defaults to the path that was previously
            hard-coded, so existing argument-less calls behave as before.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from ``path``.
    """
    print(path)
    src = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising; fail here with a clear message instead of deep
    # inside the detector.
    if src is None:
        raise FileNotFoundError("cannot read image: {}".format(path))
    jd = rotate_bound_white_bg(src)
    # Correct the image angle by rotating the opposite way.
    mat = rotateImage(src, -jd)

    ocr(mat)




if __name__ == '__main__':
    # Make NVIDIA GPUs 0-3 visible to CUDA. If this line is disabled, GPU
    # acceleration must also be turned off in the OCR calls.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    print(torch.cuda.device_count())

    # Time the whole detect -> deskew -> OCR run.
    started = datetime.datetime.now()
    train_on_parameter()
    elapsed = datetime.datetime.now() - started

    print("多进程计算 共消耗: " + format(elapsed.total_seconds(), ".2f") + " 秒")