paddle检测文本框斜率,矫正,paddleHub提取图片信息
import numpy as np
import cv2
import paddlehub as hub
import math
import datetime
import torch
import os
def rot2(x1, x2, y1, y2):
    """Return the inclination of the segment (x1, y1)-(x2, y2) in degrees.

    The slope is negated before the arctangent is taken, so a segment whose
    y decreases as x increases yields a positive angle (presumably because
    image y coordinates grow downward -- confirm against the caller).

    Note the argument order: both x values first, then both y values.

    Args:
        x1: x coordinate of the first point.
        x2: x coordinate of the second point.
        y1: y coordinate of the first point.
        y2: y coordinate of the second point.

    Returns:
        The angle as a float in the range [-90.0, 90.0]. A vertical segment
        gives +90.0 or -90.0 (sign of ``-(y2 - y1)``); a zero-length segment
        gives 0.0.
    """
    dx = x2 - x1
    dy = y2 - y1
    if dx == 0:
        # The slope is undefined here; dividing would raise
        # ZeroDivisionError for plain Python numbers.
        if dy == 0:
            result = 0.0
        elif dy < 0:
            result = 90.0
        else:
            result = -90.0
    else:
        # math.degrees replaces the truncated hand-typed factor 57.29577.
        result = math.degrees(math.atan(-dy / dx))
    print("直线倾斜角度为:" + str(result) + "度")
    return float(result)
def rotateImage(src, degree):
    """Rotate ``src`` counter-clockwise by ``degree`` degrees, keeping its size.

    The rotation is performed about the image center with a scale of 1.
    The output canvas has the same width and height as the input, so any
    content rotated outside that canvas is lost; areas not covered by the
    rotated image are filled with white.

    Args:
        src: Image array; only ``src.shape[:2]`` (height, width) is read here.
        degree: Rotation angle in degrees.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    height, width = src.shape[:2]
    center = (width / 2.0, height / 2.0)
    # 2x3 affine matrix for a pure rotation about the center.
    affine = cv2.getRotationMatrix2D(center, degree, 1)
    return cv2.warpAffine(src, affine, (width, height), borderValue=(255, 255, 255))
def rotate_bound_white_bg(imageMat):
    """Estimate the skew angle, in degrees, of the text in ``imageMat``.

    Runs the PaddleHub ``chinese_text_detection_db_mobile`` detector, picks
    the detected box whose edge between its first two points is longest, and
    returns the inclination of that edge as computed by ``rot2``.

    Args:
        imageMat: Image passed to the detector as the single element of
            ``images``.

    Returns:
        The angle reported by ``rot2`` for the longest edge. If that angle
        is exactly 0 and fewer than 50 boxes were detected in total, the
        image is treated as lying on its side and 90 is returned instead.
        If no usable box was detected at all, 0.0 is returned (no
        correction) instead of crashing on a zero-length edge.
    """
    text_detector = hub.Module(name="chinese_text_detection_db_mobile")
    # NOTE(review): re-invoking __init__ on an already constructed module is
    # unusual; kept as in the original -- confirm whether passing
    # enable_mkldnn to hub.Module() directly is preferable.
    text_detector.__init__(enable_mkldnn=True)
    # Detect the text boxes.
    results = text_detector.detect_text(
        images=[imageMat],
        use_gpu=True,
        output_dir='detection_result',
        box_thresh=0.995,
        visualization=True)

    # Track the longest edge seen so far and its two end points.
    longest = 0.0
    x1a = x2a = y1a = y2a = 0.0
    allcount = 0
    for detection in results:
        boxes = detection['data']
        # Number of text boxes found for this image.
        print(len(boxes))
        allcount += len(boxes)
        for box in boxes:
            # Only the first two points of the box are used (presumably the
            # top edge -- confirm against the detector's point order).
            x1, y1 = box[0][0], box[0][1]
            x2, y2 = box[1][0], box[1][1]
            length = math.hypot(x1 - x2, y1 - y2)
            if length > longest:
                longest = length
                x1a, x2a, y1a, y2a = x1, x2, y1, y2

    if longest == 0:
        # Nothing to measure: leave the image unrotated.
        return 0.0

    # Inclination of the longest edge.
    jd = rot2(x1a, x2a, y1a, y2a)
    # Heuristic for an image lying on its side. The original condition used
    # the bitwise `&`, which binds tighter than `==` and `<`.
    if jd == 0 and allcount < 50:
        jd = 90
    return jd
def ocr(imageMat):
    """Run OCR on ``imageMat`` and print every recognized text item.

    Loads the PaddleHub ``chinese_ocr_db_crnn_mobile`` module, recognizes
    the text in the image, and prints, for each result, the number of items
    followed by each item's text, confidence, and box position.

    Args:
        imageMat: Image passed to the recognizer as the single element of
            ``images``.

    Returns:
        None. All output goes to stdout (and to the ``out`` directory, since
        visualization is enabled).
    """
    # Load the pretrained mobile model.
    recognizer = hub.Module(name="chinese_ocr_db_crnn_mobile")
    recognition = recognizer.recognize_text(
        images=[imageMat],    # image data; ndarray of shape [H, W, C], BGR
        use_gpu=True,         # set CUDA_VISIBLE_DEVICES first when using a GPU
        output_dir='out',     # where result images are saved
        visualization=True,   # save the recognition result as an image file
        box_thresh=0.995,     # confidence threshold for detected text boxes
        text_thresh=0.995)    # confidence threshold for recognized text
    for page in recognition:
        items = page['data']
        print(len(items))
        for item in items:
            print('text: ', item['text'], '\nconfidence: ', item['confidence'], '\ntext_box_position: ',
                  item['text_box_position'])
def train_on_parameter(path="C:\\img\\5.jpg"):
    """Deskew the image at ``path`` and run OCR on the corrected image.

    Steps: read the image, estimate its skew with
    ``rotate_bound_white_bg``, rotate it by the negated angle with
    ``rotateImage``, then pass the result to ``ocr``.

    Args:
        path: Path of the image file to process. Defaults to the path that
            was previously hard-coded, so existing no-argument calls behave
            as before.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the image
            (it returns None in that case rather than raising).
    """
    print(path)
    src = cv2.imread(path)
    if src is None:
        # Fail here with a clear message instead of deep inside the detector.
        raise FileNotFoundError("Could not read image: " + str(path))
    jd = rotate_bound_white_bg(src)
    # Correct the image angle by rotating the opposite way.
    mat = rotateImage(src, -jd)
    ocr(mat)
if __name__ == '__main__':
    # Select the NVIDIA GPUs used for computation. If this line is disabled,
    # GPU acceleration in the OCR calls has to be turned off as well.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    print(torch.cuda.device_count())

    # Time the whole pipeline and report the elapsed wall-clock seconds.
    started_at = datetime.datetime.now()
    train_on_parameter()
    duration = datetime.datetime.now() - started_at
    seconds_text = "{:.2f}".format(duration.total_seconds())
    print("多进程计算 共消耗: " + seconds_text + " 秒")