飞桨 PaddleOCR 识别图片上文字的角度

1. 识别角度:飞桨无法读取路径中带有中文的图片,所以先将图片转换为 np.array 格式再传入模型

def paddle_orientation(image_path):
    """Predict the text orientation of an image with the PaddleX doc-orientation model.

    Args:
        image_path: Path to an image or PDF file (``str``), or encoded image
            data (``bytes``). For a PDF path the first page is rendered via
            ``pdf_to_image_bytes`` and classified.

    Returns:
        The first label name of the first prediction result converted to
        ``int`` (presumably one of 0/90/180/270 -- confirm against the PaddleX
        model documentation), or ``None`` if the model yields no result.

    Raises:
        TypeError: If ``image_path`` is neither ``str`` nor ``bytes``.
    """
    import io

    import numpy as np
    from paddlex import create_model
    from PIL import Image

    # A PDF cannot be opened by PIL directly: render it to PNG bytes first.
    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, str):
        source = image_path
    elif isinstance(image_path, bytes):
        source = io.BytesIO(image_path)
    else:
        raise TypeError(
            f"image_path must be str or bytes, got {type(image_path).__name__}"
        )

    # The pixels are handed to the model as an array rather than as a path,
    # because the model reportedly fails on paths containing non-ASCII
    # (e.g. Chinese) characters. convert('RGB') drops any alpha channel.
    with Image.open(source) as pil_image:
        image_array = np.array(pil_image.convert('RGB'))

    # NOTE(review): the model is re-created on every call; consider caching it
    # if this function is called repeatedly.
    model = create_model(model_name="PP-LCNet_x1_0_doc_ori")
    output = model.predict(image_array, batch_size=1)
    for res in output:
        print(res.json)
        label_names = res.json['res']['label_names']
        # Only the first result is needed for a single input image.
        return int(label_names[0])
    return None

2. 修正角度:可以接收图片或者 PDF 文件

def pdf_to_image_bytes(pdf_path, page_index=0, dpi=200):
    """Render one page of a PDF and return it as PNG-encoded bytes.

    Args:
        pdf_path: Path of the PDF file to open.
        page_index: Zero-based index of the page to render (default: first page).
        dpi: Rendering resolution; the page is scaled by ``dpi / 72``.

    Returns:
        ``bytes`` containing the rendered page encoded as PNG.
    """
    import io

    import fitz  # PyMuPDF
    from PIL import Image

    # 72 is the base DPI of PDF user space, so this factor yields `dpi`.
    zoom = dpi / 72

    # The context manager closes the document (and its file handle) even if
    # loading or rendering the page raises.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_index)
        # alpha=False keeps the sample buffer at 3 bytes per pixel, matching
        # the "RGB" mode handed to PIL below.
        pix = page.get_pixmap(
            matrix=fitz.Matrix(zoom, zoom), colorspace="rgb", alpha=False
        )
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    # Encode as PNG in memory (the format could be swapped for JPEG etc.).
    img_bytes = io.BytesIO()
    img.save(img_bytes, format="PNG")
    return img_bytes.getvalue()


def handle_image_size(image_path, new_width=None, new_height=None):
    """Load an image, optionally resize it, and return it as a NumPy array.

    Args:
        image_path: One of
            * ``str`` -- path to an image file, or to a PDF whose first page
              is rendered via ``pdf_to_image_bytes``;
            * ``bytes`` -- encoded image data;
            * anything else -- treated as array-like pixel data and passed
              through ``np.array``.
        new_width: Target width in pixels; ``None`` keeps the current width.
        new_height: Target height in pixels; ``None`` keeps the current height.

    Returns:
        ``numpy.ndarray`` holding the (possibly resized) image pixels.
    """
    import io

    import numpy as np

    def _report_and_pick_size(width, height):
        # Log the source size, then fall back to it for any unspecified side.
        print(f'pil_imagesize:{width, height}')
        return (
            width if new_width is None else new_width,
            height if new_height is None else new_height,
        )

    # A PDF cannot be opened by PIL directly: render it to PNG bytes first.
    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, (str, bytes)):
        from PIL import Image

        source = image_path if isinstance(image_path, str) else io.BytesIO(image_path)
        # resize() returns a new image, so the opened one can be closed here.
        with Image.open(source) as pil_image:
            target = _report_and_pick_size(*pil_image.size)
            return np.array(pil_image.resize(target))

    # Array-like input: np.array copies, so the caller's data is never aliased.
    pixels = np.array(image_path)
    height, width = pixels.shape[:2]
    target = _report_and_pick_size(width, height)
    if target == (width, height):
        # Nothing to resize; skip the round trip through PIL.
        return pixels

    from PIL import Image

    return np.array(Image.fromarray(pixels).resize(target))

def correct_orientation(image_path, angle):
    """Rotate an image by ``angle`` degrees without cropping its corners.

    The image is loaded through ``handle_image_size`` and rotated about its
    centre; the output canvas is enlarged to the bounding box of the rotated
    image.

    :param image_path: Image source, forwarded unchanged to ``handle_image_size``.
    :param angle: Rotation angle in degrees, passed to
        ``cv2.getRotationMatrix2D`` (positive values rotate counter-clockwise).
    :return: The rotated image as returned by ``cv2.warpAffine``.
    """
    src = handle_image_size(image_path)
    src_h, src_w = src.shape[:2]
    pivot = (src_w / 2, src_h / 2)

    # Rotation about the image centre at scale 1.0.
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # |cos| and |sin| of the angle give the bounding box of the rotated image.
    abs_cos = np.abs(affine[0, 0])
    abs_sin = np.abs(affine[0, 1])
    out_w = int(src_h * abs_sin + src_w * abs_cos)
    out_h = int(src_h * abs_cos + src_w * abs_sin)

    # Shift the translation so the rotated image is centred on the new canvas.
    affine[0, 2] += out_w / 2 - pivot[0]
    affine[1, 2] += out_h / 2 - pivot[1]

    # Bicubic interpolation; pixels outside the source replicate the border.
    rotated = cv2.warpAffine(
        src,
        affine,
        (out_w, out_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    print(f"旋转完成!原始尺寸: {src_w}x{src_h} → 新尺寸: {out_w}x{out_h}")
    return rotated

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值