飞桨 PaddleOCR 识别图片上文字的角度

原创于 2025-08-06 16:33:40 发布 · 22 阅读

CC 4.0 BY-SA版权

文章标签：

1. 识别角度：飞桨无法读取路径中带有中文的图片，所以先将图片转换为 np.array 格式再传入模型

def paddle_orientation(image_path):
    """Predict the text orientation of an image or PDF with PaddleX.

    The input is decoded with PIL and passed to the model as a NumPy array
    instead of a file path; per the author's note, Paddle cannot open image
    paths that contain Chinese characters.

    :param image_path: path to an image file, path to a PDF file (its first
        page is rendered through ``pdf_to_image_bytes``), or an encoded
        image as ``bytes``.
    :return: the first label name of the first prediction converted to
        ``int``, or ``None`` when the model yields no prediction.
    :raises TypeError: if ``image_path`` is neither ``str`` nor ``bytes``.
    """
    # Reject unsupported inputs before paying for the heavy imports below.
    if not isinstance(image_path, (str, bytes)):
        raise TypeError(
            "image_path must be a str path or bytes, "
            f"got {type(image_path).__name__}"
        )

    import io

    import numpy as np
    from paddlex import create_model
    from PIL import Image

    # A PDF path is rendered to PNG bytes first, then handled like any
    # other in-memory image.
    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, str):
        pil_image = Image.open(image_path)
    else:
        pil_image = Image.open(io.BytesIO(image_path))

    # Drop any alpha channel so the model always receives a 3-channel array.
    image_array = np.array(pil_image.convert('RGB'))

    model = create_model(model_name="PP-LCNet_x1_0_doc_ori")
    output = model.predict(image_array, batch_size=1)
    for res in output:
        print(res.json)
        label_names = res.json['res']['label_names']
        # Only the first prediction is needed: a single image was submitted.
        return int(label_names[0])
    return None

2. 修正角度：可以接收图片或者 PDF 文件

def pdf_to_image_bytes(pdf_path, page_index=0, dpi=200):
    """Render one page of a PDF and return it as PNG-encoded bytes.

    :param pdf_path: path of the PDF file to open.
    :param page_index: zero-based index of the page to render.
    :param dpi: rendering resolution; the zoom factor is ``dpi / 72``.
    :return: the rendered page encoded as PNG ``bytes``.
    :raises IndexError: if ``page_index`` is outside the document.
    """
    import io

    import fitz  # PyMuPDF
    from PIL import Image

    # The context manager closes the document even when rendering fails,
    # so the underlying file handle is not leaked.
    with fitz.open(pdf_path) as doc:
        page_count = doc.page_count
        if not -page_count <= page_index < page_count:
            raise IndexError(
                f"page_index {page_index} out of range for a PDF "
                f"with {page_count} page(s)"
            )
        page = doc.load_page(page_index)
        # 72 is the PDF base resolution, so dpi / 72 is the scale factor.
        zoom = dpi / 72
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat, colorspace="rgb")
        # Copy the raw RGB samples into a PIL image while the document is
        # still open.
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    img_bytes = io.BytesIO()
    img.save(img_bytes, format="PNG")
    return img_bytes.getvalue()


def handle_image_size(image_path, new_width=None, new_height=None):
    """Load an image from several input kinds and return it as a resized array.

    :param image_path: path to an image file, path to a PDF file (its first
        page is rendered through ``pdf_to_image_bytes``), an encoded image
        as ``bytes``, or any array-like accepted by ``np.array``.
    :param new_width: target width in pixels; defaults to the source width.
    :param new_height: target height in pixels; defaults to the source
        height.
    :return: the (possibly resized) image as a ``numpy.ndarray``.
    :raises ValueError: if a requested dimension is not positive.
    """
    import io

    import numpy as np
    from PIL import Image

    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, str):
        pil_image = Image.open(image_path)
    elif isinstance(image_path, bytes):
        pil_image = Image.open(io.BytesIO(image_path))
    else:
        # Anything else is treated as pixel data already held in memory.
        pil_image = Image.fromarray(np.array(image_path))
    width, height = pil_image.size

    print(f'pil_imagesize:{width, height}')

    # Missing targets fall back to the source size, so a call without size
    # arguments only normalises the input into an array.
    if new_width is None:
        new_width = width
    if new_height is None:
        new_height = height
    if new_width <= 0 or new_height <= 0:
        raise ValueError(
            f"target size must be positive, got {new_width}x{new_height}"
        )

    resized_image = pil_image.resize((int(new_width), int(new_height)))
    return np.array(resized_image)

def correct_orientation(image_path, angle):
    """Rotate an image by ``angle`` degrees on a canvas that fits the result.

    The image is loaded through ``handle_image_size`` and rotated about its
    centre with ``cv2.warpAffine``. The output canvas is enlarged to the
    bounding box of the rotated image so no corner is cut off.

    :param image_path: input forwarded unchanged to ``handle_image_size``.
    :param angle: rotation in degrees, passed to
        ``cv2.getRotationMatrix2D`` (positive values rotate
        counter-clockwise).
    :return: the rotated image as returned by ``cv2.warpAffine``.
    """
    source = handle_image_size(image_path)
    height, width = source.shape[:2]
    pivot = (width / 2, height / 2)

    # 2x3 affine matrix: rotation about the pivot, scale factor 1.0.
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # |cos| and |sin| of the angle give the bounding box of the rotated
    # rectangle.
    abs_cos = np.abs(affine[0, 0])
    abs_sin = np.abs(affine[0, 1])
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the translation column so the old centre lands on the centre of
    # the enlarged canvas.
    affine[0, 2] += (new_width / 2) - pivot[0]
    affine[1, 2] += (new_height / 2) - pivot[1]

    # Cubic interpolation for quality; uncovered border pixels are filled by
    # replicating the nearest edge pixel.
    rotated_img = cv2.warpAffine(
        source,
        affine,
        (new_width, new_height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    print(f"旋转完成！原始尺寸: {width}x{height} → 新尺寸: {new_width}x{new_height}")
    return rotated_img