# 1. 识别角度:飞桨(PaddlePaddle)无法读取路径中带有中文的图片,所以先将图片转换为 np.array 格式再传入模型。
def paddle_orientation(image_path):
    """Predict the text orientation of a document image with PaddleX.

    The image is decoded with Pillow and handed to the model as a NumPy
    array instead of a file path, because the model cannot read image paths
    that contain Chinese characters.

    :param image_path: one of
        * ``str``   - path to an image file, or to a ``.pdf`` file (its first
          page is rendered through ``pdf_to_image_bytes``);
        * ``bytes`` - encoded image data (e.g. PNG/JPEG bytes);
        * anything else - treated as array-like pixel data and passed to
          ``Image.fromarray``.
    :return: the first predicted label converted to ``int``, or ``None`` if
        the model yields no result.
        NOTE(review): the label is presumably the rotation angle in degrees
        (0/90/180/270) - confirm against the PaddleX model documentation.
    """
    import io

    import numpy as np
    from PIL import Image
    from paddlex import create_model

    # A PDF path is rendered to PNG bytes first so it follows the bytes branch.
    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    # Normalise every supported input to an RGB PIL image. convert('RGB')
    # drops any alpha channel and returns a new, fully loaded image, so the
    # underlying file/stream can be released right away.
    if isinstance(image_path, str):
        with Image.open(image_path) as opened:
            image_rgb = opened.convert('RGB')
    elif isinstance(image_path, bytes):
        with Image.open(io.BytesIO(image_path)) as opened:
            image_rgb = opened.convert('RGB')
    else:
        image_rgb = Image.fromarray(np.asarray(image_path)).convert('RGB')

    image_array = np.array(image_rgb)

    # NOTE(review): the model is re-created on every call; consider caching
    # it at the call site if this function is used in a loop.
    model = create_model(model_name="PP-LCNet_x1_0_doc_ori")
    output = model.predict(image_array, batch_size=1)

    # Only the first prediction is used (a single image was submitted).
    for res in output:
        print(res.json)
        label_names = res.json['res']['label_names']
        return int(label_names[0])
    return None
# 2. 修正角度:可以接收图片或者 PDF 文件。
def pdf_to_image_bytes(pdf_path, page_index=0, dpi=200):
    """Render one page of a PDF and return it as PNG-encoded bytes.

    :param pdf_path: path of the PDF file to open with PyMuPDF.
    :param page_index: zero-based index of the page to render (default: first page).
    :param dpi: rendering resolution; the zoom factor is ``dpi / 72`` because
        72 is the PDF base resolution.
    :return: ``bytes`` holding the rendered page encoded as PNG.
    """
    import io

    import fitz  # PyMuPDF
    from PIL import Image

    zoom = dpi / 72
    # The context manager closes the document (and its file handle) even if
    # rendering fails; the pixel data is copied into the PIL image before
    # the document is released.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_index)
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), colorspace="rgb")
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    # Encode to PNG in memory (another format could be used here instead).
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    return buffer.getvalue()
def handle_image_size(image_path, new_width=None, new_height=None):
    """Load an image from several input kinds, resize it, and return an array.

    The target size used to be read from ``new_width`` / ``new_height``,
    which were never defined inside this function. They are now optional
    parameters; a dimension left as ``None`` keeps the source image's size,
    so calling with only ``image_path`` returns the image unscaled.

    :param image_path: one of
        * ``str``   - path to an image file, or to a ``.pdf`` file (its first
          page is rendered through ``pdf_to_image_bytes``);
        * ``bytes`` - encoded image data;
        * anything else - treated as array-like pixel data.
    :param new_width: target width in pixels, or ``None`` to keep the source width.
    :param new_height: target height in pixels, or ``None`` to keep the source height.
    :return: ``numpy.ndarray`` built from the (resized) PIL image, so the
        channel order is whatever Pillow uses for that image mode.
    """
    import io

    import numpy as np
    from PIL import Image

    # A PDF path is rendered to PNG bytes first so it follows the bytes branch.
    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, str):
        # copy() forces the pixel data to be read, so the file can be closed.
        with Image.open(image_path) as opened:
            pil_image = opened.copy()
    elif isinstance(image_path, bytes):
        with Image.open(io.BytesIO(image_path)) as opened:
            pil_image = opened.copy()
    else:
        pil_image = Image.fromarray(np.array(image_path))

    width, height = pil_image.size
    print(f'pil_imagesize:{width, height}')

    # Fall back to the source dimension for any target that was not given.
    target_size = (
        width if new_width is None else int(new_width),
        height if new_height is None else int(new_height),
    )
    if target_size != (width, height):
        pil_image = pil_image.resize(target_size)

    return np.array(pil_image)
def correct_orientation(image_path, angle):
    """Rotate an image by ``angle`` degrees without cropping its corners.

    The image is loaded through ``handle_image_size``, rotated about its
    centre, and drawn on a canvas enlarged to the bounding box of the
    rotated image.

    :param image_path: image source, forwarded unchanged to ``handle_image_size``.
    :param angle: rotation angle in degrees, passed to
        ``cv2.getRotationMatrix2D`` (positive values rotate counter-clockwise).
    :return: the rotated image as returned by ``cv2.warpAffine``.
    """
    source = handle_image_size(image_path)
    height, width = source.shape[:2]
    center_x, center_y = width / 2, height / 2

    # 2x3 affine matrix for a rotation about the image centre, scale 1.0.
    affine = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
    abs_cos = np.abs(affine[0, 0])
    abs_sin = np.abs(affine[0, 1])

    # Bounding box of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the translation column so the result is centred on the new canvas.
    affine[0, 2] += (new_width / 2) - center_x
    affine[1, 2] += (new_height / 2) - center_y

    rotated = cv2.warpAffine(
        source,
        affine,
        (new_width, new_height),
        flags=cv2.INTER_CUBIC,           # bicubic interpolation
        borderMode=cv2.BORDER_REPLICATE,  # fill exposed corners with edge pixels
    )

    print(f"旋转完成!原始尺寸: {width}x{height} → 新尺寸: {new_width}x{new_height}")
    return rotated