import fitz
import numpy as np
from PIL import Image
import cv2
from tqdm import tqdm
import io
# Re-render every page of the input PDF as a binarized (black/white) image
# and collect the results in a new PDF. Adaptive thresholding compares each
# pixel with its local neighbourhood, so uneven backgrounds are flattened
# while the darker strokes are kept.

# PDF user space is 72 units per inch, so a zoom of 2 renders at 144 dpi.
# The matrix never changes, so it is built once instead of once per page.
zoom = 2
mat = fitz.Matrix(zoom, zoom)

# Context managers close both documents even if a page fails to process.
with fitz.open('/Users/linql/Desktop/11.pdf') as pdf_doc:
    with fitz.open() as output_doc:
        for orig_page in tqdm(pdf_doc):
            # alpha=False guarantees 3 bytes per pixel, which is what the
            # "RGB" mode passed to Image.frombytes expects.
            pix = orig_page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            # adaptiveThreshold only accepts single-channel 8-bit input.
            img = img.convert('L')

            cleaned_page_array = cv2.adaptiveThreshold(
                np.array(img),
                255,  # value assigned to pixels above the local threshold
                # Threshold from the Gaussian-weighted neighbourhood mean;
                # cv2.ADAPTIVE_THRESH_MEAN_C (plain neighbourhood mean) is
                # the alternative method.
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                19,  # neighbourhood size in pixels (must be odd)
                15,  # constant subtracted from the weighted mean
            )

            new_image = Image.fromarray(cleaned_page_array)
            fp = io.BytesIO()
            # PNG is lossless; JPEG would add ringing artifacts around the
            # hard black/white edges that thresholding just produced.
            new_image.save(fp, "PNG")

            # Keep the original page size in points. Sizing the page from the
            # pixel dimensions would double its physical size; the extra
            # resolution is carried by the image instead.
            page = output_doc.new_page(
                width=orig_page.rect.width,
                height=orig_page.rect.height,
            )
            page.insert_image(page.rect, stream=fp.getvalue())

        # Save while the output document is still open.
        output_doc.save("output_file.pdf")
# [记录] 基于 Python 的 PyMuPDF 和 OpenCV 提升 PDF 的清晰度
# 最新推荐文章于 2025-01-22 18:09:23 发布