在CSDN上找了很多提取PDF图片的代码,发现有的函数调用不了,可能是因为fitz(PyMuPDF)库更新的缘故,在查阅库源代码后,更新了提取代码,如下:
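# Install: pip install PyMuPDF  (do not also install the unrelated PyPI package named "fitz"; it conflicts with PyMuPDF's `import fitz`)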
# 提取图片
import fitz
import re
import os
# Patterns matched against the textual object definition returned by
# Document.xref_object(); an object is treated as an image only when it
# carries both the XObject type tag and the Image subtype tag.
_XOBJECT_TYPE_RE = re.compile(r"/Type(?= */XObject)")
_IMAGE_SUBTYPE_RE = re.compile(r"/Subtype(?= */Image)")


def _save_pixmap_as_png(pix, target):
    """Write ``pix`` to ``target`` as a PNG, converting to RGB when needed."""
    # pix.n counts every component including alpha, so four or more colour
    # components indicates CMYK (per the PyMuPDF image-extraction recipe),
    # which PNG cannot store. Convert up front instead of relying on a
    # failed write to detect it.
    if pix.n - pix.alpha >= 4:
        pix = fitz.Pixmap(fitz.csRGB, pix)
    try:
        pix.save(target)
    except Exception:
        # Best-effort fallback kept from the original code: any other
        # colorspace the PNG writer rejects is retried once as RGB. The
        # exception class raised by the writer differs between PyMuPDF
        # versions, hence the broad catch; a failure of the retry propagates.
        fitz.Pixmap(fitz.csRGB, pix).save(target)


def pic(pdf_path, save_path):
    """Extract every image XObject of a PDF into ``save_path`` as PNG files.

    Walks all cross-reference entries of the document, selects the objects
    whose definition is tagged as an image XObject, and writes each one as
    ``img<N>.png`` where N is a 1-based counter in xref order.

    Args:
        pdf_path: Path of the PDF file to read.
        save_path: Directory that receives the PNG files. It is created if
            it does not exist; an empty string means the current directory.

    Returns:
        None.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    count = 0
    # The context manager closes the document even if an image fails to save.
    with fitz.open(pdf_path) as pdf:
        # xref 0 is reserved, so the scan starts at 1.
        for xref in range(1, pdf.xref_length()):
            definition = pdf.xref_object(xref)
            is_image = (
                _XOBJECT_TYPE_RE.search(definition)
                and _IMAGE_SUBTYPE_RE.search(definition)
            )
            if not is_image:
                continue
            count += 1
            # File name of the saved image.
            target = os.path.join(save_path, "img{}.png".format(count))
            _save_pixmap_as_png(fitz.Pixmap(pdf, xref), target)