场景
获取 PDF 文件中的文本内容(只读不写)
环境:Windows 10 + Anaconda
安装:pip install pdfminer3k
from io import StringIO
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
def read_pdf(path_pdf):
with open(path_pdf, 'rb') as pdf:
# PDF资源管理器
rsrcmgr = PDFResourceManager()
# 输出str到内存
outfp = StringIO()
# 解析PDF的参数
laparams = LAParams<