# 1. 指定页提取文字
import pdfplumber
# Extract the text of one specific page (the first) and append it to a text file.
path = './练习文件/文字.pdf'

# `with` closes the PDF automatically; everything that uses `pdf` must be
# indented inside this block.
with pdfplumber.open(path) as pdf:
    # pdf.pages is indexed from 0, so index 0 is the first page.
    first_page = pdf.pages[0]
    # Fall back to '' so a page without extractable text (extract_text() may
    # return None on some pdfplumber versions) cannot make write() raise.
    text = first_page.extract_text() or ''

# Open the output through `with` as well so the handle is flushed and closed;
# the bare open() used previously was never closed.
# mode='a' appends, so re-running the script adds to the existing file.
with open('./结果文件/1.txt', mode='a', encoding='utf-8') as textW:
    textW.write(text)
# 2. 所有页提取文字
import pdfplumber
# Extract the text of every page and append it, page after page, to a text file.
path = './练习文件/文字.pdf'

with pdfplumber.open(path) as pdf:
    # Open the output once for the whole run instead of re-opening it (and
    # never closing it) for each page; `with` guarantees it is closed.
    # mode='a' appends, so re-running the script adds to the existing file.
    with open('./结果文件/2.txt', mode='a', encoding='utf-8') as textW:
        for page in pdf.pages:
            # Fall back to '' so a page without extractable text
            # (extract_text() may return None on some pdfplumber versions)
            # cannot make write() raise.
            text = page.extract_text() or ''
            # Pages are written back to back, with no separator added.
            textW.write(text)