python学习的第二十五天：对PDF文件的读写操作

本文链接：https://blog.youkuaiyun.com/m0_52863098/article/details/119697791

文章目录

- python学习的第二十五天：对PDF文件的读写操作

python学习的第二十五天：对PDF文件的读写操作

对Excel操作的补充

# Python操作Excel生成统计图表

from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference

# Source table: one header row followed by one row per product category.
rows = [
    ('类别', '销售A组', '销售B组'),
    ('手机', 40, 30),
    ('平板', 50, 60),
    ('笔记本', 80, 70),
    ('外围设备', 20, 10),
]

# Create a write-only workbook, add a sheet and stream the table into it.
workbook = Workbook(write_only=True)
sheet = workbook.create_sheet()
for record in rows:
    sheet.append(record)

# Configure a vertical ("col") bar chart with a title and axis captions.
bar_chart = BarChart()
bar_chart.type = 'col'
bar_chart.style = 10
bar_chart.shape = 4
bar_chart.title = '销售统计图'
bar_chart.x_axis.title = '商品类别'
bar_chart.y_axis.title = '销量'

# Columns 2-3 hold the two data series; row 1 is included so that the
# header cells become the series titles (titles_from_data=True).
series_ref = Reference(sheet, min_col=2, max_col=3, min_row=1, max_row=5)
# Column 1, rows 2-5, holds the category labels.
category_ref = Reference(sheet, min_col=1, min_row=2, max_row=5)
bar_chart.add_data(series_ref, titles_from_data=True)
bar_chart.set_categories(category_ref)

# Anchor the chart at cell A10 and write the workbook to disk.
sheet.add_chart(bar_chart, 'A10')
workbook.save('resources/demo.xlsx')

python的实用方法

获取指定文件夹下的所有内容

import os

# Print the absolute path of every entry in the target directory.
#
# os.listdir() returns bare entry names, so each name has to be joined with
# the directory that was listed. Calling os.path.abspath() on the bare name
# would resolve it against the current working directory instead and yield
# a path that does not point at the listed entry.
target_dir = '/Users/Hao/Desktop'
files_list = os.listdir(target_dir)
for file in files_list:
    fullpath = os.path.abspath(os.path.join(target_dir, file))
    print(fullpath)

shutil模块（封装了高级的文件操作函数）

import shutil

# Locate the executable that the command name 'python' resolves to
python_path = shutil.which('python')
print(python_path)
# Move the file to its new location
shutil.move(src='resources/sales_data.csv', dst='sales.csv')

PDF的相关操作

操作PDF需要的工具：PyPDF2（读取、修改已有的PDF文件）和 reportlab（生成新的PDF文件）

读取PDF文件并提取其中的文字（示例中提取文字的代码已被注释，实际演示的是旋转页面并在每页后插入空白页）

import PyPDF2

from PyPDF2.pdf import PageObject

# Open the source PDF and create a writer that will receive the modified pages
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
# Visit every page of the source document by index
page_count = reader.numPages
for index in range(page_count):
    page = reader.getPage(index)  # type: PageObject
    # To pull the text out of the page instead, call page.extractText()
    # Rotate the page 90 degrees clockwise, then append it followed by a
    # blank page
    page.rotateClockwise(90)
    writer.addPage(page)
    writer.addBlankPage()
# Write the assembled document to a new file
with open('resources/XGBoost-modified.pdf', 'wb') as output:
    writer.write(output)

PDF的写操作

from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# Register the TrueType fonts under the names used by setFont() below
font_files = (
    ('Font1', 'resources/fonts/Vera.ttf'),
    ('Font2', 'resources/fonts/青呱石头体.ttf'),
)
for font_name, font_path in font_files:
    pdfmetrics.registerFont(TTFont(font_name, font_path))

# Create an A4-sized canvas that writes to the output file
width, height = A4
pdf_canvas = canvas.Canvas('resources/demo.pdf', pagesize=A4)

# Draw the picture (250 x 375) with its bottom edge 395 units below the
# top of the page and its left edge at x=20
image = canvas.ImageReader('resources/guido.jpg')
image_x = 20
image_y = height - 395
pdf_canvas.drawImage(image, image_x, image_y, 250, 375)

# Finish the current page; the drawing calls below go onto the next one
pdf_canvas.showPage()

# First line of text: custom font, opaque red
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(1, 0, 0, 1)
text_x = width // 2 - 120
text_y = height // 2
pdf_canvas.drawString(text_x, text_y, '你好，世界！')
# Second line of text: Vera font, green with alpha 0.5, drawn after
# rotating the canvas by 18 degrees
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, 0.5)
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

# Write the PDF file
pdf_canvas.save()

PDF加密

import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
# Copy every page of the source document into the writer unchanged
page_count = reader.numPages
for index in range(page_count):
    source_page = reader.getPage(index)
    writer.addPage(source_page)
# Protect the output document with a password
writer.encrypt('foobared')
# Write the encrypted copy to a new file
with open('resources/XGBoost-encrypted.pdf', 'wb') as output:
    writer.write(output)

PDF的解密

import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost-encrypted.pdf')
# A result of 0 from decrypt() is treated as "wrong password"; any other
# result means the document can now be read
decrypt_status = reader.decrypt('foobared')
if decrypt_status != 0:
    print(reader.numPages)
else:
    print('密码错误')

PDF增加水印

import PyPDF2

from PyPDF2.pdf import PageObject

# Stamp the first page of watermark.pdf onto every page of the source
# document and save the result as a new file.
reader1 = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
reader2 = PyPDF2.PdfFileReader('resources/watermark.pdf')
writer = PyPDF2.PdfFileWriter()

# The same watermark page is reused for every page of the document
watermark_page = reader2.getPage(0)
for page_num in range(reader1.numPages):
    current_page = reader1.getPage(page_num)  # type: PageObject
    # Merge the watermark into the page, then queue the page for output
    current_page.mergePage(watermark_page)
    writer.addPage(current_page)

# Each page is merged and added exactly once inside the loop above, so the
# writer is saved directly after it; repeating the merge/add afterwards
# would stamp the last page twice and append it a second time.
with open('resources/XGBoost-watermarked.pdf', 'wb') as file:
    writer.write(file)