python学习的第二十五天:对PDF文件的读写操作

python学习的第二十五天:对PDF文件的读写操作

对Excel操作的补充

# Python操作Excel生成统计图表

from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference

# Create a write-only workbook and the single sheet that will hold both the
# data table and the chart.
wb = Workbook(write_only=True)
ws = wb.create_sheet()

# The first tuple is the header row; every following tuple is one product
# category with the sales figures of the two groups.
rows = [
    ('类别', '销售A组', '销售B组'),
    ('手机', 40, 30),
    ('平板', 50, 60),
    ('笔记本', 80, 70),
    ('外围设备', 20, 10),
]

for row in rows:
    ws.append(row)

# The table is appended from row 1, so its last row equals the number of
# tuples in `rows`. Deriving the bound keeps the chart in sync with the data
# instead of relying on a hard-coded row number.
last_row = len(rows)

chart1 = BarChart()
chart1.type = 'col'
chart1.style = 10
chart1.title = '销售统计图'
chart1.y_axis.title = '销量'
chart1.x_axis.title = '商品类别'

# Series data: columns 2-3, including the header row so that the headers can
# be used as series titles (titles_from_data=True below).
data = Reference(ws, min_col=2, min_row=1, max_row=last_row, max_col=3)
# Category labels: column 1, skipping the header row.
cats = Reference(ws, min_col=1, min_row=2, max_row=last_row)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cats)
chart1.shape = 4
# Anchor the chart at cell A10 of the sheet.
ws.add_chart(chart1, 'A10')

wb.save('resources/demo.xlsx')

Python的实用方法

获取指定文件夹下的所有内容
import os

# Directory whose entries are listed.
target_dir = '/Users/Hao/Desktop'

files_list = os.listdir(target_dir)
for file in files_list:
    # os.listdir() returns bare entry names, so the full path must be built
    # from the listed directory. os.path.abspath(file) would resolve the name
    # against the current working directory instead.
    fullpath = os.path.join(target_dir, file)
    print(fullpath)
shutil模块(封装了高级的文件操作函数)
import shutil

# Look up the path of the executable that the `python` command resolves to
# and print it.
python_path = shutil.which('python')
print(python_path)

# Move the CSV file out of resources/ to sales.csv (relative path).
shutil.move('resources/sales_data.csv', 'sales.csv')

PDF的相关操作

操作PDF需要的工具:PyPDF2
读取PDF文件并提取其中的文字
import PyPDF2

from PyPDF2.pdf import PageObject

# Source document, plus the writer that collects the transformed pages.
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
# Visit every page by index, up to the page count reported by the reader.
for index in range(reader.numPages):
    page = reader.getPage(index)  # type: PageObject
    # Text can be pulled from the page with page.extractText() if needed.
    page.rotateClockwise(90)
    # Each rotated page is followed by a blank page in the output.
    writer.addPage(page)
    writer.addBlankPage()
# Write the assembled document to a new file opened in binary mode.
with open('resources/XGBoost-modified.pdf', 'wb') as output:
    writer.write(output)
PDF的写操作
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# Register the TrueType font files under the names passed to setFont() below.
pdfmetrics.registerFont(TTFont('Font1', 'resources/fonts/Vera.ttf'))
pdfmetrics.registerFont(TTFont('Font2', 'resources/fonts/青呱石头体.ttf'))

# Create an A4-sized canvas that writes to resources/demo.pdf and keep the
# page dimensions for positioning.
pdf_canvas = canvas.Canvas('resources/demo.pdf', pagesize=A4)
width, height = A4

# Draw the image: x=20, y=height-395, scaled to 250 wide by 375 high.
image = canvas.ImageReader('resources/guido.jpg')
pdf_canvas.drawImage(image, 20, height - 395, 250, 375)

# Finish the current page; per the ReportLab canvas API, drawing calls made
# after showPage() land on the following page.
pdf_canvas.showPage()

# Draw text: first the Chinese greeting in Font2 with an opaque red fill,
# then the English greeting in Font1 with a half-transparent green fill.
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(1, 0, 0, 1)
pdf_canvas.drawString(width // 2 - 120, height // 2, '你好,世界!')
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, 0.5)
# rotate() is applied before the second drawString(), so only the English
# text is drawn at the 18-degree angle.
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

# Save the document to the file given when the canvas was created.
pdf_canvas.save()
PDF加密
import PyPDF2

# Copy every page of the source document into a fresh writer.
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
for index in range(reader.numPages):
    page = reader.getPage(index)
    writer.addPage(page)
# Protect the output with a password before it is written.
writer.encrypt('foobared')
with open('resources/XGBoost-encrypted.pdf', 'wb') as output:
    writer.write(output)
PDF的解密
import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost-encrypted.pdf')
# A decrypt() result of 0 is treated as a rejected password; any other result
# means the document can be read, so its page count is printed.
if reader.decrypt('foobared') != 0:
    print(reader.numPages)
else:
    print('密码错误')
PDF增加水印
import PyPDF2

from PyPDF2.pdf import PageObject

# reader1 is the document to be watermarked; reader2 is the PDF whose first
# page is used as the watermark.
reader1 = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
reader2 = PyPDF2.PdfFileReader('resources/watermark.pdf')
writer = PyPDF2.PdfFileWriter()

watermark_page = reader2.getPage(0)
for page_num in range(reader1.numPages):
    current_page = reader1.getPage(page_num)  # type: PageObject
    # Merge the watermark page into the current page, then add the result to
    # the output. Each page is merged and added exactly once.
    current_page.mergePage(watermark_page)
    writer.addPage(current_page)

# Write the watermarked document to a new file opened in binary mode.
with open('resources/XGBoost-watermarked.pdf', 'wb') as file:
    writer.write(file)


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

踏墟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值