文章目录
Python学习的第二十五天:对PDF文件的读写操作
对Excel操作的补充
# Generate a bar chart in an Excel workbook with openpyxl.
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference

# The workbook is opened in write-only mode and a sheet is created explicitly.
wb = Workbook(write_only=True)
ws = wb.create_sheet()

# Row 1 is the header row; column 1 holds the category labels.
rows = [
    ('类别', '销售A组', '销售B组'),
    ('手机', 40, 30),
    ('平板', 50, 60),
    ('笔记本', 80, 70),
    ('外围设备', 20, 10),
]
for row in rows:
    ws.append(row)

# Configure a vertical ('col') bar chart with its title and axis titles.
chart1 = BarChart()
chart1.type = 'col'
chart1.style = 10
chart1.title = '销售统计图'
chart1.y_axis.title = '销量'
chart1.x_axis.title = '商品类别'

# Data covers columns 2-3 including the header row, which supplies the series
# titles (titles_from_data=True); categories are column 1 without the header.
data = Reference(ws, min_col=2, min_row=1, max_row=5, max_col=3)
cats = Reference(ws, min_col=1, min_row=2, max_row=5)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cats)
chart1.shape = 4

# Anchor the chart at cell A10 and write the workbook to disk.
ws.add_chart(chart1, 'A10')
wb.save('resources/demo.xlsx')
Python的实用方法
获取指定文件夹下的所有内容
import os

# Directory whose entries are listed.
target_dir = '/Users/Hao/Desktop'
files_list = os.listdir(target_dir)
for file in files_list:
    # os.listdir() returns bare entry names. Join them with the listed
    # directory: os.path.abspath(name) would resolve against the current
    # working directory and print paths that do not exist.
    fullpath = os.path.join(target_dir, file)
    print(fullpath)
shutil模块(封装了高级的文件操作函数)
import shutil

# Look up the path of the 'python' command and print it.
python_path = shutil.which('python')
print(python_path)

# Move a file from the resources directory to 'sales.csv'.
shutil.move(src='resources/sales_data.csv', dst='sales.csv')
PDF的相关操作
操作PDF需要的工具:PyPDF2
读取PDF文件并提取其中的文字
import PyPDF2
from PyPDF2.pdf import PageObject

# Open the source PDF and create a writer that collects the modified pages.
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()

# Visit every page by index; numPages is the page count of the document.
for page_num in range(reader.numPages):
    current_page = reader.getPage(page_num)  # type: PageObject
    # To extract the text of the page instead:
    # print(current_page.extractText())
    # Rotate the page 90 degrees clockwise and add it to the output.
    current_page.rotateClockwise(90)
    writer.addPage(current_page)
    # NOTE(review): the original indentation was lost; this call is assumed
    # to belong to the loop (one blank page after every page). Dedent it if
    # only a single trailing blank page was intended.
    writer.addBlankPage()

# PDF content is binary, so the output file is opened in 'wb' mode.
with open('resources/XGBoost-modified.pdf', 'wb') as file:
    writer.write(file)
PDF的写操作
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# Register the TrueType fonts under the aliases passed to setFont() below.
for font_name, font_path in (
    ('Font1', 'resources/fonts/Vera.ttf'),
    ('Font2', 'resources/fonts/青呱石头体.ttf'),
):
    pdfmetrics.registerFont(TTFont(font_name, font_path))

# Create an A4-sized canvas that is written to resources/demo.pdf.
width, height = A4
pdf_canvas = canvas.Canvas('resources/demo.pdf', pagesize=A4)

# Draw a 250x375 picture; its y position is computed from the page height.
image = canvas.ImageReader('resources/guido.jpg')
pdf_canvas.drawImage(image, 20, height - 395, width=250, height=375)

# Finish the current page before drawing the text.
pdf_canvas.showPage()

# Opaque red text in Font2, positioned relative to the page centre.
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(1, 0, 0, alpha=1)
pdf_canvas.drawString(width // 2 - 120, height // 2, '你好,世界!')

# Half-transparent green text in Font1, drawn after rotating by 18.
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, alpha=0.5)
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

# Save the document.
pdf_canvas.save()
PDF加密
import PyPDF2

# Copy every page of the source PDF into a new writer.
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
for page_num in range(reader.numPages):
    writer.addPage(reader.getPage(page_num))

# Encrypt the output with a password before it is written.
writer.encrypt('foobared')

# PDF content is binary, so the output file is opened in 'wb' mode.
with open('resources/XGBoost-encrypted.pdf', 'wb') as file:
    writer.write(file)
PDF的解密
import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost-encrypted.pdf')

# A result of 0 from decrypt() is treated as a wrong password; any other
# result means the document can be read, so its page count is printed.
if reader.decrypt('foobared') == 0:
    print('密码错误')
else:
    print(reader.numPages)
PDF增加水印
import PyPDF2
from PyPDF2.pdf import PageObject

# reader1 is the document to stamp; reader2 supplies the watermark page.
reader1 = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
reader2 = PyPDF2.PdfFileReader('resources/watermark.pdf')
writer = PyPDF2.PdfFileWriter()

# The first page of the watermark file is merged onto every page.
watermark_page = reader2.getPage(0)
for page_num in range(reader1.numPages):
    current_page = reader1.getPage(page_num)  # type: PageObject
    current_page.mergePage(watermark_page)
    writer.addPage(current_page)

# PDF content is binary, so the output file is opened in 'wb' mode.
# A duplicated copy of the last four statements that followed here was
# removed: it merged and added the last page a second time and used
# typographic quotes, which are a syntax error.
with open('resources/XGBoost-watermarked.pdf', 'wb') as file:
    writer.write(file)