需求:在网上打印材料时,需要统计待打印文件的总页数,手工逐个统计太麻烦,所以写了一个 Python 脚本来自动统计。
原始目录结构:

代码:
# -*- coding:utf-8 -*-
"""需要网上批量打印文件,但是每次打印后自己手动数页数很麻烦,干脆写个python小程序,获得文件夹下所有的页数,
同时将非pdf文件转成pdf文件,保存在一个指定文件夹下
"""
import os
import PyPDF2
import comtypes.client
# Convert a Word document to PDF
def convert_word_to_pdf(word_path, pdf_path):
    """Convert a Word document to PDF through Word's COM automation interface.

    :param word_path: path of the source Word document
    :param pdf_path: path of the PDF file to generate
    """
    word = comtypes.client.CreateObject("Word.Application")
    try:
        word.Visible = 0
        # The COM server resolves relative paths against its own working
        # directory rather than this process's, so always pass absolute paths.
        document = word.Documents.Open(os.path.abspath(word_path))
        try:
            # 17 is WdSaveFormat.wdFormatPDF.
            document.SaveAs(os.path.abspath(pdf_path), FileFormat=17)
        finally:
            # Close the document even when the export fails.
            document.Close()
    finally:
        # Without Quit() every call leaves a hidden Word process running.
        word.Quit()
# Convert a PowerPoint presentation to PDF
def convert_ppt_to_pdf(ppt_path, pdf_path):
    """Convert a PowerPoint file to PDF through PowerPoint's COM interface.

    :param ppt_path: path of the source .ppt/.pptx file
    :param pdf_path: path of the PDF file to generate
    """
    ppt = comtypes.client.CreateObject("Powerpoint.Application")
    try:
        # The window is made visible, exactly as the original code did.
        ppt.Visible = 1
        # The COM server resolves relative paths against its own working
        # directory rather than this process's, so always pass absolute paths.
        presentation = ppt.Presentations.Open(os.path.abspath(ppt_path))
        try:
            # 32 is PpSaveAsFileType.ppSaveAsPDF.
            presentation.SaveAs(os.path.abspath(pdf_path), FileFormat=32)
        finally:
            # Close the presentation even when the export fails.
            presentation.Close()
    finally:
        # Without Quit() the PowerPoint process stays alive after the script.
        # NOTE(review): if the user already has PowerPoint open, CreateObject
        # may attach to that instance and Quit() would close it too - confirm
        # this is acceptable for the intended batch usage.
        ppt.Quit()
# Collect the files to count, converting Office documents to PDF on the way
def get_all_file_by_type(path, type=(), get_all_dirs=True, output_dir=None):
    """Return the PDF paths for every file under ``path`` matching ``type``.

    PDF files are returned as they are. ``.docx`` files are converted with
    ``convert_word_to_pdf`` and ``.ppt``/``.pptx`` files with
    ``convert_ppt_to_pdf``; the path of the generated PDF is returned instead
    of the original file.

    :param path: directory to scan
    :param type: suffix or tuple of suffixes to accept, matched
        case-insensitively; an empty tuple matches nothing
    :param get_all_dirs: when False only ``path`` itself is scanned, when True
        all sub-directories are scanned as well
    :param output_dir: directory that receives converted PDFs; when None the
        module-level ``convert_pdf_path`` is used, as in earlier versions
    :return: list of PDF file paths
    """
    # A bare string would otherwise be iterated character by character.
    suffixes = (type,) if isinstance(type, str) else tuple(type)
    suffixes = tuple(suffix.lower() for suffix in suffixes)

    file_list = []
    for cur_dir, _dirs, files in os.walk(path):
        for name in files:
            fname = os.path.join(cur_dir, name)
            print("fname:%s" % fname)
            # Compare in lower case so "a.PDF" and "a.pdf" are treated alike.
            if not fname.lower().endswith(suffixes):
                continue

            # splitext keeps dotted names intact ("report.v2.docx" -> "report.v2").
            stem, ext = os.path.splitext(name)
            ext = ext.lower()
            if ext == ".pdf":
                file_list.append(fname)
                continue
            if ext == ".docx":
                convert = convert_word_to_pdf
            elif ext in (".ppt", ".pptx"):
                convert = convert_ppt_to_pdf
            else:
                # Accepted by the suffix filter but not a format we can convert.
                continue

            if output_dir is None:
                # Legacy behaviour: fall back to the global set by the script
                # entry point. Only looked up when a conversion is needed.
                output_dir = convert_pdf_path
            pdf_name = os.path.join(output_dir, stem + ".pdf")
            convert(fname, pdf_name)
            file_list.append(pdf_name)
        if not get_all_dirs:
            # os.walk yields the top directory first; stop before descending.
            print("跳出循环")
            break
    print("总共有%d个文件" % len(file_list))
    return file_list
# Sum the page counts
def compute_page(path, type=("PDF", "pdf", "docx", "pptx", "ppt"), get_all_dirs=False):
    """Return the total page count of the files found under ``path``.

    The file list comes from ``get_all_file_by_type``. Each file is opened
    with PyPDF2; a file that raises any exception is reported on stdout and
    contributes nothing to the total.

    :param path: directory to scan
    :param type: file suffixes forwarded to ``get_all_file_by_type``
    :param get_all_dirs: forwarded to ``get_all_file_by_type``
    :return: sum of the page counts of all readable files
    """
    total_pages = 0
    for pdf_file in get_all_file_by_type(path, type, get_all_dirs):
        try:
            pdf = PyPDF2.PdfFileReader(pdf_file)
            # Encrypted files are tried with an empty password.
            if pdf.isEncrypted:
                pdf.decrypt('')
            pages = pdf.getNumPages()
            print("文件《%s》有%s页" % (pdf_file, pages))
            total_pages = total_pages + pages
        except Exception as err:
            # Best effort: report the failing file and carry on with the rest.
            print("文件%s出现异常:%s" % (pdf_file, err))
    return total_pages
# Create the directory if it does not exist yet
def create_path(path):
    """Ensure that the directory ``path`` exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    :param path: directory to create
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    # A single call avoids the race between an exists() check and mkdir().
    os.makedirs(path, exist_ok=True)
if __name__ == '__main__':
    # Both working folders live next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Folder that receives the PDFs generated from non-PDF files.
    # get_all_file_by_type reads this module-level name.
    convert_pdf_path = os.path.join(script_dir, 'convert_pdf\\')
    create_path(convert_pdf_path)

    # Folder whose files are to be counted.
    path = os.path.join(script_dir, 'files\\')
    print("path:%s" % path)

    counts = compute_page(path, get_all_dirs=True)
    print("总共%d页" % counts)
测试结果:

748

被折叠的 条评论
为什么被折叠?



