PyPDF2 merge pdf and keep bookmarks

关于 Python 处理 PDF 的第三方库有很多，其中基础的是 ReportLab。
参考：ReportLab User Guide。
Py2pdf 实际上是 ReportLab 的一个 Demo；ReportLab 本身没有直接合并 PDF 和添加书签（bookmarks）的工具，而 Py2pdf 中有经过二次开发得到的这类工具。

from __future__ import print_function
from sys import argv
from PyPDF2 import PdfFileMerger, PdfFileReader
import os
class bookmark_class:
    """Small record describing one bookmark entry.

    Attributes are set directly from the constructor arguments:
    ``page_num`` and ``title`` are stored as given, and ``parent`` is the
    value identifying the parent entry (0 when not supplied).
    """

    def __init__(self, title, page_num, parent = 0):
        # Same attribute creation order as before: page_num, parent, title.
        self.page_num, self.parent, self.title = page_num, parent, title

    def print_content(self):
        """Print the three fields to stdout, one ``name:  value`` per line."""
        for field in ("page_num", "parent", "title"):
            # The label keeps its trailing space; print() adds the separator.
            print(field + ": ", getattr(self, field))

# get bookmark info from the pdf outlines
def _collect_bookmarks(outlines, parent):
    """Recursively flatten ``outlines`` into a list of ``bookmark_class``.

    Kept separate from ``bookmark_list`` because that function's first
    parameter has the same name as the function itself, which makes a direct
    recursive call inside it impossible (the name would refer to the list).
    """
    print("bookmark:", outlines)
    result = []
    # Parent value handed to the next nested list; starts as the caller's.
    nested_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # A list holds the children of the entry collected just before it.
            result += _collect_bookmarks(item, nested_parent)
        else:
            bookmark = bookmark_class(item.get("/Title"), item.get("/Page"),
                                      parent)
            result.append(bookmark)
            print("bookmark_content:")
            bookmark.print_content()
        # Nothing may have been collected yet (e.g. a leading empty sub-list),
        # so only update the parent once there is a last entry to read.
        if result:
            nested_parent = result[-1].page_num
    return result


def bookmark_list(bookmark_list, parent = 0):
    """Flatten a nested outline structure into a flat list of bookmarks.

    Args:
        bookmark_list: Iterable of outline items.  An item that is a ``list``
            is treated as a nested level and flattened recursively; any other
            item must provide ``.get("/Title")`` and ``.get("/Page")``.
        parent: Parent value stored on every non-list item of this level.
            Nested levels receive the ``page_num`` of the most recently
            collected entry instead.

    Returns:
        A list of ``bookmark_class`` objects in traversal order.

    Each level and each collected entry is also printed to stdout.
    """
    return _collect_bookmarks(bookmark_list, parent)

# merge pdf, keep origin bookmarks and add a higher bookmark for each pdf
# if there is marks, one mark corresponds to one pdf
def merge_pdf(pdfs, target_file = "res.pdf", marks = None):
    """Merge several PDF files into one and re-add their bookmarks.

    Every input that exists on disk is appended to a ``PdfFileMerger``.  A
    top-level bookmark is added for each appended file, and the entries
    returned by ``bookmark_list`` for that file are added below it.

    Args:
        pdfs: Sequence of input PDF paths.  Paths that are not regular files
            are skipped.  When empty or None, nothing happens and no output
            file is written.
        target_file: Path of the merged PDF to write.
        marks: Optional sequence of labels; ``marks[i]`` is used as the
            top-level bookmark title of ``pdfs[i]``.  Files without a
            matching entry are labelled with their path.

    Returns:
        None.

    A file that cannot be read or bookmarked is reported on stdout and the
    merge continues with the remaining files.
    """
    if not pdfs:
        return
    merger = PdfFileMerger()
    opened = []          # input handles, closed once the output is written
    page_num = 0         # index of the current file's first page in the output
    # bookmark_r[i] is the first bookmark added on merged page i (or None);
    # it is used to look up parents by page index.
    bookmark_r = [None]
    try:
        for index, pdf_file in enumerate(pdfs):
            if not os.path.isfile(pdf_file):
                continue
            suanz_num = 0
            appended = False
            try:
                handle = open(pdf_file, 'rb')
                opened.append(handle)
                file_suanz = PdfFileReader(handle)
                suanz_num = file_suanz.getNumPages()
                print("suanz_num, page_num: ", suanz_num, page_num)
                # Parents are requested relative to this file (base 0) so that
                # every parent value can be shifted by page_num uniformly.
                bookmarks = bookmark_list(file_suanz.getOutlines(), 0)
                # NOTE(review): PdfFileMerger.append has an import_bookmarks
                # option; check whether the source outline is imported here
                # as well, which would duplicate the entries added below.
                merger.append(file_suanz)
                appended = True
                bookmark_r += [None]*suanz_num
                print("give bookmark pdffile, ", page_num)
                mark = pdf_file
                if marks and len(marks) > index:
                    mark = marks[index]
                bookmark_r[page_num] = merger.addBookmark(mark, page_num)
                for bookmark in bookmarks:
                    # NOTE(review): assumes bookmark.page_num and
                    # bookmark.parent are int page indexes local to this
                    # file -- confirm what "/Page" yields in bookmark_list.
                    target_page = page_num + bookmark.page_num
                    parent_slot = page_num + bookmark.parent
                    print("give back bookmark, ", target_page)
                    print("use back bookmark, ", parent_slot)
                    bookmark_suanz = merger.addBookmark(
                        bookmark.title,
                        target_page,
                        bookmark_r[parent_slot])
                    # Keep only the first bookmark per page as a parent.
                    if not bookmark_r[target_page]:
                        bookmark_r[target_page] = bookmark_suanz
            except Exception:
                # Best effort: report and go on with the next file, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("merge pdf, but {} read failed".format(pdf_file))
            finally:
                # Once the pages are in the merger the offset must advance,
                # even if adding this file's bookmarks failed part-way.
                if appended:
                    page_num += suanz_num
        merger.write(target_file)
    finally:
        for handle in opened:
            handle.close()
        merger.close()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值