python从TXT创建PDF文件——reportlab

本文介绍使用reportlab创建PDF文件,实现从txt到pdf的转换。因部分电子阅读器不能读取txt文档,该方法支持生成中文目录且可点击跳转,给出详细代码,还提及在windows和python2下测试的注意事项,如字体设置、中文解码等。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

使用reportlab创建PDF文件

电子书一般都是txt格式的,某些电子阅读器不能读取txt文档,如DPT-RP1。因此本文使用python实现txt到pdf的转换,并且支持生成目录,目录条目带有可点击跳转的链接(前提是能够在txt文件中识别出每个章节的位置),支持中文。

reportlab的使用可以查看reportlab官方文档。txt转pdf详细代码如下:

# coding: utf-8

# Python 2 only: reload the sys module and set the interpreter-wide default
# string encoding to UTF-8 (this configures an encoding, not a font).
# NOTE(review): presumably needed so that implicit unicode -> str conversions
# of Chinese text later in the script do not fail -- confirm before removing.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Paragraph
from  reportlab.platypus.tableofcontents import TableOfContents
from  reportlab.platypus import PageBreak
from reportlab.lib.pagesizes import A4

# Register the two Chinese TrueType fonts with reportlab. Both font files are
# referenced relative to the current working directory.
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF'))
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf'))

# Start from reportlab's sample stylesheet and add one paragraph style per font.
styles = getSampleStyleSheet()

# 'STSONG': style used by toPDF for ordinary body paragraphs.
styles.add(
    ParagraphStyle(
        name='STSONG',
        fontName='STSONG',
        fontSize=12,
        leading=20,
        firstLineIndent=22,
        wordWrap='CJK',
    )
)

# 'simhei': style used by toPDF for chapter headings; MyDocTemplate maps this
# style name to a level-1 table-of-contents entry.
styles.add(
    ParagraphStyle(
        name='simhei',
        fontName='simhei',
        fontSize=14,
        leading=25,
        wordWrap='CJK',
    )
)

class MyDocTemplate(BaseDocTemplate):
    """Document template that reports heading paragraphs to the table of contents.

    After each flowable is placed, paragraphs whose style name is 'Heading1'
    or 'simhei' are announced through a 'TOCEntry' notification.
    """

    def __init__(self, filename, **kw):
        """Create the template.

        Args:
            filename: output PDF path, forwarded to BaseDocTemplate.
            **kw: any further keyword arguments, forwarded unchanged to
                BaseDocTemplate.__init__.
        """
        # NOTE(review): this is assigned *before* the base initialiser runs;
        # BaseDocTemplate.__init__ appears to reset allowSplitting from its own
        # defaults unless it is passed in kw, so this line may have no effect.
        # Confirm before relying on it.
        self.allowSplitting = 0
        # Call the base initialiser directly; the apply() builtin used
        # previously is deprecated and no longer exists in Python 3.
        BaseDocTemplate.__init__(self, filename, **kw)

    # Entries to the table of contents can be done either manually by
    # calling the addEntry method on the TableOfContents object or automatically
    # by sending a 'TOCEntry' notification in the afterFlowable method of
    # the DocTemplate you are using. The data to be passed to notify is a list
    # of three or four items containing a level number, the entry text, the page
    # number and an optional destination key which the entry should point to.
    # This list will usually be created in a document template's method like
    # afterFlowable(), making notification calls using the notify() method
    # with appropriate data.

    def afterFlowable(self, flowable):
        """Register a TOC entry for *flowable* if it is a heading paragraph.

        Only objects whose class is named 'Paragraph' are considered. The
        style name decides the TOC level: 'Heading1' -> 0, 'simhei' -> 1;
        any other style is ignored. When the flowable carries a
        ``_bookmarkName`` attribute it is appended to the notification so the
        entry can link to it.
        """
        if flowable.__class__.__name__ != 'Paragraph':
            return

        style = flowable.style.name
        if style == 'Heading1':
            level = 0
        elif style == 'simhei':
            level = 1
        else:
            return

        # Extract the text only once we know this paragraph is a heading.
        text = flowable.getPlainText()
        E = [level, text, self.page]
        # if we have a bookmark name append that to our notify data
        bn = getattr(flowable, '_bookmarkName', None)
        if bn is not None:
            E.append(bn)
        self.notify('TOCEntry', tuple(E))


# this function makes our headings
def doHeading(data, text, sty):
    """Append a heading paragraph with a bookmark anchor to *data*.

    Args:
        data: list of flowables; the new heading Paragraph is appended to it.
        text: heading text (paragraph markup).
        sty: paragraph style used for the heading.

    The bookmark name is the SHA-1 hex digest of the heading text, so two
    headings with identical text share the same bookmark name.
    """
    from hashlib import sha1

    # sha1() hashes bytes. Encode text explicitly as UTF-8 instead of relying
    # on the interpreter's default encoding, so non-ASCII (e.g. Chinese)
    # headings hash reliably; byte strings are hashed unchanged.
    raw = text if isinstance(text, bytes) else text.encode('utf-8')
    # create bookmarkname
    bn = sha1(raw).hexdigest()
    # modify paragraph text to include an anchor point with name bn
    h = Paragraph(text + '<a name="%s"/>' % bn, sty)
    # store the bookmark name on the flowable so afterFlowable can see this
    h._bookmarkName = bn
    data.append(h)

# Page Number
def footer(canvas, doc):
    """Draw the current page number near the bottom of the page.

    Used as the ``onPage`` callback of the page template.

    Args:
        canvas: canvas of the page being drawn.
        doc: document template; its width, leftMargin and bottomMargin are
            used to size and position the number.
    """
    current_page = canvas.getPageNumber()
    # Bracket the drawing with saveState/restoreState on the canvas.
    canvas.saveState()
    label = Paragraph("%d" % current_page, styles['Normal'])
    # wrap() must run before drawOn(); it reports the size the label occupies.
    used_width, used_height = label.wrap(doc.width, doc.bottomMargin)
    x_pos = doc.leftMargin + used_width/2
    label.drawOn(canvas, x_pos, used_height)
    canvas.restoreState()

# load txt file
def loadTxt(txt_path):
    """Read the file at *txt_path* and return its lines as a list.

    The file is opened in text mode ('r'); each returned line keeps its
    trailing newline, if it had one.
    """
    with open(txt_path, 'r') as source:
        # Iterating the file object yields the same lines as readlines().
        return list(source)

def toPDF(txt_datas, pdf_path):
    """Render text lines into a PDF with a clickable table of contents.

    Args:
        txt_datas: iterable of lines. Byte strings are decoded as gb2312,
            falling back to gbk; lines that are already text are used as-is.
        pdf_path: path of the PDF file to write.

    Raises:
        UnicodeDecodeError: if a byte line is valid in neither gb2312 nor gbk.

    A line is treated as a chapter heading when it starts with u"第" and
    contains u"章"; every other non-empty line becomes a body paragraph.
    Progress is printed for each line added.
    """
    from xml.sax.saxutils import escape

    PDF = MyDocTemplate(pdf_path, pagesize=A4)
    frame = Frame(PDF.leftMargin, PDF.bottomMargin, PDF.width, PDF.height,
                  id='normal')
    template = PageTemplate(frames=frame, onPage=footer)
    PDF.addPageTemplates([template])

    data = []

    # table of contents
    toc = TableOfContents()
    # setting contents fontName and fontSize
    toc.levelStyles = [
        ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1',
                       leftIndent=20, firstLineIndent=-20, spaceBefore=10,
                       leading=16),
        ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2',
                       leftIndent=40, firstLineIndent=-20, spaceBefore=5,
                       leading=12),
    ]
    data.append(toc)  # add contents
    data.append(PageBreak())  # next page

    NUM = 0
    # add txt
    for txt_data in txt_datas:
        # Decode first, and only when the line is still a byte string.
        if isinstance(txt_data, bytes):
            try:
                txt_data = txt_data.decode("gb2312")
            except UnicodeDecodeError:
                # Characters outside gb2312 (e.g. traditional Chinese).
                txt_data = txt_data.decode("gbk")

        # Strip on the decoded text so that full-width (U+3000) indentation
        # is removed as well; stripping raw bytes only removes ASCII
        # whitespace and would hide chapter lines from the test below.
        txt_data = txt_data.strip()
        if not txt_data:  # no text
            continue

        is_chapter = txt_data[0] == u"第" and (u"章" in txt_data)
        # Paragraph interprets its text as markup; escape '<', '>' and '&'
        # so literal characters from the book are not parsed as tags.
        markup = escape(txt_data)

        if is_chapter:
            doHeading(data, markup, styles['simhei'])
        else:
            data.append(Paragraph(markup, styles['STSONG']))
        NUM = NUM + 1
        print('{} line'.format(NUM))

    print('Build pdf!')
    # multiBuild is used because the document contains a table of contents.
    PDF.multiBuild(data)

if __name__ == "__main__":
    # Script entry point: convert the sample book to a PDF.
    # The byte-string literals are decoded from UTF-8 into unicode paths.
    source_name = "财运天降.txt".decode("utf8")
    target_name = "财运天降.pdf".decode("utf8")
    toPDF(loadTxt(source_name), target_name)

本代码在windows和python2下进行测试,主要注意事项有:

  • 系统默认编码设置(设置的是默认编码而非字体,仅适用于python2):
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
  • 中文字体支持:
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
  • 中文目录字体:
    toc.levelStyles = [
        ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
           leading=16),
        ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
    ]
  • 目录定位,这个需要根据你实际的txt文章进行定位修改
        if txt_data[0] == u"第" and (u"章" in txt_data):
  • 中文解码:由于繁体中文等字符无法用gb2312解码,因此使用try-except的方式,先尝试gb2312,解码失败时改用gbk
        try:
            txt_data = txt_data.decode("gb2312")
        except:
            txt_data = txt_data.decode("gbk")

其效果如下:

网上随便找了个txt文章:
result1
生成pdf目录:
result2
生成pdf内容:
result3

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值