python-pptx解析pptx模板

from pptx import Presentation
from pptx.slide import Slide
from pptx.slide import NotesSlide
from pptx.text.text import TextFrame
from pptx.shapes.shapetree import SlideShapes
from pptx.shapes.placeholder import SlidePlaceholder
import json

# Template presentation to parse (hard-coded absolute path).
path = "C:/luo/tmp/tmpl/009999.pptx"
# Load the presentation once; the script below reads slides from `prs` and
# finally saves it to a different output file.
prs = Presentation(path)

def res(obj) -> Slide:
    """Return ``obj`` unchanged, annotated as a ``Slide``.

    Nothing is converted or checked at runtime; the return annotation is the
    only purpose of this helper, so editors and type checkers treat the
    result as a ``Slide``.
    """
    slide = obj
    return slide
def res2(obj) -> NotesSlide:
    """Return ``obj`` unchanged, annotated as a ``NotesSlide``.

    Nothing is converted or checked at runtime; the return annotation is the
    only purpose of this helper, so editors and type checkers treat the
    result as a ``NotesSlide``.
    """
    notes_slide = obj
    return notes_slide
def res3(obj) -> SlideShapes:
    """Return ``obj`` unchanged, annotated as a ``SlideShapes`` collection.

    Nothing is converted or checked at runtime; the return annotation is the
    only purpose of this helper, so editors and type checkers treat the
    result as ``SlideShapes``.
    """
    shape_tree = obj
    return shape_tree
def res4(obj) -> TextFrame:
    """Return ``obj`` unchanged, annotated as a ``TextFrame``.

    Nothing is converted or checked at runtime; the return annotation is the
    only purpose of this helper, so editors and type checkers treat the
    result as a ``TextFrame``.
    """
    text_frame = obj
    return text_frame
def res5(obj) -> SlidePlaceholder:
    """Return ``obj`` unchanged, annotated as a ``SlidePlaceholder``.

    Nothing is converted or checked at runtime; the return annotation is the
    only purpose of this helper, so editors and type checkers treat the
    result as a ``SlidePlaceholder``.
    """
    placeholder = obj
    return placeholder

# Map 1-based page numbers to the slide ids stored in the presentation.
page_ids = {i + 1: slide.slide_id for i, slide in enumerate(prs.slides)}
print(page_ids)
for page, slide_id in page_ids.items():
    slide = res(prs.slides.get(slide_id))
    shapes = res3(slide.shapes)
    for shape in shapes:
        # Only shapes that own a text frame carry text; others print as "".
        # (Assign shape.text = '' here to blank the template text instead.)
        shape_text = shape.text if shape.has_text_frame else ""
        # placeholder_format raises ValueError on shapes that are not
        # placeholders (pictures, free text boxes, ...), so only read the
        # placeholder idx when the shape really is one.
        idx = shape.placeholder_format.idx if shape.is_placeholder else None
        print("page: " + str(page) + ", idx: " + str(idx) + "," + str(shape_text))

    # Read the speaker notes. has_notes_slide is checked first because merely
    # accessing slide.notes_slide would create a notes slide when none exists.
    if not slide.has_notes_slide:
        continue
    notes_slide = res2(slide.notes_slide)
    notes_frame = notes_slide.notes_text_frame
    if notes_frame is None:
        # The notes slide has no notes placeholder, hence no text to parse.
        continue
    tf = res4(notes_frame)
    note_text = str(tf.text).strip()
    if not note_text:
        continue
    try:
        note_json = json.loads(note_text)
    except json.JSONDecodeError as err:
        # A free-form (non-JSON) note must not abort the whole run; report it
        # and leave the note untouched.
        print("page: " + str(page) + ", note is not valid JSON: " + str(err))
        continue
    for jn in note_json:
        print(jn['name'])
    tf.text = ''  # clear the note so the saved presentation no longer contains it

output = "C:/luo/tmp/tmpl/009999-gen-report.pptx"
prs.save(output)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值