"""Count the characters in a PowerPoint presentation, notes included.

The script drives PowerPoint through COM, collects the notes text of every
slide plus the text of every shape that has a text frame, writes the combined
text to a file, and prints how many letters, digits, spaces, Chinese
characters and other characters that text contains.
"""
import re

import win32com
from win32com.client import Dispatch, constants

ppt = win32com.client.Dispatch('PowerPoint.Application')
ppt.Visible = 1

noteList = []
contentList = []
try:
    pptSel = ppt.Presentations.Open(r"C:\Users\16254\Desktop\in.pptx")
    # The return value is deliberately unused, as in the original script.
    # NOTE(review): presumably this call is only here to build the pywin32
    # type-library cache; confirm whether it is still needed.
    win32com.client.gencache.EnsureDispatch('PowerPoint.Application')

    slide_count = pptSel.Slides.Count
    print(slide_count)

    # Slides and shapes are addressed starting at 1, hence range(1, n + 1).
    for i in range(1, slide_count + 1):
        slide = pptSel.Slides(i)  # look the slide up once per iteration
        shape_count = slide.Shapes.Count
        # NOTE(review): Placeholders(2) is presumably the notes body
        # placeholder of the notes page; verify for decks with custom
        # notes layouts.
        note = slide.NotesPage.Shapes.Placeholders(2).TextFrame.TextRange.Text
        noteList.append(note)
        print(note)
        print(shape_count)
        for j in range(1, shape_count + 1):
            shape = slide.Shapes(j)
            # Only shapes with a text frame are read.
            if shape.HasTextFrame:
                s = shape.TextFrame.TextRange.Text
                contentList.append(s)
                print(s)
finally:
    # Always shut PowerPoint down, even when a COM call above fails, so no
    # orphaned PowerPoint process is left behind.
    ppt.Quit()

notestr = ''.join(noteList)
contentstr = ''.join(contentList)
outstr = notestr + contentstr

# Write with an explicit encoding: the platform default code page may be
# unable to encode some of the characters found on the slides.
with open(r"C:\Users\16254\Desktop\in.txt", "w", encoding="utf-8") as f:
    f.write(outstr)

# The original called outstr.replace(' ', '') here and discarded the result
# (strings are immutable), so it never changed outstr. The call is removed
# rather than "fixed": the spaces must stay in outstr to be counted below.
print(len(outstr))

char = re.findall(r'[a-zA-Z]', outstr)
num = re.findall(r'[0-9]', outstr)
blank = re.findall(r' ', outstr)
# \u4E00-\u9FFF is the range used here for Chinese characters.
chi = re.findall(r'[\u4E00-\u9FFF]', outstr)
# Everything that is not a letter, digit, space or Chinese character.
other = len(outstr) - len(char) - len(num) - len(blank) - len(chi)
print("字母:", len(char),"\n数字:", len(num),"\n空格:",len(blank),"\n中文:",len(chi),"\n其他:",other)
# 利用Python统计PPT中的文字(包括备注)
# 最新推荐文章于 2024-10-17 13:10:58 发布