只能说是网上代码进行优化后的究极缝合怪
import sys
#from win32com.client import Dispatch, constants
from pptx import Presentation
from pptx.util import Cm, Pt
import codecs
import pandas as pd
import win32com.client
import textract
import docx2txt
import docx
def change_doc_to_txt(word_path, save_path):
    """Convert a Word document to a text file by driving Word through COM.

    Args:
        word_path: Path of the document passed to ``Documents.Open``.
        save_path: Destination path passed to ``SaveAs``.

    The document is closed and the Word application is quit even when
    opening or saving raises, so a failed conversion does not leave a
    Word process running in the background.
    """
    word = win32com.client.Dispatch('Word.Application')  # start the Word application
    try:
        doc = word.Documents.Open(word_path)
        try:
            print('保存中。。。')
            doc.SaveAs(save_path, 2)  # file format 2: save as plain text
        finally:
            doc.Close()
    finally:
        word.Quit()
def change_docx_to_txt(word_path, save_path):
    """Write the text of a .docx file into a text file.

    The text of every paragraph is written first, followed by the text of
    every table cell (table by table, row by row). No separator is inserted
    between the written pieces.

    Args:
        word_path: Path of the .docx file passed to ``docx.Document``.
        save_path: Path of the text file to create or overwrite.
    """
    print('读取中。。。')
    doc = docx.Document(word_path)
    # The context manager guarantees the output file is closed even if
    # reading the document or writing fails part-way through.
    with open(save_path, "w") as f:
        for paragraph in doc.paragraphs:
            f.write(paragraph.text)
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    f.write(cell.text)
def change_ppt_to_txt(word_path, save_path):
#兼容ppt和pptx
# ppt = win32com.client.Dispatch('PowerPoint.Application')
# pptSel = ppt.Presen

最低0.47元/天 解锁文章
220

被折叠的 条评论
为什么被折叠?



