# Python cannot process legacy .doc files directly, so convert .doc to .docx first.
import os
from glob import glob
from win32com import client
# 转换doc为docx
def doc2docx(fn):
    """Convert a Word ``.doc`` file to ``.docx`` through Word COM automation.

    A ``Word.Application`` object is created, ``fn`` is opened and saved
    under the same path with an ``x`` appended (``report.doc`` ->
    ``report.docx``), then the document is closed and Word is quit.

    Args:
        fn: Path of the ``.doc`` file to convert. The batch loop in this file
            passes an absolute path.

    Raises:
        Exception: Whatever the COM layer raises when the file cannot be
            opened or saved. The document and the Word application are still
            released before the error propagates.
    """
    word = client.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(fn)
        try:
            # 12 selects the .docx save format (16 is noted as an alternative).
            doc.SaveAs("{}x".format(fn), 12)
        finally:
            # Close the document even when saving failed.
            doc.Close()
    finally:
        # Always quit, so a failed conversion does not leave Word running.
        word.Quit()
# 转换docx为doc
def docx2doc(fn):
    """Convert a Word ``.docx`` file to ``.doc`` through Word COM automation.

    A ``Word.Application`` object is created, ``fn`` is opened and saved
    under the same path with its last character removed (``report.docx`` ->
    ``report.doc``), then the document is closed and Word is quit.

    Args:
        fn: Path of the ``.docx`` file to convert. The output name is derived
            by dropping the final character, so ``fn`` is expected to end in
            ``.docx``.

    Raises:
        Exception: Whatever the COM layer raises when the file cannot be
            opened or saved. The document and the Word application are still
            released before the error propagates.
    """
    word = client.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(fn)
        try:
            # 0 selects the legacy .doc save format.
            doc.SaveAs("{}".format(fn[:-1]), 0)
        finally:
            # Close the document even when saving failed.
            doc.Close()
    finally:
        # Always quit, so a failed conversion does not leave Word running.
        word.Quit()
# path = './b.doc'
# abs_path = os.path.abspath(path)
# print(abs_path)
# Batch driver: convert every .doc file matched by the glob pattern to .docx.
doc_path = './files/*.doc'
doc_li = glob(doc_path)
total_num = len(doc_li)
processed_num = 0  # stays 0 when no files match
for processed_num, one_doc in enumerate(doc_li, start=1):
    # Word is handed an absolute path; presumably it does not resolve
    # relative paths against this script's working directory - confirm.
    abs_doc = os.path.abspath(one_doc)
    print('processed {} / {}'.format(processed_num, total_num))
    try:
        doc2docx(abs_doc)
    except Exception as e:
        # Best-effort batch: report which file failed and carry on with the rest.
        print('failed to convert {}: {}'.format(abs_doc, e))
# pywin32 wheel (provides the win32com package used above):
# https://pypi.tuna.tsinghua.edu.cn/packages/25/54/177ee28fec4ecd23fa539f3df78067e7a4927515b84eac34e36060fcdb8d/pywin32-300-cp37-cp37m-win_amd64.whl