# python docx 拷贝不规则表格
#
# 原创于 2025-12-29 22:33:34 发布 · 51 阅读
# CC 4.0 BY-SA版权
# 文章标签：
from docx import Document
from docx.table import _Cell
from copy import deepcopy


def copy_nested_table(source_doc_path, target_doc_path, output_doc_path,
                      source_table_index, source_cell_row, source_cell_col,
                      target_table_index, target_cell_row, target_cell_col):
    """
    Copy the first table nested inside a source-document table cell into a
    cell of a table in the target document, then save the result.

    The existing content of the target cell is discarded and replaced by the
    copied table followed by one empty paragraph.

    Args:
        source_doc_path: Path of the source document.
        target_doc_path: Path of the target document.
        output_doc_path: Path the modified target document is saved to.
        source_table_index: Index (0-based) of the outer table in the source
            document.
        source_cell_row: Row index of the source cell holding the nested table.
        source_cell_col: Column index of the source cell holding the nested
            table.
        target_table_index: Index of the outer table in the target document.
        target_cell_row: Row index of the target cell that receives the table.
        target_cell_col: Column index of the target cell that receives the
            table.

    Returns:
        ``output_doc_path`` on success, or ``None`` (nothing is saved) when the
        source cell contains no nested table.
    """

    # 1. Open the source and target documents.
    source_doc = Document(source_doc_path)
    target_doc = Document(target_doc_path)

    # 2. Locate the source cell and collect every <w:tbl> below it.
    source_table = source_doc.tables[source_table_index]
    source_cell = source_table.cell(source_cell_row, source_cell_col)
    source_nested_tables = source_cell._element.xpath('.//w:tbl')

    if not source_nested_tables:
        print("源单元格中没有找到嵌套表格")
        return None

    # Only the first nested table (in document order) is copied.
    source_nested_table_xml = source_nested_tables[0]

    # 3. Locate the target cell.
    target_table = target_doc.tables[target_table_index]
    target_cell = target_table.cell(target_cell_row, target_cell_col)

    # 4. Drop whatever the target cell currently contains.
    _clear_cell_contents(target_cell)

    # 5. Place a deep copy of the table XML into the target cell; copying keeps
    #    the element in the source tree instead of moving it.
    copied_table_xml = deepcopy(source_nested_table_xml)

    # Word requires a paragraph as the LAST element of every table cell; a cell
    # that ends with a table can make Word report the file as corrupt. So the
    # table goes in front of the trailing paragraph rather than after it.
    remaining_paragraphs = target_cell.paragraphs
    if remaining_paragraphs:
        remaining_paragraphs[-1]._element.addprevious(copied_table_xml)
    else:
        # No paragraph left in the cell: append the table, then close the cell
        # with a fresh empty paragraph.
        target_cell._element.append(copied_table_xml)
        target_cell.add_paragraph()

    # 6. Save the modified target document.
    target_doc.save(output_doc_path)
    print(f"嵌套表格已成功复制到 {output_doc_path}")

    return output_doc_path


def _clear_cell_contents(cell):
    """Remove everything from *cell*, leaving it with empty text."""
    # Gather the XML nodes of the cell's paragraphs and of its directly
    # nested tables before touching the tree.
    stale_nodes = [paragraph._element for paragraph in cell.paragraphs]
    stale_nodes.extend(table._element for table in cell.tables)

    # Detach each gathered node from its parent element.
    for node in stale_nodes:
        node.getparent().remove(node)

    # Assigning empty text makes sure the cell still holds one (empty)
    # paragraph afterwards.
    cell.text = ''

#
# # 更高级的版本：处理多个嵌套表格和复杂的单元格结构
# class NestedTableCopier:
#     def __init__(self):
#         pass
#
#     def copy_nested_tables_complete(self, source_doc_path, target_doc_path, output_doc_path,
#                                     source_table_idx, source_cell_loc,
#                                     target_table_idx, target_cell_loc,
#                                     copy_all_nested=False):
#         """
#         更完整的嵌套表格复制功能
#
#         参数：
#         - copy_all_nested: 是否复制源单元格中的所有嵌套表格
#         """
#
#         source_doc = Document(source_doc_path)
#         target_doc = Document(target_doc_path)
#
#         # 解析位置参数（支持元组或字典格式）
#         if isinstance(source_cell_loc, tuple):
#             source_row, source_col = source_cell_loc
#         elif isinstance(source_cell_loc, dict):
#             source_row = source_cell_loc['row']
#             source_col = source_cell_loc['col']
#
#         if isinstance(target_cell_loc, tuple):
#             target_row, target_col = target_cell_loc
#         elif isinstance(target_cell_loc, dict):
#             target_row = target_cell_loc['row']
#             target_col = target_cell_loc['col']
#
#         # 获取源表格和单元格
#         source_table = source_doc.tables[source_table_idx]
#         source_cell = source_table.cell(source_row, source_col)
#
#         # 获取目标表格和单元格
#         target_table = target_doc.tables[target_table_idx]
#         target_cell = target_table.cell(target_row, target_col)
#
#         # 清空目标单元格
#         self.clear_cell_completely(target_cell)
#
#         # 获取源单元格中的所有嵌套表格
#         nested_tables_elements = source_cell._element.xpath('.//w:tbl')
#
#         if not nested_tables_elements:
#             print("警告：源单元格中没有找到嵌套表格")
#             return False
#
#         # 复制嵌套表格
#         if copy_all_nested:
#             # 复制所有嵌套表格
#             for table_element in nested_tables_elements:
#                 target_cell._element.append(deepcopy(table_element))
#         else:
#             # 只复制第一个嵌套表格
#             target_cell._element.append(deepcopy(nested_tables_elements[0]))
#
#         # 保存文档
#         target_doc.save(output_doc_path)
#         print(f"操作完成，文档已保存到: {output_doc_path}")
#         return True
#
#     def clear_cell_completely(self, cell):
#         """完全清空单元格内容"""
#         # 删除所有子元素
#         cell._element.clear_content()
#
#         # 添加一个空段落（Word要求单元格不能完全为空）
#         cell._element.append(self._create_empty_paragraph())
#
#     def _create_empty_paragraph(self):
#         """创建一个空的段落元素"""
#         from docx.oxml import OxmlElement
#         from docx.oxml.ns import qn
#
#         p = OxmlElement('w:p')
#         pPr = OxmlElement('w:pPr')
#         p.append(pPr)
#         return p
#
#     def get_cell_nested_table_info(self, doc_path, table_idx, cell_loc):
#         """获取单元格中嵌套表格的信息"""
#         doc = Document(doc_path)
#
#         if isinstance(cell_loc, tuple):
#             row, col = cell_loc
#
#         table = doc.tables[table_idx]
#         cell = table.cell(row, col)
#
#         nested_tables = cell.tables
#         nested_elements = cell._element.xpath('.//w:tbl')
#
#         info = {
#             'cell_text': cell.text,
#             'has_nested_tables': len(nested_tables) > 0,
#             'nested_table_count': len(nested_tables),
#             'nested_table_elements': len(nested_elements),
#             'tables': []
#         }
#
#         for i, tbl in enumerate(nested_tables):
#             table_info = {
#                 'index': i,
#                 'rows': len(tbl.rows),
#                 'columns': len(tbl.columns),
#                 'has_merged_cells': self._check_merged_cells(tbl)
#             }
#             info['tables'].append(table_info)
#
#         return info
#
#     def _check_merged_cells(self, table):
#         """检查表格是否有合并单元格"""
#         # 通过检查网格跨度来判断是否有合并单元格
#         for row in table.rows:
#             for cell in row.cells:
#                 # 检查水平合并
#                 if hasattr(cell, '_tc'):
#                     tc = cell._tc
#                     grid_span = tc.xpath('.//w:gridSpan')
#                     if grid_span:
#                         return True
#         return False


# 使用示例
if __name__ == "__main__":

    SOURCE_DOC = r'E:\2025年事故预案汇总\预案整理后\report_base.docx'  # source document path
    SOURCE_TABLE_IDX = 0  # index of the outer table in the source document (0 = first table)
    SOURCE_CELL_POS = (0, 0)  # (row, col) of the source cell holding the nested table: first row, first column
    TARGET_DOC = r'E:\overhaul_report-test.docx'  # target document path
    TARGET_TABLE_IDX = 0  # index of the outer table in the target document
    TARGET_CELL_POS = (2, 0)  # (row, col) of the target cell to paste into: third row, first column
    SAVE_PATH = TARGET_DOC  # output path; same as the target, so the target file is overwritten

    # Example 1: basic usage, driven by the constants above so the paths and
    # indices are defined in exactly one place.
    source_row, source_col = SOURCE_CELL_POS
    target_row, target_col = TARGET_CELL_POS
    copy_nested_table(
        source_doc_path=SOURCE_DOC,
        target_doc_path=TARGET_DOC,
        output_doc_path=SAVE_PATH,
        source_table_index=SOURCE_TABLE_IDX,
        source_cell_row=source_row,
        source_cell_col=source_col,
        target_table_index=TARGET_TABLE_IDX,
        target_cell_row=target_row,
        target_cell_col=target_col,
    )

    # # 示例2：使用高级版本
    # copier = NestedTableCopier()
    #
    # # 先检查源单元格中的嵌套表格信息
    # info = copier.get_cell_nested_table_info(
    #     doc_path='source.docx',
    #     table_idx=0,
    #     cell_loc=(1, 1)  # 第二行第二列
    # )
    # print(f"嵌套表格信息: {info}")
    #
    # # 复制嵌套表格
    # success = copier.copy_nested_tables_complete(
    #     source_doc_path='source.docx',
    #     target_doc_path='target.docx',
    #     output_doc_path='output_complete.docx',
    #     source_table_idx=0,
    #     source_cell_loc={'row': 1, 'col': 1},
    #     target_table_idx=0,
    #     target_cell_loc=(2, 2),
    #     copy_all_nested=False
    # )
    #
    # if success:
    #     print("嵌套表格复制成功！")