class DocumentBlockObject(BaseObject):
    """A document block: a part of a document that is handled as one unit.

    A whole document is made up of several such blocks. What a block maps to
    depends on the document kind: for Word it is the entire document, for
    Excel it is a single sheet, because every sheet carries independent data.

    The block keeps one list per element kind (texts, tables, pictures, ...)
    plus ``elements``, which receives every stored element in the order the
    ``add_*`` methods were called.

    ``id`` (``_data_id``) doubles as a running counter: every ``add_*`` call
    increments it once per element and stamps the new value onto the
    element's ``data_id`` attribute.
    """

    def __init__(self):
        """Create an empty block."""
        # NOTE(review): BaseObject.__init__ is not invoked here (same as the
        # original code) -- confirm that this is intentional.
        self._file_name = ""  # name of the specification document
        self._name = ''  # block name
        self._type = "block"
        self._header = []  # page header
        self._footer = []  # page footer
        # Every element object in document order, e.g. [TextObject, TableObject]
        self._elements = []
        self._texts: list[TextObject] = []  # text objects, document order
        self._tables: list[TableObject] = []  # table objects, document order
        self._pictures: list[PictureObject] = []  # picture objects, document order
        self._graphics: list[GraphicObject] = []  # graphic objects, document order
        self._timing_waves = []  # timing-diagram objects, e.g. [TimingWaveObject]
        self._timing_texts = []  # timing-diagram row texts, e.g. [TimingTextObject]
        self._settings = []  # block-level property information
        self._layouts = []  # block-level layout information
        self._styles = []  # block-level style information
        self._data_id = 0  # unique data id; also the counter used by add_*

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------
    @staticmethod
    def _dump_item(item):
        """Return ``item.to_dict()`` when the item offers it, else ``item``."""
        dump = getattr(item, "to_dict", None)
        return dump() if callable(dump) else item

    def to_dict(self):
        """Convert this block into a plain dictionary.

        Header and footer are treated as lists of lists. Their items are
        serialized with ``to_dict()`` when they have one and passed through
        unchanged otherwise, because ``from_dict`` stores header/footer data
        exactly as found in the input dictionary; without the pass-through a
        ``from_dict`` -> ``to_dict`` round trip would raise AttributeError.

        :return: dictionary with the same keys that ``from_dict`` reads
        """
        dump = self._dump_item
        return {
            "file_name": self._file_name,
            "name": self._name,
            "type": self._type,
            "header": [[dump(item) for item in group] for group in self._header],
            "footer": [[dump(item) for item in group] for group in self._footer],
            "elements": [element.to_dict() for element in self._elements],
            "texts": [text.to_dict() for text in self._texts],
            "tables": [table.to_dict() for table in self._tables],
            "pictures": [picture.to_dict() for picture in self._pictures],
            "graphics": [graphic.to_dict() for graphic in self._graphics],
            "timing_waves": [wave.to_dict() for wave in self._timing_waves],
            "timing_texts": [row_text.to_dict() for row_text in self._timing_texts],
            "settings": self._settings,
            "layouts": [layout.to_dict() for layout in self._layouts],
            "styles": [style.to_dict() for style in self._styles],
            "data_id": self._data_id,
        }

    @classmethod
    def from_dict(cls, data):
        """Create a block from a dictionary such as the one ``to_dict`` emits.

        Missing keys fall back to the same defaults ``__init__`` uses.

        :param data: mapping with the keys written by ``to_dict``
        :return: a new instance of ``cls``
        """
        obj = cls()
        obj._file_name = data.get("file_name", "")
        obj._name = data.get("name", '')
        obj._type = data.get("type", "block")
        # Header/footer entries are kept exactly as stored in ``data``.
        obj._header = data.get("header", [])
        obj._footer = data.get("footer", [])
        # NOTE(review): the "elements" key is not read, so ``elements`` stays
        # empty on a restored block. An earlier draft dispatched on
        # e["type"] ("text"/"table"/"picture"/"graphic") to the matching
        # from_dict; confirm whether that should be reinstated.
        obj._texts = [TextObject.from_dict(t) for t in data.get("texts", [])]
        obj._tables = [TableObject.from_dict(t) for t in data.get("tables", [])]
        obj._pictures = [PictureObject.from_dict(p) for p in data.get("pictures", [])]
        obj._graphics = [GraphicObject.from_dict(g) for g in data.get("graphics", [])]
        obj._timing_waves = [TimingWaveObject.from_dict(w) for w in data.get("timing_waves", [])]
        obj._timing_texts = [TimingTextObject.from_dict(t) for t in data.get("timing_texts", [])]
        obj._settings = data.get("settings", [])
        obj._layouts = [LayoutObject.from_dict(l) for l in data.get("layouts", [])]
        obj._styles = [StyleObject.from_dict(s) for s in data.get("styles", [])]
        obj._data_id = data.get("data_id", 0)
        return obj

    def __repr__(self):
        return f'{self.__class__.__name__}()[NAME="{self._name}"]'

    def __str__(self):
        return self._name

    # ------------------------------------------------------------------
    # Properties. Setters keep the original assert-based exact-type checks
    # so that callers still see AssertionError on a wrong type.
    # ------------------------------------------------------------------
    @property
    def id(self):
        return self._data_id

    @id.setter
    def id(self, new_value):
        assert type(new_value) is int
        self._data_id = new_value

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, new_value):
        assert type(new_value) is str
        self._name = new_value

    @property
    def file_name(self):
        return self._file_name

    @file_name.setter
    def file_name(self, new_value):
        assert type(new_value) is str
        self._file_name = new_value

    @property
    def elements(self):
        return self._elements

    @elements.setter
    def elements(self, new_value):
        assert type(new_value) is list
        self._elements = new_value

    @property
    def texts(self):
        return self._texts

    @texts.setter
    def texts(self, new_value):
        assert type(new_value) is list
        self._texts = new_value

    @property
    def header(self):
        return self._header

    @header.setter
    def header(self, new_value):
        assert type(new_value) is list
        self._header = new_value

    @property
    def footer(self):
        return self._footer

    @footer.setter
    def footer(self, new_value):
        assert type(new_value) is list
        self._footer = new_value

    @property
    def tables(self):
        return self._tables

    @tables.setter
    def tables(self, new_value):
        assert type(new_value) is list
        self._tables = new_value

    @property
    def pictures(self):
        return self._pictures

    @pictures.setter
    def pictures(self, new_value):
        assert type(new_value) is list
        self._pictures = new_value

    @property
    def graphics(self):
        return self._graphics

    @graphics.setter
    def graphics(self, new_value):
        assert type(new_value) is list
        self._graphics = new_value

    @property
    def timing_waves(self):
        return self._timing_waves

    @timing_waves.setter
    def timing_waves(self, new_value):
        assert type(new_value) is list
        self._timing_waves = new_value

    @property
    def timing_texts(self):
        return self._timing_texts

    @timing_texts.setter
    def timing_texts(self, new_value):
        assert type(new_value) is list
        self._timing_texts = new_value

    @property
    def settings(self):
        return self._settings

    @settings.setter
    def settings(self, new_value):
        assert type(new_value) is list
        self._settings = new_value

    @property
    def layouts(self):
        return self._layouts

    @layouts.setter
    def layouts(self, new_value):
        assert type(new_value) is list
        self._layouts = new_value

    @property
    def styles(self):
        return self._styles

    @styles.setter
    def styles(self, new_value):
        assert type(new_value) is list
        self._styles = new_value

    # ------------------------------------------------------------------
    # Element registration
    # ------------------------------------------------------------------
    def _register(self, item, expected_type, bucket, store=True, prepare=None):
        """Stamp ids onto one element or a list of elements and store them.

        :param item: a single ``expected_type`` instance or a list of elements
        :param expected_type: exact type required for a single element
        :param bucket: the per-kind list (e.g. ``self._texts``) to append to
        :param store: when False the elements only receive ids and are not
            added to ``bucket`` or ``elements``
        :param prepare: optional callable run on each element right after
            its id has been assigned
        """
        assert type(item) in [list, expected_type]
        if not item:
            # Empty list / falsy element: nothing to register.
            return
        batch = item if isinstance(item, list) else [item]
        for element in batch:
            self.id += 1
            element.data_id = self.id
            if prepare is not None:
                prepare(element)
        if store:
            bucket.extend(batch)
            self._elements.extend(batch)

    def add_text(self, text_object):
        """Add a text object (or a list of them) to the block.

        :param text_object: TextObject or list of text objects
        """
        self._register(text_object, TextObject, self._texts)

    def add_table(self, table_object):
        """Add a table object (or a list of them) to the block.

        Each table is passed through ``align_table_col`` before it is stored.

        :param table_object: TableObject or list of table objects
        """
        self._register(table_object, TableObject, self._tables,
                       prepare=self.align_table_col)

    def add_picture(self, picture_object, is_in_table=False):
        """Add a picture object (or a list of them) to the block.

        :param picture_object: PictureObject or list of picture objects
        :param is_in_table: when True the pictures only receive a data id
            and are not added to ``pictures`` / ``elements``
        """
        self._register(picture_object, PictureObject, self._pictures,
                       store=not is_in_table)

    def add_graphic(self, graphic_object, is_in_table=False):
        """Add a graphic object (or a list of them) to the block.

        :param graphic_object: GraphicObject or list of graphic objects
        :param is_in_table: when True the graphics only receive a data id
            and are not added to ``graphics`` / ``elements``
        """
        self._register(graphic_object, GraphicObject, self._graphics,
                       store=not is_in_table)

    def add_timing_wave(self, object):
        """Add a timing-diagram object (or a list of them) to the block.

        The parameter name shadows the builtin ``object``; it is kept so
        that existing keyword callers continue to work.

        :param object: TimingWaveObject or list of timing-diagram objects
        """
        self._register(object, TimingWaveObject, self._timing_waves)

    def add_timing_text(self, object):
        """Add a timing-diagram row text (or a list of them) to the block.

        The parameter name shadows the builtin ``object``; it is kept so
        that existing keyword callers continue to work.

        :param object: TimingTextObject or list of timing text objects
        """
        self._register(object, TimingTextObject, self._timing_texts)

    def align_table_col(self, base_table):
        """Pad every row of ``base_table`` to the width of its widest row.

        Rows with fewer cells than the widest row get new ``CellObject()``
        instances appended in place. A table without rows is left untouched.

        :param base_table: table whose ``rows`` each expose a ``cells`` list
        """
        # default=0 keeps a table with no rows from raising ValueError.
        max_col_count = max((len(row.cells) for row in base_table.rows), default=0)
        for base_row in base_table.rows:
            missing = max_col_count - len(base_row.cells)
            if missing > 0:
                base_row.cells.extend(CellObject() for _ in range(missing))

    # ------------------------------------------------------------------
    # Chapter helpers
    # ------------------------------------------------------------------
    def get_chapter_content(self, text_obj: TextObject) -> List[Union[str, list]]:
        """Collect the content that follows a chapter heading.

        Walks ``elements`` starting right after ``text_obj`` and stops at the
        next text object whose ``layout.chapter_id`` is truthy. Only text and
        table objects are considered. Consecutive text objects are merged
        into one newline-joined string; each table becomes its own list of
        rows, each row a list of cell texts.

        :param text_obj: chapter heading (a text with a truthy
            ``layout.chapter_id``)
        :return: strings and tables in document order; an empty list when
            ``text_obj`` is not a chapter heading
        :raises ValueError: if ``text_obj`` is a heading that is not in
            ``elements``
        """
        if not getattr(text_obj.layout, "chapter_id", None):
            return []

        contents = []
        pending_lines = []

        def flush_paragraphs():
            # Emit the text gathered so far as one entry, without the
            # leading/trailing newlines.
            if pending_lines:
                contents.append("\n".join(pending_lines).strip("\n"))
                pending_lines.clear()

        start = self._elements.index(text_obj) + 1
        for element in self._elements[start:]:
            if isinstance(element, TextObject):
                if getattr(element.layout, "chapter_id", None):
                    # Next chapter heading reached.
                    break
                pending_lines.append(element.text)
            elif isinstance(element, TableObject):
                flush_paragraphs()
                # A fresh list per table: rows of consecutive tables must
                # not leak into each other.
                contents.append(
                    [[cell.text for cell in row.cells] for row in element.rows]
                )
        flush_paragraphs()
        return contents

    @staticmethod
    def is_change_resume(
            obj: Union[TextObject, PictureObject, GraphicObject, TableObject, RowObject, CellObject]) -> bool:
        """Tell whether ``obj`` sits under a change-history section.

        Works for both Word and Excel content:

        * Word: the parent resolved by ``get_chapter`` is a TextObject. Its
          text, and that of every ancestor reached through
          ``layout.parent_ref`` (multi-level headings), is searched for any
          entry of ``CHANGE_RESUME``.
        * Excel: the parent is a DocumentBlockObject and its ``name`` is
          searched instead.

        :param obj: element, table row or table cell to inspect
        :return: True when a ``CHANGE_RESUME`` entry is found, else False
        """
        # May be ""/None/TextObject/DocumentBlockObject.
        parent_node = DocumentBlockObject.get_chapter(obj)

        if isinstance(parent_node, TextObject):
            # Word: climb the heading chain.
            while parent_node:
                if any(keyword in parent_node.text for keyword in CHANGE_RESUME):
                    return True
                parent_node = parent_node.layout.parent_ref
                if isinstance(parent_node, DocumentBlockObject):
                    return False
            return False

        if isinstance(parent_node, DocumentBlockObject):
            # Excel: the sheet name decides.
            return any(keyword in parent_node.name for keyword in CHANGE_RESUME)

        return False

    @staticmethod
    def get_chapter(obj: Union[TextObject, PictureObject, GraphicObject, TableObject, RowObject, CellObject]):
        """Return the chapter node an object belongs to.

        Text, picture, graphic and table objects use their own
        ``layout.parent_ref``. A RowObject is first resolved one level up
        and a CellObject two levels up through ``layout.parent_ref``; the
        ``layout.parent_ref`` of that ancestor is then returned.

        :param obj: element, table row or table cell
        :return: the resolved parent reference, or None when ``obj`` has an
            unsupported type or an intermediate parent is missing
        """
        if isinstance(obj, (TextObject, PictureObject, GraphicObject, TableObject)):
            levels_up = 0
        elif isinstance(obj, RowObject):
            levels_up = 1
        elif isinstance(obj, CellObject):
            levels_up = 2
        else:
            return None

        node = obj
        for _ in range(levels_up):
            node = node.layout.parent_ref
            if not node:
                # Broken parent chain: no chapter can be resolved.
                return None
        return node.layout.parent_ref
# TODO: draw the class diagram for DocumentBlockObject.
# (Web page residue: "Latest release")