wps dispimg python 解析实现参考

在 WPS Excel 中,可以把图片嵌入单元格,此时图片单元格会显示如下内容:

=DISPIMG("ID_142D0E21999C4D899C0723FF7FA4A9DD",1)

下面是针对这种图片文件的解析实现

参考博客:Python读取wps中的DISPIMG图片格式_wps dispimg-CSDN博客:https://blog.csdn.net/QAZJOU/article/details/139709948

解析出 dispimg_id

简单的字符串替换实现

def save_dispimg_id(self, cell_value):
    """Extract the image id from a WPS ``DISPIMG`` formula and record it.

    A cell holding an embedded picture contains a formula such as
    ``=DISPIMG("ID_142D0E21999C4D899C0723FF7FA4A9DD",1)``. The quoted first
    argument is the id; it is appended to ``self.wps_dispimg_id_list``.

    Args:
        cell_value: The cell's formula text.

    Returns:
        None. The id is stored on ``self.wps_dispimg_id_list``.
    """
    import re

    # Take the first quoted argument of DISPIMG(...). Unlike plain string
    # stripping this tolerates whitespace, a second argument other than 1,
    # and a function-name prefix in front of DISPIMG.
    match = re.search(r'DISPIMG\(\s*"([^"]+)"', cell_value)
    if match:
        img_id = match.group(1)
    else:
        # Not a recognisable DISPIMG formula: fall back to the plain
        # string stripping so such values are still recorded as before.
        img_id = cell_value.replace('=DISPIMG("', "").replace('",1)', "")
    self.wps_dispimg_id_list.append(img_id)

解析出 dispimg_id 对应的图片文件流

import zipfile
from lxml import etree

# Module-level cache: dispimg_id -> raw image bytes.
# Filled in place by get_wps_dispimg_map().
wps_dispimg_map = {}


def get_wps_dispimg_map(excel_file) -> dict:
    """Load the images behind WPS ``DISPIMG`` cells from an .xlsx archive.

    Two parts of the archive are joined:

    * ``xl/cellimages.xml`` maps each dispimg id (the ``name`` attribute of
      ``xdr:cNvPr``) to a relationship id (``r:embed`` on ``a:blip``).
    * ``xl/_rels/cellimages.xml.rels`` maps each relationship id to the
      image part inside the archive.

    The result is stored in the module-level ``wps_dispimg_map`` and also
    returned.

    Note:
        The cache is not keyed by file. Once ``wps_dispimg_map`` is
        non-empty, later calls return it unchanged regardless of
        ``excel_file``; clear the dict before loading another workbook.

    Args:
        excel_file: Anything ``zipfile.ZipFile`` accepts as its first
            argument.

    Returns:
        ``wps_dispimg_map``: dispimg id -> image bytes. It is empty when
        the workbook has no cell-image parts.
    """
    import posixpath

    # Cache hit: return the map itself so both code paths return the
    # same object (previously this path returned None).
    if wps_dispimg_map:
        return wps_dispimg_map

    xml_content_namespaces = {
        'xdr': 'http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'r': "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
        'etc': "http://www.wps.cn/officeDocument/2017/etCustomData"
    }

    ref_xml_content_namespaces = {
        None: "http://schemas.openxmlformats.org/package/2006/relationships",
    }

    # r:embed in Clark notation, as attribute keys are namespace-qualified.
    embed_attr = f'{{{xml_content_namespaces["r"]}}}embed'

    # Open the XLSX file (a zip archive).
    with zipfile.ZipFile(excel_file, 'r') as zfile:
        try:
            # dispimg_id -> rId mapping
            xml_content = zfile.read('xl/cellimages.xml')
            # rId -> target mapping
            rel_xml_content = zfile.read('xl/_rels/cellimages.xml.rels')
        except KeyError:
            # The archive has no cell-image parts, so there is nothing
            # to load; return the (empty) cache instead of failing.
            return wps_dispimg_map

        # dispimg_id -> rId
        xml_content_map = {}
        xml_content_root = etree.fromstring(xml_content)
        for xdr_pic in xml_content_root.findall(".//xdr:pic", xml_content_namespaces):
            name_node = xdr_pic.find('.//xdr:cNvPr', namespaces=xml_content_namespaces)
            blip_node = xdr_pic.find('.//a:blip', namespaces=xml_content_namespaces)
            if name_node is None or blip_node is None:
                # Picture entry without the expected children: skip it.
                continue
            dispimg_id = name_node.attrib.get('name')
            rId = blip_node.attrib.get(embed_attr)
            if dispimg_id is not None and rId is not None:
                xml_content_map[dispimg_id] = rId

        # rId -> archive member name
        rel_xml_content_map = {}
        rel_xml_content_root = etree.fromstring(rel_xml_content)
        for relationship in rel_xml_content_root.findall(
                './/Relationship', namespaces=ref_xml_content_namespaces):
            rId = relationship.attrib.get('Id')
            target = relationship.attrib.get('Target')
            if rId is None or target is None:
                continue
            # Targets are resolved against the "xl/" directory that holds
            # cellimages.xml. join/normpath also copes with absolute
            # ("/xl/media/x.png") and "../" targets; zip member names have
            # no leading slash, hence the lstrip.
            member = posixpath.normpath(posixpath.join('xl', target)).lstrip('/')
            rel_xml_content_map[rId] = member

        for dispimg_id, rId in xml_content_map.items():
            member = rel_xml_content_map.get(rId)
            if member is None:
                continue
            try:
                image_binary_data = zfile.read(member)
            except KeyError:
                # Relationship points at a part that is not in the archive.
                continue
            if image_binary_data:
                wps_dispimg_map[dispimg_id] = image_binary_data

    return wps_dispimg_map

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值