读取xml文件

该博客为转载内容,转载自https://my.oschina.net/kyo4321/blog/3063477 ,但未提供更多关键信息。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >


# -*- coding: utf-8 -*-
"""
Created on Thu Apr 18 14:41:44 2019

@author: HeyJude
"""

import  xml.dom.minidom
# Parse the sample XML file and collect every <image> element under the root.
dom = xml.dom.minidom.parse("./word.xml")
root = dom.documentElement
images = root.getElementsByTagName('image')

# One pass per attribute, in the same order as before: every "file" value is
# printed first, then every "tag" value. The banner is printed for each
# element even when the attribute is absent.
for banner, attr_name, template in (
    ("*****file*****", "file", "File: %s"),
    ("*****tag*****", "tag", "Tag: %s"),
):
    for image in images:
        print(banner)
        if image.hasAttribute(attr_name):
            print(template % image.getAttribute(attr_name))

#image_info_list = []
#for image in images:
#    file_name = image.getAttribute("file").split("/")[2]
#    file_tag = image.getAttribute("tag")
#    image_info_list.append((file_name, file_tag))

# Collect the (file name, tag) pair of every <image> element into a list.
image_info_list = []
for image in images:
    # Skip elements that carry no "file" attribute, mirroring the
    # hasAttribute() checks used when printing, instead of crashing.
    if not image.hasAttribute("file"):
        continue
    # Take the last path component rather than a fixed index, so paths that
    # are not exactly three segments deep still yield the file name.
    file_name = image.getAttribute("file").rsplit("/", 1)[-1]
    file_tag = image.getAttribute("tag")
    image_info_list.append((file_name, file_tag))

# Write each tag into "<image name without extension>.txt" under text_path.
# NOTE: the output directory is expected to exist already.
text_path = "D:/data/xml_0418/"
for file_name, file_tag in image_info_list:
    # Strip only the final extension so names containing extra dots
    # (e.g. "a.b.jpg") are not truncated to their first segment.
    image_name = file_name.rsplit(".", 1)[0]
    # The context manager closes the file even if the write fails; the
    # explicit encoding keeps output independent of the platform locale.
    with open(text_path + image_name + '.txt', "w", encoding="utf-8") as f:
        f.write(file_tag)
print("Mission Completed!")
####xml参考资料
http://www.runoob.com/python/python-xml.html
https://blog.youkuaiyun.com/mpp_king/article/details/78496711
https://www.cnblogs.com/260554904html/p/8134899.html
### 样例数据如下,文件名称为 word.xml(需放在脚本运行的当前目录下,脚本通过 "./word.xml" 读取)
<?xml version="1.0" encoding="UTF-8"?>
<imagelist>
  <image file="word/1/1.jpg" tag="22" />
  <image file="word/1/2.jpg" tag="WASHINGTON" />
  <image file="word/1/3.jpg" tag="postgrad" />
  <image file="word/1/4.jpg" tag="study" />
  <image file="word/1/5.jpg" tag="www" />
  <image file="word/1/6.jpg" tag="prospects" />
  <image file="word/1/7.jpg" tag="ac" />
  <image file="word/1/8.jpg" tag="uk" />
  <image file="word/1/9.jpg" tag="3427N" />
  <image file="word/1/10.jpg" tag="STOP" />
  <image file="word/1/11.jpg" tag="EMERGENCY" />
  <image file="word/1/12.jpg" tag="1101" />
  <image file="word/1/13.jpg" tag="PEPSI" />
  <image file="word/1/14.jpg" tag="THE" />
  <image file="word/1/15.jpg" tag="RAB" />
  <image file="word/1/16.jpg" tag="BUTLER" />
  <image file="word/1/17.jpg" tag="BUILDING" />
  <image file="word/1/18.jpg" tag="No" />
  <image file="word/1/19.jpg" tag="smokin" />
  <image file="word/1/20.jpg" tag="beyond" />
  <image file="word/1/21.jpg" tag="thi" />
  <image file="word/1/22.jpg" tag="point" />
  <image file="word/1/23.jpg" tag="Memorex" />
  <image file="word/1/24.jpg" tag="R" />
  <image file="word/1/25.jpg" tag="Part" />
  <image file="word/1/26.jpg" tag="No" />
  <image file="word/1/27.jpg" tag="827240C" />
  <image file="word/1/28.jpg" tag="european" />
  <image file="word/1/29.jpg" tag="top" />
  <image file="word/1/30.jpg" tag="20" />
  <image file="word/1/31.jpg" tag="powered" />
  <image file="word/1/32.jpg" tag="by" />
  <image file="word/1/33.jpg" tag="Memorex" />
  <image file="word/1/34.jpg" tag="COMPACT" />
  <image file="word/1/35.jpg" tag="disc" />
  <image file="word/1/36.jpg" tag="Recordable" />
  <image file="word/1/37.jpg" tag="CD-R" />
  <image file="word/1/38.jpg" tag="PROFESSIONAL" />
  <image file="word/1/39.jpg" tag="RECORDABLE" />
  <image file="word/1/40.jpg" tag="COMPACT" />
  <image file="word/1/41.jpg" tag="DISC" />
  <image file="word/1/42.jpg" tag="UNIVERSAL" />
  <image file="word/1/43.jpg" tag="COMPATIBILITY" />
  <image file="word/1/44.jpg" tag="650" />
  <image file="word/1/45.jpg" tag="MB" />
  <image file="word/1/46.jpg" tag="700" />
  <image file="word/1/47.jpg" tag="MB" />
  <image file="word/1/48.jpg" tag="1X" />
  <image file="word/1/49.jpg" tag="24X" />
  <image file="word/1/50.jpg" tag="COMPATIBLE" />
  <image file="word/1/51.jpg" tag="700" />
  <image file="word/1/52.jpg" tag="CD-R" />
  <image file="word/1/53.jpg" tag="Memorex" />
  <image file="word/1/54.jpg" tag="700" />
  <image file="word/1/55.jpg" tag="1X" />
  <image file="word/1/56.jpg" tag="24" />
  <image file="word/1/57.jpg" tag="Imported" />
  <image file="word/1/58.jpg" tag="Memorex" />
  <image file="word/1/59.jpg" tag="Products" />
  <image file="word/1/60.jpg" tag="Europe" />
  <image file="word/1/61.jpg" tag="5" />
  <image file="word/1/62.jpg" tag="024460" />
  <image file="word/1/63.jpg" tag="000887" />
  <image file="word/1/64.jpg" tag="TESCO" />
  <image file="word/1/65.jpg" tag="RACE" />
  <image file="word/1/66.jpg" tag="FOR" />
  <image file="word/1/67.jpg" tag="LIFE" />
  <image file="word/1/68.jpg" tag="CANCER" />
  <image file="word/1/69.jpg" tag="RESEARCH" />
  <image file="word/1/70.jpg" tag="UK" />
  <image file="word/1/71.jpg" tag="TESCO" />
  <image file="word/1/72.jpg" tag="VALUE" />
  <image file="word/1/73.jpg" tag="Washing" />
  <image file="word/1/74.jpg" tag="up" />
  <image file="word/1/75.jpg" tag="liquid" />
  <image file="word/1/76.jpg" tag="SAFETY" />
  <image file="word/1/77.jpg" tag="NOTICE" />
  <image file="word/1/78.jpg" tag="if" />
  <image file="word/1/79.jpg" tag="you" />
  <image file="word/1/80.jpg" tag="become" />
  <image file="word/1/81.jpg" tag="trapped" />
  <image file="word/1/82.jpg" tag="in" />
  <image file="word/1/83.jpg" tag="the" />
  <image file="word/1/84.jpg" tag="lift" />
  <image file="word/1/85.jpg" tag="please" />
  <image file="word/1/86.jpg" tag="press" />
  <image file="word/1/87.jpg" tag="the" />
  <image file="word/1/88.jpg" tag="alarm" />
  <image file="word/1/89.jpg" tag="button" />
  <image file="word/1/90.jpg" tag="at" />
  <image file="word/1/91.jpg" tag="regular" />
  <image file="word/1/92.jpg" tag="30" />
  <image file="word/1/93.jpg" tag="second" />
  <image file="word/1/94.jpg" tag="intervals" />
  <image file="word/1/95.jpg" tag="The" />
  <image file="word/1/96.jpg" tag="alarm" />
  <image file="word/1/97.jpg" tag="will" />
  <image file="word/1/98.jpg" tag="sound" />
  <image file="word/1/99.jpg" tag="at" />
  <image file="word/1/100.jpg" tag="the" />
  <image file="word/2/101.jpg" tag="Information" />
  <image file="word/2/102.jpg" tag="desk" />
  <image file="word/2/103.jpg" tag="and" />
  <image file="word/2/104.jpg" tag="help" />
  <image file="word/2/105.jpg" tag="will" />
  <image file="word/2/106.jpg" tag="arrive" />
  <image file="word/2/107.jpg" tag="without" />
  <image file="word/2/108.jpg" tag="delay" />
  <image file="word/2/109.jpg" tag="PLEASE" />
  <image file="word/2/110.jpg" tag="DO" />
  <image file="word/2/111.jpg" tag="NOT" />
  <image file="word/2/112.jpg" tag="PANIC" />
  <image file="word/2/113.jpg" tag="MAXIMUM" />
  <image file="word/2/114.jpg" tag="LOAD" />
  <image file="word/2/115.jpg" tag="1600" />
  <image file="word/2/116.jpg" tag="KG" />
  <image file="word/2/117.jpg" tag="OR" />
  <image file="word/2/118.jpg" tag="22" />
  <image file="word/2/119.jpg" tag="PERSONS" />
  <image file="word/2/120.jpg" tag="GUIDELINE" />
  <image file="word/2/121.jpg" tag="5" />
  <image file="word/2/122.jpg" tag="5A" />
  <image file="word/2/123.jpg" tag="5B" />
  <image file="word/2/124.jpg" tag="5A" />
  <image file="word/2/125.jpg" tag="5B" />
  <image file="word/2/126.jpg" tag="4B" />
  <image file="word/2/127.jpg" tag="4" />
  <image file="word/2/128.jpg" tag="Department" />
  <image file="word/2/129.jpg" tag="of" />
  <image file="word/2/130.jpg" tag="Computer" />
  <image file="word/2/131.jpg" tag="Science" />
  <image file="word/2/132.jpg" tag="1" />
  <image file="word/2/133.jpg" tag="REDBACK" />
  <image file="word/2/134.jpg" tag="HOWARD" />
  <image file="word/2/135.jpg" tag="JACOBSON" />
  <image file="word/2/136.jpg" tag="PanaSync" />
  <image file="word/2/137.jpg" tag="E70" />
  <image file="word/2/138.jpg" tag="conditions" />
  <image file="word/2/139.jpg" tag="GARDEN" />
  <image file="word/2/140.jpg" tag="PATH" />
  <image file="word/2/141.jpg" tag="AT" />
  <image file="word/2/142.jpg" tag="GIVERNY" />
  <image file="word/2/143.jpg" tag="CLAUDE" />
  <image file="word/2/144.jpg" tag="MONET" />
  <image file="word/2/145.jpg" tag="LITTER" />
  <image file="word/2/146.jpg" tag="COLCHESTER" />
  <image file="word/2/147.jpg" tag="BOROUGH" />
  <image file="word/2/148.jpg" tag="UK" />
  <image file="word/2/149.jpg" tag="DANCE" />
  <image file="word/2/150.jpg" tag="PROTOTYPE" />
  <image file="word/2/151.jpg" tag="&amp;" />
  <image file="word/2/152.jpg" tag="INSPIRED" />
  <image file="word/2/153.jpg" tag="RECORDS" />
  <image file="word/2/154.jpg" tag="First" />
  <image file="word/2/155.jpg" tag="f" />
  <image file="word/2/156.jpg" tag="Eastern" />
  <image file="word/2/157.jpg" tag="National" />
  <image file="word/2/158.jpg" tag="Bus" />
  <image file="word/2/159.jpg" tag="Times" />
  <image file="word/2/160.jpg" tag="EMERGENCY" />
  <image file="word/2/161.jpg" tag="DOOR" />
  <image file="word/2/162.jpg" tag="CONTROL" />
  <image file="word/2/163.jpg" tag="2N" />
  <image file="word/2/164.jpg" tag="4B" />
  <image file="word/2/165.jpg" tag="526" />
  <image file="word/2/166.jpg" tag="Simon" />
  <image file="word/2/167.jpg" tag="Lucas" />
  <image file="word/2/168.jpg" tag="Department" />
  <image file="word/2/169.jpg" tag="of" />
  <image file="word/2/170.jpg" tag="Computer" />
  <image file="word/2/171.jpg" tag="Science" />
</imagelist>

转载于:https://my.oschina.net/kyo4321/blog/3063477

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值