xlrd解析excel文档

最新推荐文章于 2025-06-05 16:30:24 发布

Jasonfang0118

最新推荐文章于 2025-06-05 16:30:24 发布

阅读量1.3k

点赞数 1

CC 4.0 BY-SA版权

分类专栏：程序&算法文章标签： excel file string branch access xml

本文链接：https://blog.youkuaiyun.com/rudyn/article/details/7755789

程序&算法专栏收录该内容

52 篇文章

订阅专栏

本文介绍了一种使用Python从Excel表中读取数据并转换为特定格式XML文件的方法。利用xlrd库解析Excel表格，根据定义的数据类型和访问权限等信息生成XML节点及属性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

python解析excel表，然后创建一件xml文件，当然这个文件是具有特定的格式的，以工作中用到的一类为例，整了个比较简单的~目前我只实现了单实例的生成方法，具有多级的实例没有做，以后有时间再细究，这里会生成两个文件，一个是xml，一个是文本，文本文件记录了各实例的子属性~采用行方法写入文件~！用到的是一个xlrd的python库，功能还是很强大的，这也是我用python解决这个问题的原因！

#!/usr/bin/python

import sys
import xlrd

''' Define the necessary database '''
DataTypeValue={'string':'String','unsignedInt':'Unsigned','Boolean':'Bool','Int':'Int','dateTime':'dateTime'}
AccessTypeValue={'r':'readOnly','w':'readWrite'}
NOTIFICATION="none"
NEEDREBOOT="no"
ACCESSLIST="subscriber"
ACSSPECIAL=""
StyleValue={'w':'status','r':'configuration'} 
DEFAULT_VALUE_RANGE={'String':'0-255','Unsigned':'0-4294967295','Int':'-32768-32767','Bool':'0-1','dateTime':'0-255'}
DEFAULT_BOOL_VALUE='0'
DEFAULT_STRING_VALUE=''
MAX_UNINT_VALUE='4294967295'
MAX_INT_VALUE='32767' #Assumed to be 32767 here temporary
CELL_VALUE_TYPE=1
CELL_RW_TYPE=2


class Node():
    def __init__(self,name,type,acsspecial,nextlevelnum):
        self.name=name
        self.type="Object"
        self.acsspecial=ACSSPECIAL
        self.nextlevelnum=nextlevelnum

class Attribute():
    def __init__(self,name,type,access,notification,needReboot,accessList,style,valueRange,acsSpecial,defaultValue):
        self.name=name
        self.type=type
        self.access=access
        self.notification=NOTIFICATION #default to none
        self.needReboot=NEEDREBOOT      #default to no
        self.accessList=ACCESSLIST
        self.style=style
        self.valueRange=valueRange
        self.acsSpecial=ACSSPECIAL  #default to null
        self.defaultValue=defaultValue

class NodeLevelInfo():
    def __init__(self,name,nextlevelnum):
        self.name=name
        self.nextlevelnum=nextlevelnum

'''Just for debugging'''
def print_node_info(in_para):
    print "Node name:",in_para.name,
    print "acsSpecial:",in_para.acsspecial
    print "NextLevelNum:",in_para.nextlevelnum
    print "-----------------------------------------"

def print_attr_info(in_para):
    print "Member name:",in_para.name
    print "DataType:",in_para.type
    print "accessType:",in_para.access
    print "Notification:",in_para.notification
    print "NeedReboot:",in_para.needReboot
    print "AccessList:",in_para.accessList
    print "Style:",in_para.style
    print "ValueRange:",in_para.valueRange
    print "acsSpecial:",in_para.acsSpecial
    print "DefaultValue:",in_para.defaultValue
    print "-----------------------------------------"

def NodeToString(data):
    return '<'+data.name+' '+'type='+"\""+data.type+"\""+' '+'acsSpecial='+"\""+data.acsspecial+"\""+' '+'nextLevelNum='+"\""+repr(data.nextlevelnum)+"\""+'>'+'\n'    

def NodeTailString(node_name):
    return '</'+node_name+' '+ '>'+'\n'

def AttrToString(data):
    string_out = '<'+data.name+' '+'type='+"\""+data.type+"\""+' '+'access='+"\""+data.access+"\""+' '+'notification='+"\""+data.notification+"\""+' '+'needReboot='+"\""+data.needReboot+"\""+ \
            ' '+'accessList='+"\""+data.accessList+"\""+' '+'style='+"\""+data.style+"\""+' '
    if data.type == 'String' or data.type == 'dateTime':
        string_out += 'lengthRange='+"\""+data.valueRange+"\""+' '
    else:
        string_out += 'valueRange='+"\""+data.valueRange+"\""+' '

    string_out += 'acsSpecial='+"\""+data.acsSpecial + "\""+' '+'defaultValue='+'"'+data.defaultValue+"\""+'/>'+'\n'
    
    return string_out

def LevelInfoToString(data):
    return 'Node:'+data.name+', '+'NextLevelNum:'+repr(data.nextlevelnum)+'\n'

def write_data_file(data,filename, mode):
    if len(filename)==0 or mode=='r':
        return
    fobj = open(filename, mode)  
    fobj.writelines(data) 
    #fobj.write(data)
    fobj.close() 
    
def retrieve_attribute_instance(row_index, sh):
    #print "Retrieve attribute data"
    #VALID_COLUM=3  #We just need the pre-three colums,actually.
    #print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE))
    
    name=TMP_NODE_BASENAME+str.strip()
    #type=''access='',style='',valueRange='',defaultValue=''
    #get value type.
    str_valuetype=sh.cell_value(row_index,CELL_VALUE_TYPE)
    if 'string'in str_valuetype:
        type=DataTypeValue['string']
    elif 'unsignedInt' in str_valuetype:
        type=DataTypeValue['unsignedInt']
    elif 'Boolean' in str_valuetype:
        type=DataTypeValue['Boolean']
    elif 'Int' in str_valuetype:
        type=DataTypeValue['Int']
    elif 'dateTime' in str_valuetype:
        type=DataTypeValue['dateTime']

    #get valueRange
    start_index=-1
    colon_index=-1
    end_index=-1
    if '[' in str_valuetype:
        start_index=str_valuetype.find('[')
        end_index=str_valuetype.find(']')
    if '(' in str_valuetype:
        start_index=str_valuetype.find('(')
        end_index=str_valuetype.find(')')
    if ':' in str_valuetype:
        colon_index=str_valuetype.find(':')

    if start_index==-1 or end_index==-1:
        #No specified valueRange,set to default.
        #print name
        valueRange=DEFAULT_VALUE_RANGE[type]
    else:
        if colon_index == 0 or colon_index == -1:
            #No colon tag found,just return the value
            #print 'name:%s,valueType:%s' % (name,str_valuetype)
            #print 'start_index:%d,end_index:%d,str:%s' % (start_index, end_index, str_valuetype[start_index+1:end_index])
            valueRange='0'+'-'+str_valuetype[start_index+1:end_index]
        else:
            #colon tag is found,need to reallocate the range value.
            if len(str_valuetype[start_index+1:colon_index])==0:
                valueRange='0'+'-'+str_valuetype[colon_index+1:end_index]
            elif len(str_valuetype[colon_index+1:end_index])==0:
                if type==DataTypeValue['unsignedInt']:
                    valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_UNINT_VALUE
                elif type==DataTypeValue['Int']:
                    valueRange=str_valuetype[start_index+1:colon_index]+'-'+MAX_INT_VALUE
            else:
                valueRange=str_valuetype[start_index+1:colon_index]+'-'+str_valuetype[colon_index+1:end_index]

    #get r/w type
    str_valuetype=sh.cell_value(row_index,CELL_RW_TYPE) 
    if 'r' == str_valuetype:
        access=AccessTypeValue['r']
    else:
        access=AccessTypeValue['w']

    #get style.
    style=StyleValue[str_valuetype]

    #get defaultvalue.
    if type== DataTypeValue['string'] or type==DataTypeValue['dateTime']:
        defaultValue=DEFAULT_STRING_VALUE
    elif type==DataTypeValue['unsignedInt'] or type==DataTypeValue['Int']:
        defaultValue='0'
    elif type==DataTypeValue['Boolean']:
        defaultValue=DEFAULT_BOOL_VALUE  

    return Attribute(name,type,access,NOTIFICATION,NEEDREBOOT,ACCESSLIST,style,valueRange,ACSSPECIAL,defaultValue)


#att=attribute("test","Unsigned","readOnly","none","","","status","0-4294967295","","0")
#print_attr_info(att)
#tmp_data=AttrToString(att)
#print tmp_data
#write_data_file(tmp_data,'configuration.xml','a')

#Some initial value definition
CONFIG_XML_FILE='configuration.xml' #The dest xml file
NEXT_LEVEL_NUM_FILE='next_level_info.txt'
TMP_NODE_BASENAME=""  #Record the father node's name
BRANCH_NODE_NEXT_LEVEL_NUM=0     #Record the nextlevel num
ROOT_NEXT_LEVEL_NUM=0
ROOT_NODE_NAME='InternetGatewayDevice.'
ROOT_TAIL_TAG='</InternetGatewayDevice. >\n'
ROOT_HEAD_TAG='<InternetGatewayDevice. type="Object" acsSpecial="" nextLevelNum="0">\n'

#print ROOT_HEAD_TAG
#Load excel file for processing
fname = "sample.xls"
bk = xlrd.open_workbook(fname)
shxrange = range(bk.nsheets)

try:
    sh = bk.sheet_by_name("Sheet1")
except:
    print "no sheet in %s named Sheet1" % fname
    sys.exit(1)

nrows = sh.nrows
ncols = sh.ncols

print "total rows: %d, total cols: %d" % (nrows,ncols)
cell_value = sh.cell_value(2,0)
#print "The 1st column title: %s" % (cell_value)
#print "type:",sh.cell_type(3,0) 

row_list = []
for i in range(0,nrows):
    row_data = sh.row_values(i)
    #print sh.row_len(i)
    row_list.append(row_data)

for i in range(0,nrows):
    str = sh.cell_value(i,0)
    if str.startswith(ROOT_NODE_NAME):
        start_index = i
        #print 'start_index:',i
        break

#Write root node head tag
write_data_file(ROOT_HEAD_TAG,CONFIG_XML_FILE,'a')

for i in range(start_index,nrows):
    #print "%s,len:%d" % (sh.cell_value(i,0),len(sh.cell_value(i,0)))
    #str = sh.cell_value(i,0)
    #len_str = len(sh.cell_value(i,0))
    
    str = sh.cell_value(i,0)
    #print "i:%d,%s" % (i, str)
    if len(str) == 0:
        continue

    #print str
    if str[-1:] == '.':
        #dump a node to file
        #node_data=node(str,"Object",ACSSPECIAL,0)
        if str != ROOT_NODE_NAME:
            ROOT_NEXT_LEVEL_NUM += 1

        if TMP_NODE_BASENAME != str:
            if len(TMP_NODE_BASENAME) > 0 and str != ROOT_NODE_NAME and TMP_NODE_BASENAME != ROOT_NODE_NAME:
                #We should write xml tail tag before write a new item
                tmp_data=NodeTailString(TMP_NODE_BASENAME)
                write_data_file(tmp_data,CONFIG_XML_FILE,'a')

                #print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
                tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
                tmp_data=LevelInfoToString(tmp_level_info)
                write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')
                BRANCH_NODE_NEXT_LEVEL_NUM = 0

            TMP_NODE_BASENAME=str
            if TMP_NODE_BASENAME != ROOT_NODE_NAME:
                node_data=Node(str,"Object",ACSSPECIAL,0)
                tmp_data=NodeToString(node_data)
                #print TMP_NODE_BASENAME
                write_data_file(tmp_data,CONFIG_XML_FILE,'a')

    else:
        #dump attribute to file        
        if TMP_NODE_BASENAME == ROOT_NODE_NAME:
            ROOT_NEXT_LEVEL_NUM += 1
        else:
            BRANCH_NODE_NEXT_LEVEL_NUM += 1
        
        #Write attribute to file
        #VALID_COLUM=3  #We just need the pre-three colums,actually.
        #print TMP_NODE_BASENAME+str.strip(),
        #print "ValueType:%s,R/W:%s" % (sh.cell_value(i,CELL_VALUE_TYPE),sh.cell_value(i,CELL_RW_TYPE))
        tmp_attr=retrieve_attribute_instance(i,sh)
        #print_attr_info(tmp_attr)
        tmp_data=AttrToString(tmp_attr)
        write_data_file(tmp_data,CONFIG_XML_FILE,'a')
        
        #Has travelled to the last branch
        if i == nrows -1:
            #write xml tail tag.
            #print 'Node:%s,NextLevelNum:%d' % (TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
            tmp_level_info = NodeLevelInfo(TMP_NODE_BASENAME,BRANCH_NODE_NEXT_LEVEL_NUM)
            tmp_data=LevelInfoToString(tmp_level_info)
            write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')
            tmp_data=NodeTailString(TMP_NODE_BASENAME)
            write_data_file(tmp_data,CONFIG_XML_FILE,'a') 

#Write root node tail tag
write_data_file(ROOT_TAIL_TAG,CONFIG_XML_FILE,'a')

#Write root node nextlevelnum info 
tmp_level_info = NodeLevelInfo(ROOT_NODE_NAME,ROOT_NEXT_LEVEL_NUM)
tmp_data=LevelInfoToString(tmp_level_info)
write_data_file(tmp_data,NEXT_LEVEL_NUM_FILE,'a')

#unload sheet to free memory
bk.unload_sheet("Sheet1")
print 'File create Done!'