在工作中,xml作为很多程序的参数配置文件被使用,在Oracle数据库中有时需要保存配置文件为clob模式。但是clob模式无法直观查看,而且在很多情况下,需要解析除xml文本中的字段值并保存到数据库便于查询,因此需要用到解析xml文件。
以下是一个例子,xml配置文件保存在表1的content字段,保存为clob文本模式,以下SQL的作用是解析出每个字段的值。
以下是xml文件的第一个模式内容:解析xml模板1
<?xml version="1.0" encoding="UTF-8"?>
<file-transport>
<NRJF1_0039>
<coll-points>TT95105_800</coll-points>
<TT95105_800>
<time-range></time-range>
<heartbeat-timeout>150</heartbeat-timeout>
<idle-time>5</idle-time>
<compress-mode>Z</compress-mode>
<thread_num>1</thread_num>
<workspace>/jzcj3/workspace/TT95105_800</workspace>
<name-filter>.*</name-filter>
<src>
<type>1</type>
<host>10.254.96.23</host>
<port>21</port>
<username>mcb3tran</username>
<password>mcB3!871</password>
<path>/opt/cbbs/pcs/other/data/incoming</path>
<filename-rule>(95_800_)YYYYMMDD(.)IIII(.871)</filename-rule>
</src>
<default-filename-rule></default-filename-rule>
<dest>
<type>0</type>
<path>/jzcj3/sendJF/NRJF_7</path>
</dest>
<dest>
<type>0</type>
<path>/jzcj3/sendNETWORK/NRJF2_7</path>
</dest>
<backup>
<type>0</type>
<path>/jzcj3/Ndata/NRJF2/95105_800</path>
</backup>
<after-handle>delete</after-handle>
</TT95105_800>
</NRJF1_0039>
</file-transport>
<!-- <BUS_TYPE>【95105,800】</BUS_TYPE>-->
以下是第二个模式内容:解析xml模板2
<?xml version="1.0" encoding="UTF-8"?>
<file-transport>
<send_JF1001_7>
<coll-points>JF_NRJF2_7</coll-points>
<JF_NRJF2_7>
<time-range></time-range>
<heartbeat-timeout>150</heartbeat-timeout>
<idle-time>30</idle-time>
<compress-mode></compress-mode>
<thread_num>3</thread_num>
<workspace>/jzcj3/workspace/JF_NRJF2_7</workspace>
<name-filter>.*</name-filter>
<src>
<type>0</type>
<path>/jzcj3/sendJF/NRJF2_7</path>
<filename-rule>*</filename-rule>
</src>
<remote>
<type>1</type>
<host>snyfj02.boss</host>
<port>21</port>
<username>billing</username>
<password>Ynjf+!312</password>
<path>/billdata3/data/srcFile/NRJF2</path>
</remote>
<default-filename-rule></default-filename-rule>
<dest>
<type>0</type>
<path>/jzcj3/send_trash/JF_NRJF2_7</path>
</dest>
<backup>
<type>0</type>
<path></path>
</backup>
<after-handle>delete</after-handle>
</JF_NRJF2_7>
</send_JF1001_7>
</file-transport>
针对以上两个内容保存在表table的content字段,解析脚本如下,比较繁杂,仅作笔记参考
实际就是利用replace和substr截取和替换出各个字段的值
以下SQL中用到的表ucr_coll.td_c_config是一个包含content字段的表,实际只是从该表取出content字段解析
在实际使用中,测试时可以将t.content替换成为上面2个xml文本或将以上2个xml文本插入临时表,注意是clob模式。
select
/*parale,t,8*/
b.file_id COLL_NAME,
b.srchost SRC_HOST,
b.username SRC_USER,
b.password SRC_PASS,
b.thread_num THREAD_NUM,
b.idle_time IDLE_TIME,
b.BUS_TYPE BUS_TYPE,
b.srcpath SRC_PATH,
b.srcrule SRC_RULE,
replace(replace(replace(replace(replace(b.destPath, '<type>0</type>', ''),
'<dest>',
''),
'</path>',
''),
'</dest>',
','),
'<path>',
'') LOCAL_PATH, -- trim 去除字符串中的空格
b.destrule LOCAL_RULE,
b.remotehost REMOTE_HOST,
b.remote_user REMOTE_USER,
b.remote_passwd REMOTE_PASS,
b.remotepath REMOTE_PATH,
trim(replace(replace(replace(replace(b.backup, '<type>0</type>', ''),
'<path>',
''),
'</backup',
''),
'</path>',
'')) BAK_PATH,
b.after_get AFTER_HANDLE
from (select a.file_id,
substr(a.src,
instr(a.src, '<host>') + 6,
instr(a.src, '</host>', -1) - instr(a.src, '<host>') - 6) srcHost,
replace(replace(replace(a.backup, '<type>0</type><path>', ''),
'</path></backup>',
''),
'</path></backup',
'') backup,
substr(a.src,
instr(a.src, '<path>') + 6,
instr(a.src, '</path>', -1) - instr(a.src, '<path>') - 6) srcPath,
replace(replace(replace(a.dest,
'</path></dest><dest><type>0</type><path>',
','),
'<type>0</type><path>',
''),
'</path></dest',
'') || ',' destPath,
substr(a.src,
instr(a.src, '<filename-rule>') + 15,
instr(a.src, '</filename-rule>', -1) -
instr(a.src, '<filename-rule>') - 15) srcRule,
a.destRule,
substr(a.remote,
instr(a.remote, '<host>') + 6,
instr(a.remote, '</host>', -1) -
instr(a.remote, '<host>') - 6) remoteHost,
----传输进程用户名
substr(a.remote,
instr(a.remote, '<password>') + 10,
instr(a.remote, '</password>', -1) -
instr(a.remote, '<password>') - 10) remote_user,
----传输进程远端密码
substr(a.remote,
instr(a.remote, '<username>') + 10,
instr(a.remote, '</username>', -1) -
instr(a.remote, '<username>') - 10) remote_passwd,
----传输进程远端目录
substr(a.remote,
instr(a.remote, '<path>') + 6,
instr(a.remote, '</path>', -1) -
instr(a.remote, '<path>') - 6) remotePath,
----用户名解析
substr(a.src,
instr(a.src, '<username>') + 10,
instr(a.src, '</username>', -1) -
instr(a.src, '<username>') - 10) username,
----密码名解析
substr(a.src,
instr(a.src, '<password>') + 10,
instr(a.src, '</password>', -1) -
instr(a.src, '<password>') - 10) password,
--删除后的动作
a.after_get after_get,
--当前线程数
a.thread_num thread_num,
--idle time
a.idle_time idle_time,
-- 业务类型备注
a.BUS_TYPE BUS_TYPE
from (select
--业务类型备注
replace(replace(to_char(substr(t.config_content,
instr(t.config_content, --instr 从0开始
'<BUS_TYPE>') + 10,
instr(t.config_content,
'</BUS_TYPE>') -
(instr(t.config_content,
'<BUS_TYPE>') + 10))),
' ',
''),
chr(10),
'') BUS_TYPE,
--当前的线程数
replace(replace(to_char(substr(t.config_content,
instr(t.config_content, --instr 从0开始
'<thread_num>') + 12,
instr(t.config_content,
'</thread_num>') -
(instr(t.config_content,
'<thread_num>') + 12))),
' ',
''),
chr(10),
'') thread_num,
--idle time
replace(replace(to_char(substr(t.config_content,
instr(t.config_content, --instr 从0开始
'<idle-time>') + 11,
instr(t.config_content,
'</idle-time>') -
(instr(t.config_content,
'<idle-time>') + 11))),
' ',
''),
chr(10),
'') idle_time,
replace(replace(to_char(substr(t.config_content,
instr(t.config_content,
'<src>'),
instr(t.config_content,
'</src>',
-1) -
instr(t.config_content,
'<src>'))),
' ',
''),
chr(10),
'') src,
replace( --replace 替换空字符
replace(replace(replace(replace(to_char(substr(t.config_content,
instr(t.config_content,
'<dest>') + 6,
instr(t.config_content,
'</dest>',
-7) -
(instr(t.config_content,
'<dest>') + 6))),
chr(10),
''),
chr(32),
'') --替换空格符
,
chr(9),
'') --替换制表符
,
chr(13),
'') --替换回车符
,
chr(10),
'') -- 替换换行符
dest,
replace(replace(to_char(substr(t.config_content,
instr(t.config_content,
'<remote>') + 8,
instr(t.config_content,
'</remote>',
-1) -
instr(t.config_content,
'<remote>'))),
' ',
''),
chr(10),
'') remote,
replace(replace(to_char(substr(t.config_content,
instr(t.config_content,
'<default-filename-rule>') + 23,
instr(t.config_content,
'</default-filename-rule>',
-1) -
instr(t.config_content,
'<default-filename-rule>') - 23)),
' ',
''),
chr(10),
'') destRule,
replace(replace(to_char(substr(t.config_content,
instr(t.config_content,
'<backup>') + 8,
instr(t.config_content,
'</backup>',
-1) -
instr(t.config_content,
'<backup>'))),
' ',
''),
chr(10),
'') backup,
--获取后的动作,是否删除
replace(replace(to_char(substr(t.config_content,
instr(t.config_content, --instr 从0开始
'<after-handle>') + 14,
instr(t.config_content,
'</after-handle>') -
(instr(t.config_content,
'<after-handle>') + 14))),
' ',
''),
chr(10),
'') after_get,
t.file_id
from ucr_coll.td_c_config t) a) b
where b.file_id not like '%_m'
--and b.file_id = 'ROAM_AUTOREC_GET'
and b.file_id not like '%_l'
--and b.backup is not null
--and b.srchost is not null
order by b.srchost;
结果: