Pages on www.vdisk.cn (for example http://www.vdisk.cn/msdiaoxian) can be scraped with the following XPath expressions.
======link of ALLFILES
<div class='tag'><a href='?tag=ALLFILES&p=1' title='ALLFILES(339)'>ALLFILES(339)</a></div>
"//a[@href[contains(., '?tag=ALLFILES')]]/@href[1]"
======each page
<a href='?tag=ALLFILES&p=2' title='see'>2</a>
"//a[@href[contains(., '?tag=ALLFILES')]]/@href[1]"
======each category
<div class='tag'><a href='?tag=%E9%98%85%E8%AF%BB&p=1' title='read(2)'>read(2)</a></div>
===== each file
<table width="100%" class="ft"><tbody>
<td width="*">
<a href="/down/index/9405067" target="_blank">s1.0.2.apk</a>
</td>
<td width="130" align="right"><font style="color:#ccc">1.08 MB</font></td>
"//table[@class='ft']/tbody/tr"
for filename="tr/td[1]/a/text()"
for file url="tr/td[1]/a/@href[1]"
for file size="tr/td[2]//font/text()"
===== get the file name/URL only, without the file size
<a href="/down/index/9405067" target="_blank">s1.0.2.apk</a>
"//a[@href[contains(., '/down/index/')]]"
So prepare the following scraper configuration XML:
<?xml version="1.0" encoding="UTF-8"?>
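<!-- Expects the following initial variable: searchResult - wrapped object that provides user and rootWebSite and receives the scraped data through addCategory() / addFile() -->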
<config charset="UTF-8">
<!--
<include path="functions.xml" />
-->
<!-- defines search keyword and start URL -->
<!-- Scraper inputs, all read from the externally supplied searchResult object.
     overwrite="false" leaves a variable untouched when it is already defined. -->

<!-- User whose page is scraped. -->
<var-def name="search" overwrite="false">
    <template>${searchResult.getWrappedObject().user}</template>
</var-def>

<!-- Template rendering of the unwrapped searchResult object; not referenced
     anywhere else in this configuration. -->
<var-def name="searchResultObject" overwrite="false">
    <template>${searchResult.getWrappedObject()}</template>
</var-def>

<!-- Same expression as "search"; used for the output file name and the
     name attribute of the generated user element. -->
<var-def name="currentUser" overwrite="false">
    <template>${searchResult.getWrappedObject().user}</template>
</var-def>

<!-- Start URL: site root + "/" + user. The closing tag stays on the same line
     so that no line break becomes part of the URL. -->
<var-def name="targetWebsite" overwrite="false">
    <template>${searchResult.getWrappedObject().rootWebSite}/${searchResult.getWrappedObject().user}</template>
</var-def>

<!-- Site root; prefixed to the site-relative download links in the output. -->
<var-def name="rootWebsite" overwrite="false">
    <template>${searchResult.getWrappedObject().rootWebSite}</template>
</var-def>
<!-- Download the user's start page, convert the HTML to XML and collect every
     link that sits directly inside a div with class "tag" into "urls". -->
<var-def name="urls">
    <xpath expression="//div[@class='tag']/a">
        <html-to-xml>
            <http url="${targetWebsite}" />
        </html-to-xml>
    </xpath>
</var-def>
<!-- Walk over every distinct category link, register the category on
     searchResult, then fetch its page and register each file row. -->
<loop item="url" index="i" filter="unique">
    <list>
        <var name="urls" />
    </list>
    <body>
        <!-- Category title: the link's title attribute up to its last "(",
             e.g. "read(2)" yields "read". -->
        <var-def name="title">
            <regexp>
                <regexp-pattern>(.*)\(.*</regexp-pattern>
                <regexp-source>
                    <xpath expression="//@title">
                        <var name="url" />
                    </xpath>
                </regexp-source>
                <regexp-result>
                    <template>${_1}</template>
                </regexp-result>
            </regexp>
        </var-def>

        <!-- Relative category URL taken from the link's href attribute. -->
        <var-def name="u">
            <xpath expression="//@href">
                <var name="url" />
            </xpath>
        </var-def>

        <!-- Hand the category (title, relative URL) to the result object. -->
        <script><![CDATA[
            Object o=searchResult.getWrappedObject();
            o.addCategory(title.toString(),u.toString());
        ]]></script>

        <!-- Fetch the category page and select its file rows. Only the page the
             link points at is requested; further result pages are not followed.
             NOTE(review): the trailing "/" lands after the query string of the
             href; confirm the site really needs it. -->
        <var-def name="urls1">
            <xpath expression="//table[@class='ft']/tbody/tr">
                <html-to-xml>
                    <http url="${targetWebsite}/${u}/" />
                </html-to-xml>
            </xpath>
        </var-def>

        <!-- One iteration per distinct table row. -->
        <loop item="url1" index="j" filter="unique">
            <list>
                <var name="urls1" />
            </list>
            <body>
                <!-- f1: file name (link text in the first cell). -->
                <var-def name="f1">
                    <xpath expression="tr/td[1]/a/text()">
                        <var name="url1" />
                    </xpath>
                </var-def>
                <!-- f2: download link (href of the same anchor). -->
                <var-def name="f2">
                    <xpath expression="tr/td[1]/a/@href[1]">
                        <var name="url1" />
                    </xpath>
                </var-def>
                <!-- f3: file size (font text in the second cell). -->
                <var-def name="f3">
                    <xpath expression="tr/td[2]//font/text()">
                        <var name="url1" />
                    </xpath>
                </var-def>

                <!-- Register the file; argument order is name, size, link, category title. -->
                <script><![CDATA[
                    Object o=searchResult.getWrappedObject();
                    o.addFile(f1.toString(),f3.toString(),f2.toString(),title.toString());
                ]]></script>
            </body>
        </loop>
    </body>
</loop>
<!-- Expose the collected categories as the context variable "categories". -->
<script><![CDATA[
    SetContextVar("categories", searchResult.getWrappedObject().getCategories());
]]></script>

<!-- Write the collected data to "<currentUser>.xml" as a user element holding
     one category element per category and one file element per file.
     Scraped values are passed through sys.escapeXml so that characters such
     as "&" (present in every category URL) do not break the generated XML.
     NOTE(review): assumes the getters used below return String; confirm
     against the category and file classes. -->
<file action="write" path="${currentUser}.xml" charset="UTF-8">
    <template>
        <![CDATA[ <user name="${currentUser}"> ]]>
    </template>
    <loop item="category" index="i" filter="unique">
        <list>
            <var name="categories" />
        </list>
        <body>
            <template>
                <![CDATA[ <category name="${sys.escapeXml(category.getWrappedObject().getName())}" url="${targetWebsite}/${sys.escapeXml(category.getWrappedObject().getUrl())}"> ]]>
            </template>
            <!-- Expose the files of the current category as "files". -->
            <script><![CDATA[
                SetContextVar("files", category.getWrappedObject().getFiles());
            ]]></script>
            <!-- The inner loop gets its own index name so it does not shadow
                 the outer loop's "i". -->
            <loop item="file" index="j" filter="unique">
                <list>
                    <var name="files" />
                </list>
                <body>
                    <template>
                        <![CDATA[ <file name="${sys.escapeXml(file.getWrappedObject().getFileName())}" url="${rootWebsite}${sys.escapeXml(file.getWrappedObject().getFileUrl())}" size="${sys.escapeXml(file.getWrappedObject().getFileSize())}"/> ]]>
                    </template>
                </body>
            </loop>
            <template>
                <![CDATA[ </category> ]]>
            </template>
        </body>
    </loop>
    <!-- Closing tag wrapped in a template like every other emitted fragment. -->
    <template>
        <![CDATA[ </user> ]]>
    </template>
</file>
</config>

The resulting file:
<user name="msdiaoxian">
<category name="阅读" url="http://www.vdisk.cn/msdiaoxian/?tag=%E9%98%85%E8%AF%BB&p=1">
<file name="福昕PDF阅读4.3.1.218.7z" url="http://www.vdisk.cn/down/index/6831851" size="4.50 MB"/>
<file name="PDFReader7.7z" url="http://www.vdisk.cn/down/index/5739663" size="5.54 MB"/>
</category>
<category name="输入工具" url="http://www.vdisk.cn/msdiaoxian/?tag=%E8%BE%93%E5%85%A5%E5%B7%A5%E5%85%B7&p=1">
<file name="QQ五笔0.1倍QQ升级加速_14_303.exe" url="http://www.vdisk.cn/down/index/6108556" size="10.37 MB"/>
<file name="极点五笔扩展牛津英汉辞典.rar" url="http://www.vdisk.cn/down/index/5992907" size="4.98 MB"/>
<file name="QQ五笔QQ0.1倍加速QQ升级Wubi_Setup_13_283.exe" url="http://www.vdisk.cn/down/index/5971172" size="10.72 MB"/>
<file name="小鸭五笔 V3.2.1016官方安装版.EXE" url="http://www.vdisk.cn/down/index/5971149" size="2.27 MB"/>
<file name="搜狗拼音输入法 V5.1.5272 官方版.EXE" url="http://www.vdisk.cn/down/index/5246711" size="17.41 MB"/>
</category>
<category name="软件工程" url="http://www.vdisk.cn/msdiaoxian/?tag=%E8%BD%AF%E4%BB%B6%E5%B7%A5%E7%A8%8B&p=1">
<file name="现代软件工程 张家浩-ppt.rar" url="http://www.vdisk.cn/down/index/5442871" size="13.67 MB"/>
<file name="实用软件工程教程资源.zip" url="http://www.vdisk.cn/down/index/5442868" size="5.21 MB"/>
<file name="软件工程――原理、方法和工具资源.rar" url="http://www.vdisk.cn/down/index/5442862" size="5.88 MB"/>
<file name="软件工程2010.rar" url="http://www.vdisk.cn/down/index/5442852" size="5.53 MB"/>
</category>
<category name="课程作业" url="http://www.vdisk.cn/msdiaoxian/?tag=%E8%AF%BE%E7%A8%8B%E4%BD%9C%E4%B8%9A&p=1">
<file name="数据库习题.7z" url="http://www.vdisk.cn/down/index/8416008" size="40.16 KB"/>
<file name="概率论复习.7z" url="http://www.vdisk.cn/down/index/8189342" size="275.92 KB"/>
<file name="软件工程复习题.7z" url="http://www.vdisk.cn/down/index/8154440" size="141.97 KB"/>
<file name="CPU报告书.doc" url="http://www.vdisk.cn/down/index/6079883" size="0.70 MB"/>
<file name="quartusII设计简单CPU.rar" url="http://www.vdisk.cn/down/index/6063094" size="3.43 MB"/>
<file name="HHH_Work.rar" url="http://www.vdisk.cn/down/index/5654202" size="2.24 MB"/>
<file name="EClock.7z" url="http://www.vdisk.cn/down/index/5651336" size="346.20 KB"/>
<file name="复件 msdx.rar" url="http://www.vdisk.cn/down/index/5555606" size="3.08 MB"/>
<file name="HHH_Work.7z" url="http://www.vdisk.cn/down/index/5542889" size="469.93 KB"/>
<file name="HHH_Work.7z" url="http://www.vdisk.cn/down/index/5513882" size="351.92 KB"/>
<file name="add.7z" url="http://www.vdisk.cn/down/index/5513718" size="204.45 KB"/>
<file name="录像2仿真.7z" url="http://www.vdisk.cn/down/index/5512301" size="1.01 MB"/>
<file name="录像1.7z" url="http://www.vdisk.cn/down/index/5511173" size="1.12 MB"/>
<file name="adder.7z" url="http://www.vdisk.cn/down/index/5511170" size="134.93 KB"/>
<file name="half-add.7z" url="http://www.vdisk.cn/down/index/5441566" size="143.06 KB"/>
<file name="Chess.rar" url="http://www.vdisk.cn/down/index/5418558" size="0.86 MB"/>
<file name="TestLogin.java" url="http://www.vdisk.cn/down/index/5418281" size="1142 B"/>
</category>
289

被折叠的 条评论
为什么被折叠?



