基于jdom的xpath解析数据-CSDN博客

XPath可以快速地定位和查找所需要的字符串或数据。工欲善其事，必先利其器：要想快速地开发、处理和查找xml数据，学习和掌握xpath是必不可少的。那就开始吧。

示例的student.xml文件。

<?xml version="1.0"?>
<students>
	<student id="1001">
		<name>张三</name>
		<score>85</score>
		<email>zhangsan@126.com</email>
		<age>20</age>
		<address>
			<province>广东</province>
			<city>广州</city>
		</address>
		<description>
			aaa
			<h1>bbb</h1>
			ccc
		</description>
	</student>
	<student id="1002">
		<name>李四</name>
		<score>79</score>
		<email>lisi@126.com</email>
		<age>24</age>
		<address>
			<province>广东</province>
			<city>深圳</city>
		</address>
		<description>
			111
			<h1>222</h1>
			333
		</description>
	</student>
	<student id="1003">
		<name>王五</name>
		<score>92</score>
		<email>wangwu@126.com</email>
		<age>24</age>
		<address>
			<province>广东</province>
			<city>汕头</city>
		</address>
		<description>
			xxx
			<h1>yyy</h1>
			zzz
		</description>
	</student>
</students>

编写测试的java代码。

package com.suntek.test.xpath;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.List;

import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

/**
 * Small demonstration of querying an XML document with JDOM's XPath support.
 *
 * <p>Every query method parses {@code /student.xml} from the classpath, runs
 * one or more XPath expressions against it and prints the matches to
 * standard output.
 */
public class XpathTest {

	/** Classpath location of the sample document. */
	private static final String STUDENT_XML = "/student.xml";

	/**
	 * Parses the sample document from the classpath.
	 *
	 * @return the parsed document
	 * @throws Exception if the resource is missing or cannot be parsed
	 */
	public static Document getDocument() throws Exception {
		// Resolve the resource through this class rather than System.class:
		// a lookup through a class of a named module (java.base on Java 9+)
		// is confined to that module and would not find an application resource.
		InputStream in = XpathTest.class.getResourceAsStream(STUDENT_XML);
		if (in == null) {
			throw new FileNotFoundException(STUDENT_XML + " was not found on the classpath");
		}
		try {
			return new SAXBuilder().build(in);
		} finally {
			in.close();
		}
	}

	/**
	 * Renders an element, its attributes and its text/element children as a
	 * tag-like string for display.
	 *
	 * <p>Attribute values are written without quotes and text is not escaped,
	 * so the result is meant for printing only, not as well-formed XML.
	 * Child nodes that are neither {@code Text} nor {@code Element} are skipped.
	 *
	 * @param elem the element to render
	 * @return the textual rendering of {@code elem}
	 */
	public static String formatElement(Element elem) {
		StringBuffer sb = new StringBuffer();
		sb.append("<" + elem.getName());
		List attributes = elem.getAttributes();
		for (int i = 0; i < attributes.size(); i++) {
			Attribute attribute = (Attribute) attributes.get(i);
			sb.append(" " + attribute.getName() + "=" + attribute.getValue());
		}
		sb.append(">");
		// getContent() returns the mixed child list: Text, Element, Comment,
		// ProcessingInstruction, CDATA, EntityRef and so on.
		List children = elem.getContent();
		for (int i = 0; i < children.size(); i++) {
			Object child = children.get(i);
			if (child instanceof Text) {
				sb.append(((Text) child).getText());
			} else if (child instanceof Element) {
				sb.append(formatElement((Element) child));
			}
		}
		sb.append("</" + elem.getName() + ">");
		return sb.toString();
	}

	/**
	 * Prints every {@code student} element of the document.
	 *
	 * @throws Exception if the document cannot be loaded or queried
	 */
	public static void getAllStudent() throws Exception {
		printElements(XPath.selectNodes(getDocument(), "//student"));
	}

	/**
	 * Prints the students whose score satisfies a threshold.
	 *
	 * @param score the threshold; it is inserted verbatim into the XPath predicate
	 * @param above {@code true} selects scores {@code >= score},
	 *              {@code false} selects scores {@code < score}
	 * @throws Exception if the document cannot be loaded or the expression is invalid
	 */
	public static void getStudent(String score, boolean above) throws Exception {
		String operator = above ? ">=" : "<";
		String expression = "//student[score" + operator + score + "]";
		printElements(XPath.selectNodes(getDocument(), expression));
	}

	/**
	 * Prints the {@code name} element of every student.
	 *
	 * <p>When a student node carries a lot of data it is wasteful to fetch the
	 * whole node and walk its children by hand. Instead, a second XPath is
	 * evaluated relative to each matched student to jump straight to the
	 * wanted child. Using the student as the context node leaves the parsed
	 * document untouched.
	 *
	 * @throws Exception if the document cannot be loaded or queried
	 */
	public static void getAllStudentByXpath() throws Exception {
		List students = XPath.selectNodes(getDocument(), "//student");
		for (int i = 0; i < students.size(); i++) {
			Element student = (Element) students.get(i);
			Element name = (Element) XPath.selectSingleNode(student, "name");
			if (name != null) {
				System.out.println(formatElement(name));
			}
		}
	}

	/**
	 * Prints, for every student, one line of the form
	 * {@code name;text-before-h1;h1-text;text-after-h1} built from the
	 * student's {@code name} and {@code description} children.
	 *
	 * <p>A text node that is absent contributes an empty field instead of
	 * aborting the run.
	 *
	 * @throws Exception if the document cannot be loaded or queried
	 */
	public static void getStudentNameAndDescription() throws Exception {
		List students = XPath.selectNodes(getDocument(), "//student");
		for (int i = 0; i < students.size(); i++) {
			Element student = (Element) students.get(i);
			Text name = selectText(student, "name/text()");
			Text before = selectText(student, "description/text()[1]");
			Text heading = selectText(student, "description/h1/text()");
			Text after = selectText(student, "description/text()[2]");

			StringBuffer sb = new StringBuffer();
			sb.append((name == null ? "" : name.getTextTrim()) + ";");
			sb.append(normalized(before) + ";");
			sb.append(normalized(heading) + ";");
			sb.append(normalized(after));
			System.out.println(sb.toString());
		}
	}

	/** Prints each element of {@code elements} using {@link #formatElement(Element)}. */
	private static void printElements(List elements) {
		for (int i = 0; i < elements.size(); i++) {
			System.out.println(formatElement((Element) elements.get(i)));
		}
	}

	/** Evaluates {@code path} relative to {@code context}; returns null when nothing matches. */
	private static Text selectText(Element context, String path) throws Exception {
		return (Text) XPath.selectSingleNode(context, path);
	}

	/** Returns the whitespace-normalized content of {@code text}, or "" when it is null. */
	private static String normalized(Text text) {
		return text == null ? "" : text.getTextNormalize();
	}

	/**
	 * Runs the four demonstrations in order, separated by a line of tildes.
	 *
	 * @param args unused
	 * @throws Exception if any demonstration fails
	 */
	public static void main(String[] args) throws Exception {
		getAllStudent();
		System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
		getStudent("80", true);
		System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
		getAllStudentByXpath();
		System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
		getStudentNameAndDescription();
	}

}

运行结果如下：

<student id=1001>
  <name>张三</name>
  <score>85</score>
  <email>zhangsan@126.com</email>
  <age>20</age>
  <address>
   <province>广东</province>
   <city>广州</city>
  </address>
  <description>
   aaa
   <h1>bbb</h1>
   ccc
  </description>
</student>
<student id=1002>
  <name>李四</name>
  <score>79</score>
  <email>lisi@126.com</email>
  <age>24</age>
  <address>
   <province>广东</province>
   <city>深圳</city>
  </address>
  <description>
   111
   <h1>222</h1>
   333
  </description>
</student>
<student id=1003>
  <name>王五</name>
  <score>92</score>
  <email>wangwu@126.com</email>
  <age>24</age>
  <address>
   <province>广东</province>
   <city>汕头</city>
  </address>
  <description>
   xxx
   <h1>yyy</h1>
   zzz
  </description>
</student>
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
<student id=1001>
  <name>张三</name>
  <score>85</score>
  <email>zhangsan@126.com</email>
  <age>20</age>
  <address>
   <province>广东</province>
   <city>广州</city>
  </address>
  <description>
   aaa
   <h1>bbb</h1>
   ccc
  </description>
</student>
<student id=1003>
  <name>王五</name>
  <score>92</score>
  <email>wangwu@126.com</email>
  <age>24</age>
  <address>
   <province>广东</province>
   <city>汕头</city>
  </address>
  <description>
   xxx
   <h1>yyy</h1>
   zzz
  </description>
</student>
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
<name>张三</name>
<name>李四</name>
<name>王五</name>
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
张三;aaa;bbb;ccc
李四;111;222;333
王五;xxx;yyy;zzz