java对xml的操作主要有两种:dom解析和sax解析方式
1. dom方式
dom方式的解析方法是一次性将xml文档加载到内存中。然后根据标签的层次关系,排列成类似一棵树的形状。
例如:有下面的xml文档(test.xml)
<?xml version="1.0" encoding="UTF-8" ?>
<shelf>
<book comment="first">
<id>1</id>
<name>aaaaa</name>
<price>10.2</price>
</book>
<book>
<id>1</id>
<name>bbbb</name>
<price>10</price>
</book>
</shelf>
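使用dom方式解析后的树状图为:
shelf
├── book (comment="first")
│   ├── id: 1
│   ├── name: aaaaa
│   └── price: 10.2
└── book
    ├── id: 1
    ├── name: bbbb
    └── price: 10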
常用的dom解析API有:JAXP(Sun公司开发)、JDOM、dom4j,按解析效率从低到高排列。
2. sax方式
sax方式是一边读取文档,一边解析文档。当文档读取完成后,文档也就解析完成。
3.两种方式优缺点比较
方式 | 优点 | 缺点 |
dom方式 | 可以方便地对xml文档进行CRUD(增、删、改、查)操作 | 当文档太大的时候,比较消耗内存 |
sax方式 | 消耗内存较小 | 只能对xml文档进行读取、查询,也就是说插入、删除等操作是无法进行的 |
4.具体代码示例
示例中使用的xml文件为上述xml文档(test.xml)
下列代码有一部分是使用了Junit单元测试,不了解JUnit的,请读者自行学习
4.1 JAXP方式
package com.zyh.xml;
import java.io.FileOutputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.junit.Before;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Demonstrates CRUD operations on {@code src/test.xml} with the JAXP DOM API.
 *
 * <p>The document is parsed again before every test. Tests that modify the tree write it back
 * to the same file, so they change the input that later runs will read.
 */
public class TestJAXPDom {
    /** Location of the XML document that is read and rewritten by the tests. */
    private static final String XML_PATH = "src/test.xml";

    DocumentBuilderFactory factory;
    DocumentBuilder builder;
    Document document;

    /** Parses the XML file into {@link #document} before each test. */
    @Before
    public void befor() throws Exception {
        // Create the factory and a builder from it
        factory = DocumentBuilderFactory.newInstance();
        builder = factory.newDocumentBuilder();
        // Parse the file into an in-memory DOM tree
        document = builder.parse(XML_PATH);
    }

    /** Reads the text of the second name element ({@code <name>bbbb</name>} in the sample). */
    @Test
    public void test() {
        // All name elements in document order
        NodeList lists = document.getElementsByTagName("name");
        // Indexes start at 0, so 1 is the second name element
        Node node = lists.item(1);
        System.out.println(node.getNodeName() + ":" + node.getTextContent());
    }

    /** Prints the names of all elements below the root, indented by depth. */
    @Test
    public void test2() throws Exception {
        Node root = document.getElementsByTagName("shelf").item(0);
        list(root, 1);
    }

    /**
     * Recursively prints the element names of the subtree rooted at {@code node}.
     *
     * @param node  subtree root
     * @param level depth of {@code node}; one space of indentation is printed per level
     */
    private void list(Node node, int level) {
        // Only element nodes are printed; whitespace between tags shows up as #text nodes
        if (node instanceof Element) {
            StringBuilder indent = new StringBuilder();
            for (int i = 0; i < level; i++) {
                indent.append(" ");
            }
            System.out.println(indent + node.getNodeName());
        }
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            list(children.item(i), level + 1);
        }
    }

    /** Reads the value of the comment attribute of the first book element. */
    @Test
    public void test3() throws Exception {
        // Attribute access needs the Element interface, hence the cast from Node
        Element element = (Element) document.getElementsByTagName("book").item(0);
        System.out.println(element.getAttribute("comment"));
    }

    /** Appends {@code <author>me</author>} as the last child of the first book element. */
    @Test
    public void add() throws Exception {
        Node author = document.createElement("author");
        author.setTextContent("me");
        Node node = document.getElementsByTagName("book").item(0);
        node.appendChild(author);
        save();
    }

    /** Inserts {@code <author>me</author>} into the first book, directly before its name. */
    @Test
    public void add2() throws Exception {
        Node author = document.createElement("author");
        author.setTextContent("me");
        // Reference node: the new element is placed in front of it
        Node refChild = document.getElementsByTagName("name").item(0);
        // Parent that receives the new element
        Node book = document.getElementsByTagName("book").item(0);
        book.insertBefore(author, refChild);
        save();
    }

    /** Sets the attribute {@code comment="second"} on the second book element. */
    @Test
    public void add3() throws Exception {
        Element book = (Element) document.getElementsByTagName("book").item(1);
        book.setAttribute("comment", "second");
        save();
    }

    /** Removes the id element of the second book; does nothing if it has none. */
    @Test
    public void delete() throws Exception {
        Element book = (Element) document.getElementsByTagName("book").item(1);
        // Look the id up inside the second book rather than document-wide, so the
        // removed node is guaranteed to belong to that book
        Node id = book.getElementsByTagName("id").item(0);
        if (id != null) {
            id.getParentNode().removeChild(id);
        }
        save();
    }

    /** Changes the text of the second price element to 109. */
    @Test
    public void update() throws Exception {
        Node price = document.getElementsByTagName("price").item(1);
        price.setTextContent("109");
        save();
    }

    /**
     * Writes the in-memory DOM tree back to the XML file. Without this step a modification
     * only exists in memory and the file stays unchanged.
     */
    private void save() throws Exception {
        // Build the transformer first so a failure here does not truncate the file
        Transformer former = TransformerFactory.newInstance().newTransformer();
        FileOutputStream out = new FileOutputStream(XML_PATH);
        try {
            former.transform(new DOMSource(document), new StreamResult(out));
        } finally {
            // The stream was opened here, so it has to be closed here
            out.close();
        }
    }
}
4.2 dom4j
dom4j并不是sun公司开发的,因此需要第三方的api, jar包的下载请移步:http://download.youkuaiyun.com/download/zyh5540/5806377
package com.zyh.xml;
import java.io.FileOutputStream;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.junit.Test;
/**
 * Demonstrates reading and modifying {@code src/test.xml} with dom4j.
 *
 * <p>Every test parses the file itself. Tests that modify the document write it back to the
 * same file.
 */
public class TestDom4j {
    /** Location of the XML document that is read and rewritten by the tests. */
    private static final String XML_PATH = "src/test.xml";

    /** Prints the name of the second book and the comment attribute of the first book. */
    @Test
    public void read() throws Exception {
        Element root = load().getRootElement();

        // elements("book") returns all book children; index 1 is the second book
        Element book = (Element) root.elements("book").get(1);
        Element name = book.element("name");
        System.out.println(name.getText());

        // element("book") returns only the first matching child
        Element book1 = root.element("book");
        // Equivalent to book1.attribute("comment").getValue()
        String comment = book1.attributeValue("comment");
        System.out.println(comment);
    }

    /** Appends {@code <author>张三</author>} as the last child of the first book. */
    @Test
    public void write() throws Exception {
        Document document = load();
        document.getRootElement().element("book").addElement("author").addText("张三");
        save(document);
    }

    /**
     * Inserts {@code <author>zyh</author>} into the second book, directly above its price
     * element, or as the last child when the book has no price.
     */
    @Test
    public void write2() throws Exception {
        Document document = load();
        Element book = (Element) document.getRootElement().elements("book").get(1);
        // The original code inserts through this list, so changes to it are expected to be
        // reflected in the parent element
        List childs = book.elements();
        Element author = DocumentHelper.createElement("author");
        author.setText("zyh");
        // Locate price instead of assuming a fixed index; later elements shift back by one
        int priceIndex = childs.indexOf(book.element("price"));
        if (priceIndex >= 0) {
            childs.add(priceIndex, author);
        } else {
            childs.add(author);
        }
        save(document);
    }

    /** Removes the price element of the first book; does nothing if it is already gone. */
    @Test
    public void delete() throws Exception {
        Document document = load();
        Element price = document.getRootElement().element("book").element("price");
        if (price != null) {
            price.getParent().remove(price);
        }
        save(document);
    }

    /** Renames the first book to "java核心技术". */
    @Test
    public void update() throws Exception {
        Document document = load();
        Element name = document.getRootElement().element("book").element("name");
        name.setText("java核心技术");
        save(document);
    }

    /** Parses the XML file into a dom4j document. */
    private static Document load() throws Exception {
        return new SAXReader().read(XML_PATH);
    }

    /**
     * Writes {@code document} back to the XML file, pretty-printed and declared as UTF-8.
     *
     * @param document the document to persist
     */
    private static void save(Document document) throws Exception {
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");
        // A byte stream is handed over (not a FileWriter) so the output is encoded as set in
        // the format rather than with the platform default charset
        XMLWriter writer = new XMLWriter(new FileOutputStream(XML_PATH), format);
        try {
            writer.write(document);
        } finally {
            // Close even when writing fails so the file handle is not leaked
            writer.close();
        }
    }
}
4.3 jdom
JDOM是一个开源项目,因此需要第三方的api, jar包的下载请移步:http://download.youkuaiyun.com/download/zyh5540/5806147
package com.zyh.xml;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.junit.Test;
public class TestJdom {
//将整个文件读取出来
@Test
public void list() throws JDOMException, IOException {
SAXBuilder builder = new SAXBuilder();
Document document = builder.build("src/test.xml");
Element root = document.getRootElement();
//得到子节点,并循环遍历
List lists = root.getChildren();
System.out.println("<" + root.getName() + ">");
for(int i=0; i<lists.size(); i++) {
Element element = (Element)lists.get(i);
System.out.print(" <" + element.getName());
//得到当前标签的所有属性
List attrs = element.getAttributes();
for(int j=0; j<attrs.size(); j++) {
Attribute attr = (Attribute)attrs.get(j);
System.out.print(" " + attr.getName() +"=" + attr.getValue() );
}
System.out.println( ">");
//得到当前标签的所有子节点
List children = element.getChildren();
for(int j=0; j<children.size(); j++) {
Element child = (Element)children.get(j);
System.out.println(" <"+ child.getName() +">" + child.getTextTrim() +"</"+ child.getName() +">");
}
System.out.println(" </" + element.getName()+ ">");
}
System.out.print("</" + root.getName() + ">");
}
//读取第二本书的name的值
@Test
public void read() throws Exception {
SAXBuilder builder = new SAXBuilder();
Document document = builder.build("src/test.xml");
Element root = document.getRootElement();
Element book = (Element)root.getChildren("book").get(1);
//第一种方法
Element name = (Element)book.getChild("name");
System.out.println(name.getText());
//第二种方法
System.out.println(book.getChildText("name"));
}
//向第一本书增加author,<author>张三</author>
@Test
public void add() throws Exception {
SAXBuilder builder = new SAXBuilder();
Document document = builder.build("src/test.xml");
Element book = document.getRootElement().getChild("book");
Element author = new Element("author");
author.setText("张三");
//插入位置为book标签的最后
// book.addContent(author);
//向book标签的第二个位置插入author
book.addContent(2, author);
XMLOutputter out = new XMLOutputter();
out.setFormat(Format.getPrettyFormat().setEncoding("UTF-8"));//设置文件编码,默认为UTF-8
out.output(document, new FileOutputStream("src/test.xml"));
}
//删除第一本书的author标签
@Test
public void delete() throws Exception {
SAXBuilder builder = new SAXBuilder();
Document document = builder.build("src/test.xml");
Element book = document.getRootElement().getChild("book");
book.removeChild("author");
XMLOutputter out = new XMLOutputter();
out.setFormat(Format.getPrettyFormat().setEncoding("UTF-8"));//设置文件编码,默认为UTF-8
out.output(document, new FileOutputStream("src/test.xml"));
}
//更新第一本书的price为111
@Test
public void update() throws Exception {
SAXBuilder builder = new SAXBuilder();
Document document = builder.build("src/test.xml");
Element price = document.getRootElement().getChild("book").getChild("price");
price.setText("111");
XMLOutputter out = new XMLOutputter();
out.setFormat(Format.getPrettyFormat().setEncoding("UTF-8"));//设置文件编码,默认为UTF-8
out.output(document, new FileOutputStream("src/test.xml"));
}
4.4 sax
在这个程序中使用的JavaBean,请读者自己根据xml文档建立
package com.zyh.xml.sax;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import com.zyh.bean.Book;
/**
 * Entry point of the SAX example: parses {@code src/test.xml} and prints the books collected
 * by {@link BeanHandler}.
 */
public class SaxXml {
    public static void main(String[] args) throws Exception {
        // Parser factory -> parser -> underlying XML reader
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        XMLReader xmlReader = parserFactory.newSAXParser().getXMLReader();

        // Install the content handler. A ListHandler or TagValueHandler can be
        // installed here instead to try the other examples.
        BeanHandler handler = new BeanHandler();
        xmlReader.setContentHandler(handler);

        // Parse the file; the handler receives the events while it is being read
        xmlReader.parse("src/test.xml");

        System.out.println(handler.getBooks());
    }
}
/**
 * Content handler that echoes every start tag (with its attributes), end tag and piece of
 * character data to standard output. All other SAX callbacks are ignored.
 */
class ListHandler implements ContentHandler {

    @Override
    public void setDocumentLocator(Locator locator) {
        // not needed
    }

    @Override
    public void startDocument() throws SAXException {
        // not needed
    }

    @Override
    public void endDocument() throws SAXException {
        // not needed
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        // not needed
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        // not needed
    }

    /** Prints the start tag as {@code <name attr=value ...>}; attribute values are unquoted. */
    @Override
    public void startElement(String uri, String localName, String name, Attributes atts)
            throws SAXException {
        StringBuilder tag = new StringBuilder();
        tag.append("<").append(name);
        if (atts != null) {
            int count = atts.getLength();
            for (int index = 0; index < count; index++) {
                tag.append(" ").append(atts.getQName(index));
                tag.append("=").append(atts.getValue(index));
            }
        }
        tag.append(">");
        System.out.print(tag.toString());
    }

    /** Prints the end tag as {@code </qName>}. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String closingTag = "</" + qName + ">";
        System.out.print(closingTag);
    }

    /** Prints the reported slice of character data unchanged. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String text = String.valueOf(ch, start, length);
        System.out.print(text);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        // not needed
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        // not needed
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        // not needed
    }
}
/**
 * Prints the text of one particular {@code name} element, by default the second one in the
 * document (i.e. the name of the second book in the sample file).
 *
 * <p>A parser may deliver the text of a single element through several
 * {@link #characters(char[], int, int)} calls, so the text is buffered and printed once, as a
 * single line, when the element ends. Text of elements nested inside the selected
 * {@code name} element is included.
 */
class TagValueHandler extends DefaultHandler {
    // Set to "name" while the parser is inside a name element, otherwise null
    private String currentTagName;
    // Number of name start tags seen so far
    private int currentNumber = 0;
    // 1-based index of the name element whose value is wanted
    private int needNumber = 2;
    // Text collected for the name element that is currently open
    private final StringBuilder text = new StringBuilder();

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if ("name".equals(qName)) {
            this.currentTagName = "name";
            this.currentNumber++;
            // Start collecting from scratch for this element
            text.setLength(0);
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // Only collect while inside the wanted name element
        if (isWantedName()) {
            text.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // End tags of other elements must not end the collection
        if ("name".equals(qName)) {
            if (isWantedName()) {
                System.out.println(text.toString());
            }
            this.currentTagName = null;
        }
    }

    /** Returns whether the parser is currently inside the name element that is wanted. */
    private boolean isWantedName() {
        return "name".equals(currentTagName) && currentNumber == needNumber;
    }
}
/**
 * Collects the {@code book} elements of the parsed document into {@link Book} beans.
 *
 * <p>The text of each {@code id}, {@code name} and {@code price} element is buffered and
 * assigned when the element ends, because a parser may deliver the text of one element
 * through several {@link #characters(char[], int, int)} calls. Field elements that occur
 * outside of a {@code book} element are ignored.
 */
class BeanHandler extends DefaultHandler {
    // Finished books, in document order
    private List<Book> books = new ArrayList<Book>();
    // Book currently being filled, or null when outside of a book element
    private Book book = null;
    // Name of the most recently opened element; reset to null at every end tag
    private String currentTagName;
    // Text collected for the element that is currently open
    private final StringBuilder text = new StringBuilder();

    /** Returns the books collected so far. */
    public List<Book> getBooks() {
        return books;
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        this.currentTagName = qName;
        text.setLength(0);
        if ("book".equals(qName)) {
            book = new Book();
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // currentTagName is null between elements, so inter-element whitespace is skipped
        if (book != null && isBookField(currentTagName)) {
            text.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // Assign only when the element that is closing is the one being collected
        if (book != null && qName != null && qName.equals(currentTagName)) {
            String value = text.toString();
            if ("id".equals(qName)) {
                book.setId(value);
            } else if ("name".equals(qName)) {
                book.setName(value);
            } else if ("price".equals(qName)) {
                book.setPrice(value);
            }
        }
        if ("book".equals(qName)) {
            books.add(book);
            book = null;
        }
        this.currentTagName = null;
    }

    /** Returns whether {@code tagName} is one of the elements mapped to a bean property. */
    private static boolean isBookField(String tagName) {
        return "id".equals(tagName) || "name".equals(tagName) || "price".equals(tagName);
    }
}