Java StAX解析XML

最新推荐文章于 2024-08-08 14:14:22 发布

无门关·平常是道

最新推荐文章于 2024-08-08 14:14:22 发布

阅读量500

点赞数

分类专栏： SOA 文章标签： XML StAX

本文链接：https://blog.youkuaiyun.com/zhf257/article/details/84396256

版权

SOA 专栏收录该内容

2 篇文章

订阅专栏

1、DOM提供了一个易于使用的API，与SAX和StAX相比，它的优势在于支持XPath。不过，它必须将整个文档读入内存，这对于小文档来说没什么问题，但会影响大文档的处理性能，而对于非常大的文档来说，这种方式根本不可行。

2、SAX通过作为一种“推”机制的解析器来解决这个问题，也就是说，对于该解析器在文档中遇到的每种结构，都会生成相应的事件，程序员可以选择自己感兴趣的事件进行处理。不足之处在于SAX通常生成的大量事件是程序员并不关心的。而且，SAX API不提供迭代式的文档处理，而是从头到尾一次性推送完所有事件。

3、StAX方法解析XML

StAX即Streaming API for XML，是当前最有效的XML处理方法，因此特别适合于处理复杂流程，比如数据库绑定和SOAP消息。StAX创建的信息集非常小，可以直接作为垃圾收集的候选对象。这让XML处理任务占用较小的空间，使得它不仅适用于小型堆设备，比如移动电话，而且适用于长期运行的服务器端应用程序。

与SAX不同，StAX能够对XML文档进行写操作，这减少了需要处理的API数量。

StAX提供两种不同的解析数据模型：光标模型和迭代器模型。

Catalog.xml

<?xml version="1.0" encoding="UTF-8"?>
<catalog>
    <book sku="123_xaa">
        <title>King Lear</title>
        <author>William Shakespeare</author>
        <price>6.95</price>
        <category>classics</category>
    </book>
    <book sku="988_yty">
        <title>Hamlet</title>
        <author>William Shakespeare</author>
        <price>5.95</price>
        <category>classics</category>
    </book>
    <book sku="434_asd">
        <title>1984</title>
        <author>George Orwell</author>
        <price>12.95</price>
        <category>classics</category>
    </book>
    <book sku="876_pep">
        <title>Java Generics and Collections</title>
        <authors>
            <author>Maurice Naftalin</author>
            <author>Phillip Wadler</author>
        </authors>
        <price>34.99</price>
        <category>programming</category>
    </book>
</catalog>

使用StAX光标模型：XMLStreamReader

import static java.lang.System.out;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.XMLEvent;
/**
 * StAX光标模型
 * @author K
 *
 */
public class StaxCursor {
	// classpath location of the catalog document to parse
	private static final String db = "/ch02/Catalog.xml";

	// sorted, de-duplicated author names collected while parsing
	private final Set<String> uniqueAuthors;

	public static void main(String... args) {
		StaxCursor p = new StaxCursor();
		p.find();
	}

	// constructor
	public StaxCursor() {
		uniqueAuthors = new TreeSet<String>();
	}

	/**
	 * Parses the catalog with the StAX cursor API, printing the sku of every
	 * book element as it is encountered and, once the document has been read,
	 * the set of unique author names.
	 *
	 * Problems (missing resource, malformed XML) are reported on standard
	 * output with a "Cannot parse: " prefix instead of being thrown.
	 */
	public void find() {
		// get ahold of the file; getResourceAsStream returns null when absent
		final InputStream is = StaxCursor.class.getResourceAsStream(db);
		if (is == null) {
			out.println("Cannot parse: resource not found: " + db);
			return;
		}

		XMLInputFactory xif = XMLInputFactory.newInstance();
		// deliver each run of text as a single CHARACTERS event, so that an
		// author name is never split across several events
		xif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

		// forward-only, most efficient way to read
		XMLStreamReader reader = null;

		// name of the element whose content is currently being read
		String current = "";

		try {
			// create the reader from the stream
			reader = xif.createXMLStreamReader(is);

			while (reader.hasNext()) {
				// because this is Cursor, we get an integer token per event
				switch (reader.next()) {
				case XMLEvent.START_ELEMENT:
					// save element name for the CHARACTERS event that follows
					current = reader.getName().toString();
					printSkus(current, reader);
					break;

				case XMLEvent.CHARACTERS:
					findAuthors(current, reader);
					break;

				case XMLEvent.END_ELEMENT:
					// text after a closing tag belongs to the parent, not to
					// the element that was just closed
					current = "";
					break;

				default:
					break;
				}
			} // end loop
			out.println("Unique Authors=" + uniqueAuthors);

		} catch (XMLStreamException e) {
			out.println("Cannot parse: " + e);
		} finally {
			closeQuietly(reader, is);
		}
	}

	// print the sku attribute of a book element, if it has one
	private void printSkus(String current, XMLStreamReader r) {
		if (!"book".equals(current)) {
			return;
		}
		// look the attribute up by name rather than by position, so books
		// without attributes or with extra attributes are handled correctly
		String v = r.getAttributeValue(null, "sku");
		if (v != null) {
			out.println("AttribName sku=" + v);
		}
	}

	// record the text of an author element, ignoring whitespace-only text
	private void findAuthors(String current, XMLStreamReader r) {
		if (!"author".equals(current)) {
			return;
		}
		String v = r.getText().trim();
		if (v.length() > 0) {
			uniqueAuthors.add(v);
		}
	}

	// release the reader and the stream; XMLStreamReader.close() does not
	// close the underlying input source, so both must be closed
	private static void closeQuietly(XMLStreamReader reader, InputStream is) {
		if (reader != null) {
			try {
				reader.close();
			} catch (XMLStreamException ignored) {
				// parsing is already finished; nothing useful to do here
			}
		}
		try {
			is.close();
		} catch (java.io.IOException ignored) {
			// read-only stream; a failed close loses no data
		}
	}
}

使用StAX迭代器模型：迭代器API比较灵活，而且易于扩展

import static java.lang.System.out;
import java.io.InputStream;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;

/**
 * StAX迭代器模型
 * @author K
 *
 */
public class StaxIterator {
	// classpath location of the catalog document to parse
	private static final String db = "/ch02/Catalog.xml";

	public static void main(String... args) {
		StaxIterator p = new StaxIterator();
		p.find();
	}

	/**
	 * Parses the catalog with the StAX iterator (event) API and prints the
	 * sku attribute of every start element that carries one.
	 *
	 * Problems (missing resource, malformed XML) are reported on standard
	 * output with a "Cannot parse: " prefix instead of being thrown.
	 */
	public void find() {
		// get ahold of the file; getResourceAsStream returns null when absent
		final InputStream is = StaxIterator.class.getResourceAsStream(db);
		if (is == null) {
			out.println("Cannot parse: resource not found: " + db);
			return;
		}

		XMLInputFactory xif = XMLInputFactory.newInstance();
		XMLEventReader reader = null;

		try {
			// create the reader from the stream
			reader = xif.createXMLEventReader(is);

			final QName sku = new QName("sku");

			// each event is a full object that can be queried directly
			while (reader.hasNext()) {
				XMLEvent e = reader.nextEvent();
				if (!e.isStartElement()) {
					continue;
				}

				// keep the attribute in its own variable instead of reusing
				// the event variable for something that is not the event
				javax.xml.stream.events.Attribute attr =
						e.asStartElement().getAttributeByName(sku);
				if (attr != null) {
					out.println(attr);
				}
			} // end loop

		} catch (XMLStreamException e) {
			out.println("Cannot parse: " + e);
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (XMLStreamException ignored) {
					// parsing is already finished; nothing useful to do here
				}
			}
			try {
				is.close();
			} catch (java.io.IOException ignored) {
				// read-only stream; a failed close loses no data
			}
		}
	}
}

使用StAX光标API编写XML数据流

import static java.lang.System.out;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

public class WriteStax {
	// standard StAX property name; same value as
	// XMLOutputFactory.IS_REPAIRING_NAMESPACES
	private static final String REPAIR_NS = "javax.xml.stream.isRepairingNamespaces";

	private static final String NS = "http://ns.example.com/books";

	/**
	 * Writes a small book document to result.xml in the working directory
	 * using the StAX cursor writer, then prints "All done." on success.
	 * Failures are reported via their stack trace.
	 */
	public static void main(String... args) {
		XMLOutputFactory factory = XMLOutputFactory.newInstance();
		factory.setProperty(REPAIR_NS, Boolean.TRUE);

		FileOutputStream fos = null;
		XMLStreamWriter xsw = null;

		try {
			// setup a destination file
			fos = new FileOutputStream("result.xml");

			// create the writer
			xsw = factory.createXMLStreamWriter(fos);
			xsw.setDefaultNamespace(NS);

			// open the document. Can also add encoding, etc
			xsw.writeStartDocument("1.0");

			xsw.writeComment("Powered by StAX");

			// make enclosing book
			xsw.writeStartElement("book");
			xsw.writeNamespace("b", NS);
			xsw.writeAttribute("sku", "345_iui");

			// make title child element
			xsw.writeStartElement(NS, "title");
			xsw.writeCharacters("White Noise");
			xsw.writeEndElement(); // close title

			xsw.writeEndElement(); // close book

			// the document may only be ended after all of its content has
			// been written
			xsw.writeEndDocument();

			// push everything buffered by the writer into the file stream
			xsw.flush();

			out.print("All done.");
		} catch (FileNotFoundException fnfe) {
			fnfe.printStackTrace();
		} catch (XMLStreamException xse) {
			xse.printStackTrace();
		} finally {
			// clean up on every path: writer first, then the stream beneath it
			// (XMLStreamWriter.close() does not close the underlying stream)
			if (xsw != null) {
				try {
					xsw.close();
				} catch (XMLStreamException xse) {
					xse.printStackTrace();
				}
			}
			if (fos != null) {
				try {
					fos.close();
				} catch (IOException ioe) {
					ioe.printStackTrace();
				}
			}
		}
	}
}

该API非常灵活，允许按照不同程度的规范化和合法性来编写XML。可以快速、清晰地生成这样的XML片段：适合于传输到SOAP主体的有效载荷中或其他任何希望粘贴某种标记的地方。

一般来说，在两种模式中进行抉择时，如果希望能够修改事件流和采用更灵活的API，就选择迭代器。如果希望得到更快的运行性能和更小的空间，就使用光标API。

使用过滤器来提高应用程序的性能和清晰度，方法是指示解析器只提供我们所感兴趣的事件，使光标模式解析更有效率。实现StreamFilter接口的accept方法，然后使用它构造XMLStreamReader。当使用EventReader时，要做的所有事情就是实现EventFilter接口的accept方法。