/*java and xml 3rd 学习笔记
DOM解析轻松入门(一)
author:shine
*/
DOM解析:与前几次讲的SAX解析稍加对比:
1)DOM解析的可控性和可操作性比SAX强,但这是以牺牲“效率”为代价的。
2)SAX是按事件触发来顺序解析xml;DOM则不同,它会把整个XML一次性转换成DOM树的结构存放在内存当中,如果需要访问某个节点,可以通过DOM树的API
迅速找到相应的节点(如:getElementById,getElementsByTagName,getParentNode等)。
好了,废话少说,看例子吧:
(以下这个例子完成了:从一个xml文件解析并读出来,然后采用“递归”的形式写入另一个xml文件中。java不像.net有save()方法可以直接修改xml文件,java只能在内存中修改DOM树;要想修改xml文件,下面的例子就是方法之一,DOM解析轻松入门(二)中也会用到这个类)
先准备一个递归写入的类:
package DOMParser;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Writes a DOM tree out as XML text by recursively visiting its nodes.
 *
 * <p>Indentation, line separator and character encoding are configurable.
 * The three {@code serialize} overloads all funnel into
 * {@link #serialize(Document, Writer)}, which walks the tree through
 * {@link #serializeNode(Node, Writer, String)}.
 */
public class DOMSerializer {

    /** Indentation unit added for each nesting level (empty by default). */
    private String indent = "";

    /** Line terminator written after declarations, comments and end tags. */
    private String lineSeparator = "\r\n";

    /** Encoding used for byte output and announced in the XML declaration. */
    private String encoding = "UTF-8";

    /**
     * Sets the character encoding used when writing to a stream or file and
     * announced in the XML declaration.
     *
     * @param encoding charset name, e.g. {@code "UTF-8"}
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Sets the indentation unit to {@code numSpace} space characters.
     *
     * @param numSpace number of spaces per nesting level; values below 1
     *                 yield no indentation
     */
    public void setIndent(int numSpace) {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < numSpace; i++) {
            buffer.append(" ");
        }
        this.indent = buffer.toString();
    }

    /**
     * Sets the line separator written between top-level constructs and
     * after end tags.
     *
     * @param lineSeperator the separator text, e.g. {@code "\n"}
     */
    public void setLineSeperator(String lineSeperator) {
        this.lineSeparator = lineSeperator;
    }

    /**
     * Serializes the document to a byte stream using the configured encoding.
     * The stream is flushed but not closed.
     *
     * @param doc document to write
     * @param out destination stream
     * @throws IOException if the encoding is unsupported or writing fails
     */
    public void serialize(Document doc, OutputStream out) throws IOException {
        // UnsupportedEncodingException is an IOException; let it propagate
        // instead of silently producing no output.
        Writer writer = new OutputStreamWriter(out, this.encoding);
        this.serialize(doc, writer);
    }

    /**
     * Serializes the document to a file using the configured encoding.
     *
     * <p>This overload declares no checked exception, so I/O failures are
     * reported on standard error rather than thrown. The file is always closed.
     *
     * @param doc  document to write
     * @param file destination file
     */
    public void serialize(Document doc, File file) {
        OutputStream out = null;
        try {
            out = new FileOutputStream(file);
            // Delegating to the stream overload applies this.encoding; a
            // FileWriter would use the platform default charset instead.
            this.serialize(doc, out);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Serializes the document to an already prepared writer and flushes it.
     * The writer is not closed.
     *
     * @param doc    document to write
     * @param writer destination writer
     * @throws IOException if writing or flushing fails
     */
    public void serialize(Document doc, Writer writer) throws IOException {
        this.serializeNode(doc, writer, "");
        writer.flush();
    }

    /**
     * Writes one node by dispatching on its node type. Container nodes
     * recurse into their children.
     *
     * @param node        node to write
     * @param writer      destination writer
     * @param indentLevel indentation prefix for this nesting depth
     * @throws IOException if writing fails
     */
    public void serializeNode(Node node, Writer writer, String indentLevel)
            throws IOException {
        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                this.caseDOCUMENT_NODE(node, writer, indentLevel);
                break;
            case Node.ELEMENT_NODE:
                this.caseELEMENT_NODE(node, writer, indentLevel);
                break;
            case Node.TEXT_NODE:
                this.caseTEXT_NODE(node, writer, indentLevel);
                break;
            case Node.CDATA_SECTION_NODE:
                this.caseCDATA_SECTION_NODE(node, writer, indentLevel);
                break;
            case Node.COMMENT_NODE:
                this.caseCOMMENT_NODE(node, writer, indentLevel);
                break;
            case Node.PROCESSING_INSTRUCTION_NODE:
                this.casePROCESSING_INSTRUCTION_NODE(node, writer, indentLevel);
                break;
            case Node.ENTITY_REFERENCE_NODE:
                this.caseENTITY_REFERENCE_NODE(node, writer, indentLevel);
                break;
            case Node.DOCUMENT_TYPE_NODE:
                this.caseDOCUMENT_TYPE_NODE(node, writer, indentLevel);
                break;
            default:
                // Other node types are not written.
                break;
        }
    }

    /**
     * Writes the XML declaration followed by every child of the document.
     *
     * @param node        the {@link Document} node
     * @param writer      destination writer
     * @param indentLevel indentation prefix passed on to the children
     * @throws IOException if writing fails
     */
    public void caseDOCUMENT_NODE(Node node, Writer writer, String indentLevel)
            throws IOException {
        Document doc = (Document) node;
        // Announce the configured encoding so the declaration matches the
        // bytes produced by the stream and file overloads.
        writer.write("<?xml version=\"1.0\" encoding=\"" + this.encoding
                + "\" standalone=\"yes\"?>");
        writer.write(this.lineSeparator);
        // Iterating all children (rather than only getDocumentElement())
        // keeps the DOCTYPE and top-level comments/instructions.
        NodeList nodeList = doc.getChildNodes();
        if (nodeList != null) {
            for (int i = 0; i < nodeList.getLength(); i++) {
                this.serializeNode(nodeList.item(i), writer, indentLevel);
            }
        }
    }

    /**
     * Writes an element: start tag with attributes, children, end tag.
     *
     * @param node        the element node
     * @param writer      destination writer
     * @param indentLevel indentation prefix for this element
     * @throws IOException if writing fails
     */
    public void caseELEMENT_NODE(Node node, Writer writer, String indentLevel)
            throws IOException {
        String name = node.getNodeName();
        writer.write(indentLevel + "<" + name);

        // Attributes, with values escaped.
        NamedNodeMap attrs = node.getAttributes();
        for (int i = 0; i < attrs.getLength(); i++) {
            Node attr = attrs.item(i);
            writer.write(" " + attr.getNodeName() + "=\"");
            this.print(writer, attr.getNodeValue());
            writer.write("\"");
        }
        writer.write(">");

        NodeList children = node.getChildNodes();
        if (children != null) {
            int count = children.getLength();
            // Break the line after the start tag only when the first child
            // is itself an element, so text content stays inline.
            if (count > 0
                    && children.item(0).getNodeType() == Node.ELEMENT_NODE) {
                writer.write(this.lineSeparator);
            }
            for (int i = 0; i < count; i++) {
                this.serializeNode(children.item(i), writer,
                        indentLevel + this.indent);
            }
            // Indent the end tag when it follows a child element's line break.
            if (count > 0
                    && children.item(count - 1).getNodeType() == Node.ELEMENT_NODE) {
                writer.write(indentLevel);
            }
        }
        writer.write("</" + name + ">");
        writer.write(this.lineSeparator);
    }

    /**
     * Writes a CDATA section with its content unescaped.
     *
     * @param node        the CDATA section node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws DOMException if the node value cannot be read
     * @throws IOException  if writing fails
     */
    public void caseCDATA_SECTION_NODE(Node node, Writer writer,
            String indentLevel) throws DOMException, IOException {
        writer.write("<![CDATA[" + node.getNodeValue() + "]]>");
    }

    /**
     * Writes a text node with markup characters escaped.
     *
     * @param node        the text node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws DOMException if the node value cannot be read
     * @throws IOException  if writing fails
     */
    public void caseTEXT_NODE(Node node, Writer writer, String indentLevel)
            throws DOMException, IOException {
        this.print(writer, node.getNodeValue());
    }

    /**
     * Writes a comment followed by a line separator.
     *
     * @param node        the comment node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws DOMException if the node value cannot be read
     * @throws IOException  if writing fails
     */
    public void caseCOMMENT_NODE(Node node, Writer writer, String indentLevel)
            throws DOMException, IOException {
        writer.write("<!--" + node.getNodeValue() + "-->");
        writer.write(this.lineSeparator);
    }

    /**
     * Writes a processing instruction as {@code <?target data?>} followed by
     * a line separator.
     *
     * @param node        the processing-instruction node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws IOException if writing fails
     */
    public void casePROCESSING_INSTRUCTION_NODE(Node node, Writer writer,
            String indentLevel) throws IOException {
        writer.write("<?" + node.getNodeName() + " " + node.getNodeValue() + "?>");
        writer.write(this.lineSeparator);
    }

    /**
     * Writes an entity reference as {@code &name;}.
     *
     * @param node        the entity-reference node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws IOException if writing fails
     */
    public void caseENTITY_REFERENCE_NODE(Node node, Writer writer,
            String indentLevel) throws IOException {
        writer.write("&" + node.getNodeName() + ";");
    }

    /**
     * Writes the document type declaration followed by a line separator.
     *
     * @param node        the {@link DocumentType} node
     * @param writer      destination writer
     * @param indentLevel unused
     * @throws IOException if writing fails
     */
    public void caseDOCUMENT_TYPE_NODE(Node node, Writer writer,
            String indentLevel) throws IOException {
        DocumentType docType = (DocumentType) node;
        String publicId = docType.getPublicId();
        String systemId = docType.getSystemId();
        String internalSubset = docType.getInternalSubset();

        writer.write("<!DOCTYPE " + docType.getName());
        if (publicId != null) {
            // With a public id the system id follows directly, without
            // the SYSTEM keyword.
            writer.write(" PUBLIC \"" + publicId + "\"");
            if (systemId != null) {
                writer.write(" \"" + systemId + "\"");
            }
        } else if (systemId != null) {
            writer.write(" SYSTEM \"" + systemId + "\"");
        }
        if (internalSubset != null) {
            writer.write(" [" + internalSubset + "]");
        }
        writer.write(">");
        writer.write(this.lineSeparator);
    }

    /**
     * Writes {@code s} with XML markup characters replaced by entity or
     * character references. Used for both text content and attribute values.
     *
     * @param writer destination writer
     * @param s      text to write; {@code null} writes nothing
     * @throws IOException if writing fails
     */
    public void print(Writer writer, String s) throws IOException {
        if (s == null) {
            return;
        }
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '<':
                    writer.write("&lt;");
                    break;
                case '>':
                    writer.write("&gt;");
                    break;
                case '&':
                    writer.write("&amp;");
                    break;
                case '"':
                    // Needed because attribute values are delimited by
                    // double quotes.
                    writer.write("&quot;");
                    break;
                case '\r':
                    // Written as a character reference so the carriage
                    // return is not normalized away when parsed again.
                    writer.write("&#xD;");
                    break;
                default:
                    writer.write(c);
            }
        }
    }
}
再进行XML解析读出,并调用上面的类进行递归写出:
package DOMParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Small driver that parses an XML file into a DOM document and writes it
 * back out through {@code DOMSerializer}.
 */
public class SerializeTester {

    /** XML file to parse. */
    private File inputXML = null;

    /**
     * @param inputXML the XML file that {@link #test(OutputStream)} will parse
     */
    public SerializeTester(File inputXML) {
        this.inputXML = inputXML;
    }

    /**
     * Parses the input file into a DOM document and serializes it to the
     * given stream with an indent of two spaces.
     *
     * <p>The input file stream opened here is always closed. The caller's
     * {@code outputStream} is left open for the caller to close.
     *
     * @param outputStream destination for the serialized XML
     * @throws SAXException if the input cannot be parsed
     * @throws IOException  if reading or writing fails
     */
    public void test(OutputStream outputStream) throws SAXException,
            IOException {
        FileInputStream in = new FileInputStream(inputXML);
        try {
            DOMParser parser = new DOMParser();
            parser.parse(new InputSource(in));
            Document doc = parser.getDocument();

            DOMSerializer domSerializer = new DOMSerializer();
            domSerializer.setIndent(2);
            domSerializer.serialize(doc, outputStream);
        } finally {
            // Release the file handle even when parsing or writing fails.
            in.close();
        }
    }

    /**
     * Reads a fixed input file and writes the re-serialized document to a
     * fixed output file. Failures are printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inputXMLURI = "D://workplace//test1.xml";
        String outputXMLURI = "D://workplace//test2.xml";
        SerializeTester tester = new SerializeTester(new File(inputXMLURI));
        OutputStream out = null;
        try {
            out = new FileOutputStream(new File(outputXMLURI));
            tester.test(out);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the output file so buffered data reaches disk and the
            // handle is released.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
/*
DOM解析轻松入门(二)--DOM实现增删改查 2008-2-16
*/