DOM解析轻松入门(一) (例子为主)

本文介绍 Java 中 DOM 解析的基本原理与应用实践,通过示例代码详细展示了如何使用 DOM 解析 XML 文件,并实现了 XML 文件的读取和递归写入。此外,还探讨了如何利用 DOM 进行 XML 文档的增删改查。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

/*Java and XML 3rd 学习笔记
DOM解析轻松入门(一)
author:shine
*/
DOM解析:与前几次讲的SAX解析稍加对比:
1)DOM解析的可控性和可操作性比SAX强,但这是以牺牲“效率”为代价的。
2)SAX是按事件触发来顺序解析xml的;DOM则不同,它会把整个XML一次性转换成DOM树结构存放在内存当中,如果需要访问某个节点,可以通过DOM树的API迅速找到相应的节点(如:getElementById、getElementsByTagName、getParentNode等)。

好了,废话少说,看例子吧:
(下面这个例子先解析并读出一个xml文件,然后采用“递归”的方式把它写入另一个xml文件。Java不像.NET那样有save()方法可以直接修改xml文件,只能先在内存中修改DOM树;要想把修改真正写回xml文件,下面的例子就是方法之一,《DOM解析轻松入门(二)》中也会用到这个类。)

先准备一个递归写入的类:
package DOMParser;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Writes a DOM tree out as XML text by walking the tree recursively.
 *
 * <p>The indentation unit, the line separator and the character encoding are
 * configurable. The encoding is used both for converting characters to bytes
 * (stream and file targets) and as the value of the {@code encoding}
 * pseudo-attribute in the emitted XML declaration.
 */
public class DOMSerializer {
 /** Indentation added per nesting level; empty (no indentation) by default. */
 private String indent = "";

 /** Line terminator written after tags; CRLF by default. */
 private String lineSeparator = "\r\n";

 /** Character encoding for byte output and for the XML declaration. */
 private String encoding = "UTF-8";

 /**
  * Sets the character encoding.
  *
  * @param encoding charset name, e.g. {@code "UTF-8"}
  */
 public void setEncoding(String encoding) {
  this.encoding = encoding;
 }

 /**
  * Sets the indentation unit to the given number of spaces.
  *
  * @param numSpace number of spaces per nesting level; values &lt;= 0 give no indentation
  */
 public void setIndent(int numSpace) {
  StringBuffer buffer = new StringBuffer();
  for (int i = 0; i < numSpace; i++) {
   buffer.append(" ");
  }
  this.indent = buffer.toString();
 }

 /**
  * Sets the line separator. The misspelled name is kept so existing callers
  * continue to compile.
  *
  * @param lineSeperator text written at each line break
  */
 public void setLineSeperator(String lineSeperator) {
  this.lineSeparator = lineSeperator;
 }

 /**
  * Correctly spelled alias of {@link #setLineSeperator(String)}.
  *
  * @param lineSeparator text written at each line break
  */
 public void setLineSeparator(String lineSeparator) {
  this.setLineSeperator(lineSeparator);
 }

 /**
  * Serializes the document to a byte stream using the configured encoding.
  * The stream is flushed but not closed; the caller owns it.
  *
  * @param doc document to write
  * @param out destination stream
  * @throws IOException if writing fails or the configured encoding is not
  *         supported ({@link UnsupportedEncodingException})
  */
 public void serialize(Document doc, OutputStream out) throws IOException {
  // An unsupported encoding is reported to the caller instead of being
  // printed and ignored, which previously produced no output at all.
  Writer writer = new OutputStreamWriter(out, this.encoding);
  this.serialize(doc, writer);
 }

 /**
  * Serializes the document to a file using the configured encoding and
  * closes the file afterwards.
  *
  * <p>This overload does not declare a checked exception, so I/O failures
  * are printed to standard error rather than thrown.
  *
  * @param doc document to write
  * @param file destination file
  */
 public void serialize(Document doc, File file) {
  OutputStream out = null;
  try {
   // Go through the OutputStream overload so the bytes are written in
   // the same encoding the XML declaration announces (FileWriter would
   // use the platform default charset).
   out = new FileOutputStream(file);
   this.serialize(doc, out);
  } catch (IOException e) {
   e.printStackTrace();
  } finally {
   if (out != null) {
    try {
     out.close();
    } catch (IOException e) {
     e.printStackTrace();
    }
   }
  }
 }

 /**
  * Serializes the document to an already prepared writer and flushes it.
  * The writer is not closed.
  *
  * @param doc document to write
  * @param writer destination writer
  * @throws IOException if writing or flushing fails
  */
 public void serialize(Document doc, Writer writer) throws IOException {
  this.serializeNode(doc, writer, "");
  writer.flush();
 }

 /**
  * Writes one node, dispatching on its node type; container nodes recurse
  * into their children. Node types without a handler are skipped.
  *
  * @param node node to write
  * @param writer destination writer
  * @param indentLevel indentation prefix for this nesting level
  * @throws IOException if writing fails
  */
 public void serializeNode(Node node, Writer writer, String indentLevel)
   throws IOException {
  switch (node.getNodeType()) {
  case Node.DOCUMENT_NODE:
   this.caseDOCUMENT_NODE(node, writer, indentLevel);
   break;
  case Node.ELEMENT_NODE:
   this.caseELEMENT_NODE(node, writer, indentLevel);
   break;
  case Node.TEXT_NODE:
   this.caseTEXT_NODE(node, writer, indentLevel);
   break;
  case Node.CDATA_SECTION_NODE:
   this.caseCDATA_SECTION_NODE(node, writer, indentLevel);
   break;
  case Node.COMMENT_NODE:
   this.caseCOMMENT_NODE(node, writer, indentLevel);
   break;
  case Node.PROCESSING_INSTRUCTION_NODE:
   this.casePROCESSING_INSTRUCTION_NODE(node, writer, indentLevel);
   break;
  case Node.ENTITY_REFERENCE_NODE:
   this.caseENTITY_REFERENCE_NODE(node, writer, indentLevel);
   break;
  case Node.DOCUMENT_TYPE_NODE:
   this.caseDOCUMENT_TYPE_NODE(node, writer, indentLevel);
   break;
  default:
   // Other node types are intentionally not serialized.
   break;
  }
 }

 /**
  * Writes the XML declaration followed by every child of the document node.
  *
  * @param node the document node
  * @param writer destination writer
  * @param indentLevel indentation prefix passed on to the children
  * @throws IOException if writing fails
  */
 public void caseDOCUMENT_NODE(Node node, Writer writer, String indentLevel)
   throws IOException {
  Document doc = (Document) node;
  // Announce the configured encoding rather than a fixed "UTF-8".
  writer.write("<?xml version=\"1.0\" encoding=\"" + this.encoding
    + "\" standalone=\"yes\"?>");
  writer.write(this.lineSeparator);
  // Iterating over all children (instead of only getDocumentElement())
  // keeps the DOCTYPE and any top-level comments or instructions.
  NodeList nodeList = doc.getChildNodes();
  if (nodeList != null) {
   for (int i = 0; i < nodeList.getLength(); i++) {
    this.serializeNode(nodeList.item(i), writer, indentLevel);
   }
  }
 }

 /**
  * Writes an element: start tag with attributes, children, end tag.
  *
  * @param node the element node
  * @param writer destination writer
  * @param indentLevel indentation prefix for this element
  * @throws IOException if writing fails
  */
 public void caseELEMENT_NODE(Node node, Writer writer, String indentLevel) throws IOException {
  String name = node.getNodeName();
  writer.write(indentLevel + "<" + name);
  // Attributes, with their values escaped.
  NamedNodeMap attrs = node.getAttributes();
  for (int i = 0; i < attrs.getLength(); i++) {
   Node attr = attrs.item(i);
   writer.write(" " + attr.getNodeName() + "=\"");
   this.print(writer, attr.getNodeValue());
   writer.write("\"");
  }
  writer.write(">");

  // Children, one indentation unit deeper.
  NodeList children = node.getChildNodes();
  if (children != null) {
   // Break the line after the start tag only when an element follows,
   // so that text content stays on the same line as its tag.
   if (children.item(0) != null && children.item(0).getNodeType() == Node.ELEMENT_NODE) {
    writer.write(this.lineSeparator);
   }

   for (int i = 0; i < children.getLength(); i++) {
    this.serializeNode(children.item(i), writer, indentLevel + this.indent);
   }

   // The last child element ended with a line break, so the end tag
   // needs this element's own indentation.
   if (children.item(0) != null
     && children.item(children.getLength() - 1).getNodeType() == Node.ELEMENT_NODE) {
    writer.write(indentLevel);
   }
  }
  writer.write("</" + name + ">");
  writer.write(this.lineSeparator);
 }

 /**
  * Writes a CDATA section with its content unescaped.
  *
  * @param node the CDATA node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws DOMException if the node value cannot be read
  * @throws IOException if writing fails
  */
 public void caseCDATA_SECTION_NODE(Node node, Writer writer,
   String indentLevel) throws DOMException, IOException {
  writer.write("<![CDATA[" + node.getNodeValue() + "]]>");
 }

 /**
  * Writes a text node with markup characters escaped.
  *
  * @param node the text node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws DOMException if the node value cannot be read
  * @throws IOException if writing fails
  */
 public void caseTEXT_NODE(Node node, Writer writer, String indentLevel) throws DOMException, IOException {
  this.print(writer, node.getNodeValue());
 }

 /**
  * Writes a comment followed by a line break.
  *
  * @param node the comment node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws DOMException if the node value cannot be read
  * @throws IOException if writing fails
  */
 public void caseCOMMENT_NODE(Node node, Writer writer, String indentLevel) throws DOMException, IOException {
  writer.write("<!--" + node.getNodeValue() + "-->");
  writer.write(this.lineSeparator);
 }

 /**
  * Writes a processing instruction followed by a line break.
  *
  * @param node the processing-instruction node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws IOException if writing fails
  */
 public void casePROCESSING_INSTRUCTION_NODE(Node node, Writer writer,
   String indentLevel) throws IOException {
  String data = node.getNodeValue();
  writer.write("<?" + node.getNodeName());
  if (data != null && data.length() > 0) {
   writer.write(" " + data);
  }
  // The closing delimiter was missing, which made the output malformed.
  writer.write("?>");
  writer.write(this.lineSeparator);
 }

 /**
  * Writes an entity reference as {@code &name;}.
  *
  * @param node the entity-reference node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws IOException if writing fails
  */
 public void caseENTITY_REFERENCE_NODE(Node node, Writer writer,
   String indentLevel) throws IOException {
  writer.write("&" + node.getNodeName() + ";");
 }

 /**
  * Writes the document type declaration followed by a line break.
  *
  * @param node the document-type node
  * @param writer destination writer
  * @param indentLevel unused
  * @throws IOException if writing fails
  */
 public void caseDOCUMENT_TYPE_NODE(Node node, Writer writer,
   String indentLevel) throws IOException {
  DocumentType docType = (DocumentType) node;
  String publicId = docType.getPublicId();
  String systemId = docType.getSystemId();
  String internalSubset = docType.getInternalSubset();
  writer.write("<!DOCTYPE " + docType.getName());
  if (publicId != null) {
   // With a public identifier the system literal follows directly,
   // without its own SYSTEM keyword.
   writer.write(" PUBLIC \"" + publicId + "\"");
   if (systemId != null) {
    writer.write(" \"" + systemId + "\"");
   }
  } else if (systemId != null) {
   writer.write(" SYSTEM \"" + systemId + "\"");
  }
  if (internalSubset != null) {
   writer.write(" [" + internalSubset + "]");
  }
  writer.write(">");
  writer.write(this.lineSeparator);
 }

 /**
  * Writes a string, replacing characters that are special in XML content or
  * in double-quoted attribute values with entity/character references.
  *
  * @param writer destination writer
  * @param s text to write; {@code null} writes nothing
  * @throws IOException if writing fails
  */
 public void print(Writer writer, String s) throws IOException {
  if (s == null) {
   return;
  }
  for (int i = 0; i < s.length(); i++) {
   char c = s.charAt(i);
   // Every case ends with break: without it each special character
   // fell through and emitted all the following replacements as well.
   switch (c) {
   case '<':
    writer.write("&lt;");
    break;
   case '>':
    writer.write("&gt;");
    break;
   case '&':
    writer.write("&amp;");
    break;
   case '"':
    // Needed because attribute values are written inside double quotes.
    writer.write("&quot;");
    break;
   case '\r':
    writer.write("&#xD;");
    break;
   default:
    writer.write(c);
   }
  }
 }
}

 

再进行XML解析读出,并调用上面的类进行递归写出:
package DOMParser;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class SerializeTester {
 private File inputXML = null;
 
 public SerializeTester(File inputXML) {
  this.inputXML = inputXML;
 }
 //进行DOM解析,得到doc元素,调用serialize方法进行写出
 public void test(OutputStream outputStream) throws SAXException,
   IOException {
  DOMParser parser = new DOMParser();
  InputSource inputSource = new InputSource(new FileInputStream(inputXML));
  parser.parse(inputSource);
  Document doc = parser.getDocument();
  DOMSerializer domSerializer = new DOMSerializer();
  domSerializer.setIndent(2);  //设置缩进
  domSerializer.serialize(doc, outputStream);
 }

 public static void main(String[] args) {
  String inputXMLURI = "D://workplace//test1.xml";
  String outputXMLURI = "D://workplace//test2.xml";
  SerializeTester tester = new SerializeTester(new File(inputXMLURI));
  try {
   tester.test(new FileOutputStream(new File(outputXMLURI)));
  } catch (FileNotFoundException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (SAXException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }

 }
}
/*
DOM解析轻松入门(二)--DOM实现增删改查 2008-2-16
*/


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值