# Java API for XML Processing (JAXP)
# Study Note of JAVA Tutorial
Document Object Model APIs
Node 接口是文档对象模型的主要类型,它代表文档树中的单个节点。Node 有很多子接口,各子接口的 nodeName、nodeValue 和 attributes 取值如下表:
node(subclass) | Named Constant | nodeName | nodeValue | attributes |
---|---|---|---|---|
Attr | Node.ATTRIBUTE_NODE | same as Attr.name | same as Attr.value | null |
CDATASection | Node.CDATA_SECTION_NODE | "#cdata-section" | same as CharacterData.data , the content of the CDATA Section | null |
Comment | Node.COMMENT_NODE | "#comment" | same as CharacterData.data , the content of the comment | null |
Document | Node.DOCUMENT_NODE | "#document" | null | null |
DocumentFragment | Node.DOCUMENT_FRAGMENT_NODE | "#document-fragment" | null | null |
DocumentType | Node.DOCUMENT_TYPE_NODE | same as DocumentType.name | null | null |
Element | Node.ELEMENT_NODE | same as Element.tagName | null | NamedNodeMap |
Entity | Node.ENTITY_NODE | entity name | null | null |
EntityReference | Node.ENTITY_REFERENCE_NODE | name of entity referenced | null | null |
Notation | Node.NOTATION_NODE | notation name | null | null |
ProcessingInstruction | Node.PROCESSING_INSTRUCTION_NODE | same as ProcessingInstruction.target | same as ProcessingInstruction.data | null |
Text | Node.TEXT_NODE | "#text" | same as CharacterData.data , the content of the text node | null |
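
节点类型 | 描述 | 子元素 |
---|---|---|
Document | 表示整个文档(DOM 树的根节点) | Element(最多一个)、ProcessingInstruction、Comment、DocumentType(最多一个) |
DocumentFragment | 表示轻量级的 Document 对象,其中容纳了一部分文档。 | Element、ProcessingInstruction、Comment、Text、CDATASection、EntityReference |
DocumentType | 向为文档定义的实体提供接口。 | None |
ProcessingInstruction | 表示处理指令。 | None |
EntityReference | 表示实体引用元素。 | Element、ProcessingInstruction、Comment、Text、CDATASection、EntityReference |
Element | 表示元素(element)。 | Element、Text、Comment、ProcessingInstruction、CDATASection、EntityReference |
Attr | 表示属性。 | Text、EntityReference |
Text | 表示元素或属性中的文本内容。 | None |
CDATASection | 表示文档中的 CDATA 区段(文本不会被解析器解析) | None |
Comment | 表示注释。 | None |
Entity | 表示实体。 | Element、ProcessingInstruction、Comment、Text、CDATASection、EntityReference |
Notation | 表示在 DTD 中声明的符号。 | None |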
XML DOM 解析可以分为 3 个步骤:
1. Instantiate the Factory and Set Properties
2. Get a Parser and set Error Handler
3. Parse the File and Get DOM Tree
domparser 类实现了上面三个步骤,代码如下:


package dom; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; public class domparser { static final String outputEncoding = "UTF-8"; /* Constants used for XML validation */ static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; static final String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource"; public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException { /* * dtdValidate : DTD validation, if true, do validation * xsdValidate : W3C XML Schema validation, if true, do validation * schemaSource : schema source XSD file * ignoreWhitespace : if true, ignore white space * ignoreComments : if true, ignore comments * putCDATAIntoText : if true, put CDATA into Text nodes * createEntityRefs : create EntityReference nodes */ String filename = "/sandbox/javatest/data.xml"; boolean dtdValidate = false; boolean xsdValidate = false; String schemaSource = null; boolean ignoreWhitespace = true; boolean ignoreComments = false; boolean putCDATAIntoText = false; boolean createEntityRefs = false; /** Step 1: create a DocumentBuilderFactory and configure it */ DocumentBuilderFactory dbf =DocumentBuilderFactory.newInstance(); /* * Set namespaceAware to true to get a DOM Level 2 tree with nodes containing NameSapce information. 
*/ dbf.setNamespaceAware(true); // Set the validation mode: no validation, DTD validation, or XSD validation dbf.setValidating(dtdValidate || xsdValidate); if (xsdValidate) { try { dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); } catch (IllegalArgumentException x) { // This can happen if the parser does not support JAXP 1.2 System.err.println( "Error: JAXP DocumentBuilderFactory attribute not recognized: " + JAXP_SCHEMA_LANGUAGE); System.err.println( "Check to see if parser conforms to JAXP 1.2 spec."); System.exit(1); } } // Set the schema source. if (schemaSource != null) { dbf.setAttribute(JAXP_SCHEMA_SOURCE, new File(schemaSource)); } // Optional: set various configuration options dbf.setIgnoringComments(ignoreComments); dbf.setIgnoringElementContentWhitespace(ignoreWhitespace); dbf.setCoalescing(putCDATAIntoText); // The opposite of creating entity reference nodes is expanding them inline dbf.setExpandEntityReferences(!createEntityRefs); /** Step 2: create a DocumentBuilder that satisfies the constraints specified by the DocumentBuilderFactory*/ DocumentBuilder db = dbf.newDocumentBuilder(); // Set an ErrorHandler before parsing OutputStreamWriter errorWriter = new OutputStreamWriter(System.err, outputEncoding); db.setErrorHandler( new MyErrorHandler(new PrintWriter(errorWriter, true))); /** Step 3: parse the input file and handle dom tree*/ Document doc = db.parse(new File(filename)); // handling the DOM tree OutputStreamWriter outWriter = new OutputStreamWriter(System.out, outputEncoding); new domecho(new PrintWriter(outWriter, true)).echo(doc); } private static class MyErrorHandler implements ErrorHandler { /** Error handler output goes here */ private PrintWriter out; MyErrorHandler(PrintWriter out) { this.out = out; } /** * Returns a string describing parse exception details */ private String getParseExceptionInfo(SAXParseException spe) { String systemId = spe.getSystemId(); if (systemId == null) { systemId = "null"; } String info = "URI=" + 
systemId + " Line=" + spe.getLineNumber() + ": " + spe.getMessage(); return info; } // The following methods are standard SAX ErrorHandler methods. // See SAX documentation for more info. public void warning(SAXParseException spe) throws SAXException { out.println("Warning: " + getParseExceptionInfo(spe)); } public void error(SAXParseException spe) throws SAXException { String message = "Error: " + getParseExceptionInfo(spe); throw new SAXException(message); } public void fatalError(SAXParseException spe) throws SAXException { String message = "Fatal Error: " + getParseExceptionInfo(spe); throw new SAXException(message); } } }
然后是DOM Tree 的处理部分,这里只是把node 信息输出,实现该功能的是domecho类,代码如下:


package dom;

import java.io.PrintWriter;

import org.w3c.dom.DocumentType;
import org.w3c.dom.Entity;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

/**
 * Prints a DOM tree, one node per line, to a {@link PrintWriter}.
 *
 * <p>Each line holds a label for the node type followed by the node's name,
 * namespace URI, prefix, local name and value (those that are non-null).
 * Children are indented one level deeper than their parent; the entities of
 * a document type and the attributes of an element are indented two levels.
 */
public class domecho {

    /** Destination of all output. */
    private final PrintWriter out;

    /** Indent level */
    private int indent = 0;

    /** Indentation will be in multiples of basicIndent */
    private final String basicIndent = " ";

    domecho(PrintWriter out) {
        this.out = out;
    }

    /**
     * Echo common attributes of a DOM2 Node and terminate output with an
     * EOL character.
     */
    private void printlnCommon(Node n) {
        out.print(" nodeName=\"" + n.getNodeName() + "\"");

        String val = n.getNamespaceURI();
        if (val != null) {
            out.print(" uri=\"" + val + "\"");
        }

        val = n.getPrefix();
        if (val != null) {
            out.print(" pre=\"" + val + "\"");
        }

        val = n.getLocalName();
        if (val != null) {
            out.print(" local=\"" + val + "\"");
        }

        val = n.getNodeValue();
        if (val != null) {
            out.print(" nodeValue=");
            if (val.trim().isEmpty()) {
                // Whitespace-only values are shown as a marker instead of verbatim
                out.print("[WS]");
            } else {
                out.print("\"" + val + "\"");
            }
        }
        out.println();
    }

    /**
     * Indent to the current level in multiples of basicIndent
     */
    private void outputIndentation() {
        for (int i = 0; i < indent; i++) {
            out.print(basicIndent);
        }
    }

    /**
     * Returns the label printed in front of a node of the given type.
     *
     * @param type a node type constant as returned by {@link Node#getNodeType()}
     * @return the label for that type, or an "UNSUPPORTED NODE" label that
     *         includes the numeric type when the type is not recognized
     */
    private static String labelFor(int type) {
        switch (type) {
            case Node.ATTRIBUTE_NODE:
                return "ATTR:";
            case Node.CDATA_SECTION_NODE:
                return "CDATA:";
            case Node.COMMENT_NODE:
                return "COMM:";
            case Node.DOCUMENT_FRAGMENT_NODE:
                return "DOC_FRAG:";
            case Node.DOCUMENT_NODE:
                return "DOC:";
            case Node.DOCUMENT_TYPE_NODE:
                return "DOC_TYPE:";
            case Node.ELEMENT_NODE:
                return "ELEM:";
            case Node.ENTITY_NODE:
                return "ENT:";
            case Node.ENTITY_REFERENCE_NODE:
                return "ENT_REF:";
            case Node.NOTATION_NODE:
                return "NOTATION:";
            case Node.PROCESSING_INSTRUCTION_NODE:
                return "PROC_INST:";
            case Node.TEXT_NODE:
                return "TEXT:";
            default:
                return "UNSUPPORTED NODE: " + type;
        }
    }

    /**
     * Echoes every node of a map at twice the normal indent step.
     * A null map is treated as empty.
     */
    private void echoMapAtDoubleIndent(NamedNodeMap map) {
        if (map == null) {
            return;
        }
        indent += 2;
        for (int i = 0; i < map.getLength(); i++) {
            echo(map.item(i));
        }
        indent -= 2;
    }

    /**
     * Recursive routine to print out DOM tree nodes
     *
     * @param n the node to print together with all of its descendants
     */
    public void echo(Node n) {
        // Indent to the current level before printing anything
        outputIndentation();

        int type = n.getNodeType();
        out.print(labelFor(type));
        printlnCommon(n);

        if (type == Node.DOCUMENT_TYPE_NODE) {
            // Print entities if any
            echoMapAtDoubleIndent(((DocumentType) n).getEntities());
        } else if (type == Node.ELEMENT_NODE) {
            /*
             * Print attributes if any.
             * Note: element attributes are not children of ELEMENT_NODEs
             * but are properties of their associated ELEMENT_NODE.
             * For this reason, they are printed with 2x the indent level
             * to indicate this.
             */
            echoMapAtDoubleIndent(n.getAttributes());
        }

        // Print children if any
        indent++;
        for (Node child = n.getFirstChild(); child != null;
                child = child.getNextSibling()) {
            echo(child);
        }
        indent--;
    }
}