我们接着上一篇继续分析DOM是如何来解析XML文档:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.hibernate.util.ConfigHelper;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
/**
 * Small demonstration of DOM parsing: turns every {@code <book>} element of an
 * XML document into a {@link Book}.
 */
public class DomParseService {

    /**
     * Parses the XML read from {@code inputStream} and builds one {@link Book}
     * per {@code <book>} element found below the document element.
     *
     * <p>The {@code id} attribute of each {@code <book>} becomes the book id;
     * the text of its {@code <name>} and {@code <price>} child elements becomes
     * the name and price. Other child nodes are ignored. Each name/price value
     * is also echoed to standard output.
     *
     * @param inputStream stream positioned at the start of the XML document;
     *                    it is not closed by this method
     * @return the books in document order (empty if there is no {@code <book>})
     * @throws Exception if the parser cannot be created, the document cannot be
     *                   parsed, or an id/price is not a valid number
     */
    public static List<Book> getBooks(InputStream inputStream) throws Exception {
        List<Book> list = new ArrayList<Book>();

        // Obtain the DOM parser factory, then a concrete parser from it.
        // NOTE(review): the factory is used with its default settings; if the XML can
        // come from an untrusted source, DTDs/external entities should be disabled.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Parsing yields a Document, i.e. the in-memory tree of the whole file.
        Document document = builder.parse(inputStream);

        // Root element of that tree; all <book> descendants are collected from it.
        Element element = document.getDocumentElement();
        NodeList bookNodes = element.getElementsByTagName("book");

        for (int i = 0; i < bookNodes.getLength(); i++) {
            Element bookElement = (Element) bookNodes.item(i);
            Book book = new Book();
            book.setId(Integer.parseInt(bookElement.getAttribute("id")));

            NodeList childNodes = bookElement.getChildNodes();
            for (int j = 0; j < childNodes.getLength(); j++) {
                Node child = childNodes.item(j);
                // Whitespace between tags shows up as text nodes; only elements matter here.
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                String nodeName = child.getNodeName();
                if ("name".equals(nodeName)) {
                    // getTextContent() is safe for empty elements, where getFirstChild() is null.
                    String name = child.getTextContent();
                    book.setName(name);
                    System.out.println("nodeValue = " + name);
                } else if ("price".equals(nodeName)) {
                    float price = Float.parseFloat(child.getTextContent());
                    book.setPrice(price);
                    System.out.println("nodeValue = " + price);
                }
            } // end for j
            list.add(book);
        } // end for i
        return list;
    }

    /**
     * Loads {@code /book.xml} through {@code ConfigHelper} and parses it,
     * printing the stack trace if anything goes wrong.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        InputStream inputStream = ConfigHelper.getResourceAsStream("/book.xml");
        try {
            getBooks(inputStream);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // getBooks() leaves the stream open, so release it here.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a fully consumed stream fails.
                }
            }
        }
    }
}
/**
 * Scans the document with the current scanner, first resetting and
 * configuring the pipeline if an input source is still pending.
 *
 * @param complete passed straight through to {@code fCurrentScanner.scanDocument}
 * @return whatever {@code fCurrentScanner.scanDocument(complete)} returns
 * @throws XNIException rethrown as-is, or wrapping any other checked exception
 * @throws IOException  rethrown as-is from set-up or scanning
 */
public boolean parse(boolean complete) throws XNIException, IOException {
    // A pending input source means the pipeline still has to be (re)built for it.
    if (fInputSource != null) {
        try {
            fValidationManager.reset();
            fVersionDetector.reset(this);
            resetCommon();

            short docVersion = fVersionDetector.determineDocVersion(fInputSource);
            if (docVersion != Constants.XML_VERSION_1_1) {
                configurePipeline();
                reset();
            } else {
                initXML11Components();
                configureXML11Pipeline();
                resetXML11();
            }

            // From here on the configuration counts as fixed.
            fConfigUpdated = false;

            // Hand the chosen scanner to the version detector, which starts the document.
            fVersionDetector.startDocumentParsing((XMLEntityHandler) fCurrentScanner, docVersion);
            // Clear the source so this set-up is skipped on the next call.
            fInputSource = null;
        } catch (XNIException xniFailure) {
            if (PRINT_EXCEPTION_STACK_TRACE) {
                xniFailure.printStackTrace();
            }
            throw xniFailure;
        } catch (IOException ioFailure) {
            if (PRINT_EXCEPTION_STACK_TRACE) {
                ioFailure.printStackTrace();
            }
            throw ioFailure;
        } catch (RuntimeException runtimeFailure) {
            if (PRINT_EXCEPTION_STACK_TRACE) {
                runtimeFailure.printStackTrace();
            }
            throw runtimeFailure;
        } catch (Exception otherFailure) {
            if (PRINT_EXCEPTION_STACK_TRACE) {
                otherFailure.printStackTrace();
            }
            // Any remaining checked exception is surfaced as an XNIException.
            throw new XNIException(otherFailure);
        }
    }

    try {
        return fCurrentScanner.scanDocument(complete);
    } catch (XNIException xniFailure) {
        if (PRINT_EXCEPTION_STACK_TRACE) {
            xniFailure.printStackTrace();
        }
        throw xniFailure;
    } catch (IOException ioFailure) {
        if (PRINT_EXCEPTION_STACK_TRACE) {
            ioFailure.printStackTrace();
        }
        throw ioFailure;
    } catch (RuntimeException runtimeFailure) {
        if (PRINT_EXCEPTION_STACK_TRACE) {
            runtimeFailure.printStackTrace();
        }
        throw runtimeFailure;
    } catch (Exception otherFailure) {
        if (PRINT_EXCEPTION_STACK_TRACE) {
            otherFailure.printStackTrace();
        }
        throw new XNIException(otherFailure);
    }
} // parse(boolean):boolean
这里面有一行代码要特别注意:fVersionDetector.startDocumentParsing((XMLEntityHandler) fCurrentScanner, version); 从方法名就可以看出,它负责启动文档的解析,里面做了不少事情:
/**
 * Points the entity manager and error reporter at the scanner chosen for the
 * detected XML version, then tells that scanner the document entity has started.
 *
 * @param scanner the scanner to install as entity handler and to notify
 * @param version the version of the document (XML 1.1 or XML 1.0)
 */
public void startDocumentParsing(XMLEntityHandler scanner, short version){
    // Anything other than XML 1.0 is handled as XML 1.1.
    fEntityManager.setScannerVersion(
        version == Constants.XML_VERSION_1_0
            ? Constants.XML_VERSION_1_0
            : Constants.XML_VERSION_1_1);

    // The error reporter must locate errors through the entity scanner now in use.
    fErrorReporter.setDocumentLocator(fEntityManager.getEntityScanner());

    // Note: the entity manager's scanner was just switched, so each scanner's
    // startEntity has to refresh its own fEntityScanner field to match.
    fEntityManager.setEntityHandler(scanner);
    scanner.startEntity(
        fXMLSymbol,
        fEntityManager.getCurrentResourceIdentifier(),
        fEncoding,
        null);
}
其中 scanner.startEntity(...) 最终会调用到父类 com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl 中实现的方法:
/**
 * Entity-start notification. The document entity is reported under the
 * name "[xml]"; the DTD uses the pseudo-name "[dtd]", parameter entity
 * names start with '%', and general entities use their plain name.
 *
 * @param name       The name of the entity.
 * @param identifier The resource identifier.
 * @param encoding   The auto-detected IANA encoding name of the entity
 *                   stream, or null when the encoding is not
 *                   auto-detected (e.g. internal entities or a document
 *                   entity that is parsed from a java.io.Reader).
 * @param augs       Passed on to the superclass implementation.
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void startEntity(String name,
        XMLResourceIdentifier identifier,
        String encoding, Augmentations augs) throws XNIException {

    super.startEntity(name, identifier, encoding, augs);

    final boolean documentEntity = name.equals("[xml]");
    if (documentEntity) {
        // The document entity itself: let the handler (if any) start the document.
        if (fDocumentHandler != null) {
            fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
        }
    } else if (fEntityScanner.isExternal()) {
        // An external entity may begin with a TextDecl, so look for one first.
        setScannerState(SCANNER_STATE_TEXT_DECL);
    }
} // startEntity(String,identifier,String)
仔细看这句:fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);它会调用com.sun.org.apache.xerces.internal.parsers.AbstractDOMParser类中的方法:
/**
 * The start of the document. Creates the DOM document object that the
 * rest of the parse will populate.
 *
 * <p>When node expansion is not deferred, the document class named by
 * {@code fDocumentClassName} is instantiated (the default class, the PSVI
 * class, or a custom class loaded reflectively). Otherwise a
 * {@code DeferredDocumentImpl} is created.
 *
 * @param locator  The document locator; may be null, in which case the
 *                 document URI is left unset.
 * @param encoding The auto-detected IANA encoding name of the entity
 *                 stream. This value will be null in those situations
 *                 where the entity encoding is not auto-detected (e.g.
 *                 internal entities or a document entity that is
 *                 parsed from a java.io.Reader).
 * @param namespaceContext
 *                 The namespace context in effect at the
 *                 start of this document.
 *                 This object represents the current context.
 *                 Implementors of this class are responsible
 *                 for copying the namespace bindings from the
 *                 the current context (and its parent contexts)
 *                 if that information is important.
 * @param augs     Additional information that may include infoset augmentations
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void startDocument (XMLLocator locator, String encoding,
        NamespaceContext namespaceContext, Augmentations augs)
        throws XNIException {

    if (!fDeferNodeExpansion) {
        if (fDocumentClassName.equals (DEFAULT_DOCUMENT_CLASS_NAME)) {
            fDocument = new DocumentImpl ();
            fDocumentImpl = (CoreDocumentImpl)fDocument;
            initDocumentImpl (encoding, locator);
        }
        else if (fDocumentClassName.equals (PSVI_DOCUMENT_CLASS_NAME)) {
            fDocument = new PSVIDocumentImpl();
            fDocumentImpl = (CoreDocumentImpl)fDocument;
            fStorePSVI = true;
            initDocumentImpl (encoding, locator);
        }
        else {
            // use specified document class
            try {
                ClassLoader cl = ObjectFactory.findClassLoader();
                Class<?> documentClass = ObjectFactory.findProviderClass (fDocumentClassName,
                        cl, true);
                fDocument = (Document)documentClass.newInstance ();

                // if subclass of our own class that's cool too
                Class<?> defaultDocClass =
                        ObjectFactory.findProviderClass (CORE_DOCUMENT_CLASS_NAME,
                                cl, true);
                if (defaultDocClass.isAssignableFrom (documentClass)) {
                    fDocumentImpl = (CoreDocumentImpl)fDocument;

                    Class<?> psviDocClass = ObjectFactory.findProviderClass (PSVI_DOCUMENT_CLASS_NAME,
                            cl, true);
                    if (psviDocClass.isAssignableFrom (documentClass)) {
                        fStorePSVI = true;
                    }
                    initDocumentImpl (encoding, locator);
                }
            }
            catch (ClassNotFoundException e) {
                // won't happen we already checked that earlier
            }
            catch (Exception e) {
                // Keep the original failure as the cause so it is not lost.
                throw new RuntimeException (
                        DOMMessageFormatter.formatMessage(
                                DOMMessageFormatter.DOM_DOMAIN,
                                "CannotCreateDocumentClass",
                                new Object [] {fDocumentClassName}),
                        e);
            }
        }
        fCurrentNode = fDocument;
    }
    else {
        fDeferredDocumentImpl = new DeferredDocumentImpl(fNamespaceAware);
        fDocument = fDeferredDocumentImpl;
        fDocumentIndex = fDeferredDocumentImpl.createDeferredDocument();
        // REVISIT: strict error checking is not implemented in deferred dom.
        // set actual encoding
        fDeferredDocumentImpl.setInputEncoding(encoding);
        // set documentURI (only when a locator was supplied)
        if (locator != null) {
            fDeferredDocumentImpl.setDocumentURI(locator.getExpandedSystemId());
        }
        fCurrentNodeIndex = fDocumentIndex;
    }
} // startDocument(XMLLocator,String,NamespaceContext,Augmentations)

/**
 * Applies the common set-up to the freshly created {@code fDocumentImpl}:
 * turns strict error checking off, records the input encoding and, when a
 * locator is available, the document URI.
 *
 * @param encoding the input encoding to record; may be null
 * @param locator  source of the expanded system id; may be null
 */
private void initDocumentImpl (String encoding, XMLLocator locator) {
    // REVISIT: when DOM Level 3 is REC rely on Document.support
    //          instead of specific class
    // set DOM error checking off
    fDocumentImpl.setStrictErrorChecking (false);
    // set actual encoding
    fDocumentImpl.setInputEncoding (encoding);
    // set documentURI
    if (locator != null) {
        fDocumentImpl.setDocumentURI (locator.getExpandedSystemId ());
    }
}
再看看这句:fDeferredDocumentImpl = new DeferredDocumentImpl(fNamespaceAware); 默认情况下节点是延迟展开的(fDeferNodeExpansion 为 true),所以会走到这个 else 分支,到这里才算是真正新建了 Document 对象。因此我们代码里的 Document document = builder.parse(inputStream); 返回的真正对象是 DeferredDocumentImpl。
哎,找一段代码不容易啊
我看了IBM上面的一篇文章,可以帮助理解:DOM 文档操作和 XML 文件互相转换的 Java 实现
java需要了解的东西太多了,可能在不同的阶段对同一段代码的解读会不一样,这块东西在后期碰到了再去看看