DOM解析XML（三）

最新推荐文章于 2024-01-11 00:26:02 发布

toShareYou

最新推荐文章于 2024-01-11 00:26:02 发布

阅读量565

点赞数

CC 4.0 BY-SA版权

分类专栏： other

本文链接：https://blog.youkuaiyun.com/u011984172/article/details/43734081

other 专栏收录该内容

27 篇文章

订阅专栏

我们接着上一篇继续分析DOM是如何来解析XML文档：

import java.io.InputStream; 
import java.util.ArrayList; 
import java.util.List; 
 
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
 
import org.hibernate.util.ConfigHelper;
import org.w3c.dom.Document; 
import org.w3c.dom.Element; 
import org.w3c.dom.NodeList; 
import org.w3c.dom.Node; 
 
/**
 * Parses a simple book catalogue XML document with the JAXP DOM API.
 *
 * <p>Every {@code <book>} element below the document root becomes one {@code Book}:
 * the {@code id} attribute is parsed as an int, the {@code <name>} child supplies the
 * name and the {@code <price>} child is parsed as a float.
 */
public class DomParseService {

    /**
     * Reads all {@code <book>} elements from the given XML stream.
     *
     * <p>The stream is not closed by this method; closing it is left to the caller.
     *
     * @param inputStream the XML document to parse
     * @return one {@code Book} per {@code <book>} element, in document order
     * @throws Exception if the document cannot be parsed, or if an {@code id} attribute or a
     *         {@code <price>} value is missing or not numeric ({@link NumberFormatException})
     */
    public static List<Book> getBooks(InputStream inputStream) throws Exception {
        List<Book> list = new ArrayList<Book>();

        // Factory -> builder -> Document: the Document is the in-memory tree of the whole file.
        // NOTE(review): the factory is used with its default settings. If the stream can come
        // from an untrusted source, harden the factory against XXE before parsing.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document document = builder.parse(inputStream);

        // Root element of the tree; all <book> descendants are looked up from here.
        Element element = document.getDocumentElement();
        NodeList bookNodes = element.getElementsByTagName("book");

        for (int i = 0; i < bookNodes.getLength(); i++) {
            Element bookElement = (Element) bookNodes.item(i);
            Book book = new Book();
            book.setId(Integer.parseInt(bookElement.getAttribute("id")));

            NodeList childNodes = bookElement.getChildNodes();
            for (int j = 0; j < childNodes.getLength(); j++) {
                Node child = childNodes.item(j);
                // Skip whitespace text nodes, comments, etc. between the child elements.
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }

                // getTextContent() returns "" for an empty element (no NullPointerException)
                // and ignores comments, unlike getFirstChild().getNodeValue().
                String tagName = child.getNodeName();
                if ("name".equals(tagName)) {
                    String name = child.getTextContent();
                    book.setName(name);
                    System.out.println("nodeValue = " + name);
                } else if ("price".equals(tagName)) {
                    // Parse once and reuse the value for both the setter and the trace output.
                    float price = Float.parseFloat(child.getTextContent());
                    book.setPrice(price);
                    System.out.println("nodeValue = " + price);
                }
            }
            list.add(book);
        }
        return list;
    }

    /**
     * Demo entry point: loads {@code /book.xml} through {@code ConfigHelper} and parses it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        InputStream inputStream = ConfigHelper.getResourceAsStream("/book.xml");
        try {
            getBooks(inputStream);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // getBooks() leaves the stream open, so release it here on every path.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

看到这里我看了很久，还没弄清楚底层是如何将XML转化为Document对象的。继续往下跟踪，会走到解析器配置类（XML11Configuration）的parse(boolean)方法：

/**
 * Runs the document scanner, first setting up the parsing pipeline if a new
 * input source is pending.
 *
 * <p>When {@code fInputSource} is non-null the components are reset, the XML
 * version of the input is determined, the matching (XML 1.1 or default)
 * pipeline is configured, and document parsing is started on the current
 * scanner. {@code fInputSource} is then cleared, so a later call skips this
 * setup and goes straight to scanning.
 *
 * @param complete passed through unchanged to {@code fCurrentScanner.scanDocument}
 * @return the value returned by {@code fCurrentScanner.scanDocument(complete)}
 * @throws XNIException rethrown as-is; any other checked {@code Exception} is
 *         wrapped in a new {@code XNIException}
 * @throws IOException rethrown as-is
 */
public boolean parse(boolean complete) throws XNIException, IOException {
        //
        // One-time setup per input source: reset and configure the pipeline.
        if (fInputSource != null) {
            try {
                fValidationManager.reset();
                fVersionDetector.reset(this);
                resetCommon();
                
                // Pick the component set and pipeline that match the document's XML version.
                short version = fVersionDetector.determineDocVersion(fInputSource);
                if (version == Constants.XML_VERSION_1_1) {
                    initXML11Components();
                    configureXML11Pipeline();
                    resetXML11();
                } else {
                    configurePipeline();
                    reset();
                }
                
                // mark configuration as fixed
                fConfigUpdated = false;
                
                // resets and sets the pipeline.
                fVersionDetector.startDocumentParsing((XMLEntityHandler) fCurrentScanner, version);
                // Clear the pending source so the setup above is not repeated on the next call.
                fInputSource = null;
            } catch (XNIException ex) {
                // In every handler below the stack trace is printed only when
                // PRINT_EXCEPTION_STACK_TRACE is set; the exception is always propagated.
                if (PRINT_EXCEPTION_STACK_TRACE)
                    ex.printStackTrace();
                throw ex;
            } catch (IOException ex) {
                if (PRINT_EXCEPTION_STACK_TRACE)
                    ex.printStackTrace();
                throw ex;
            } catch (RuntimeException ex) {
                if (PRINT_EXCEPTION_STACK_TRACE)
                    ex.printStackTrace();
                throw ex;
            } catch (Exception ex) {
                // Any remaining checked exception is wrapped so the declared throws clause holds.
                if (PRINT_EXCEPTION_STACK_TRACE)
                    ex.printStackTrace();
                throw new XNIException(ex);
            }
        }
        
        // Actual scanning; same exception policy as the setup phase above.
        try {
            return fCurrentScanner.scanDocument(complete);
        } catch (XNIException ex) {
            if (PRINT_EXCEPTION_STACK_TRACE)
                ex.printStackTrace();
            throw ex;
        } catch (IOException ex) {
            if (PRINT_EXCEPTION_STACK_TRACE)
                ex.printStackTrace();
            throw ex;
        } catch (RuntimeException ex) {
            if (PRINT_EXCEPTION_STACK_TRACE)
                ex.printStackTrace();
            throw ex;
        } catch (Exception ex) {
            if (PRINT_EXCEPTION_STACK_TRACE)
                ex.printStackTrace();
            throw new XNIException(ex);
        }
        
    } // parse(boolean):boolean

这里面有句话要特别注意：fVersionDetector.startDocumentParsing((XMLEntityHandler) fCurrentScanner, version);根据这个方法名，我们可以知道这里面做了很多事情：

/**
     * Selects the scanner version on the entity manager, rewires the error
     * reporter and entity handler, and then signals the start of the document
     * entity to the given scanner.
     *
     * @param scanner the scanner that becomes the entity handler and receives
     *                the {@code startEntity} notification
     * @param version the detected document version; anything other than
     *                {@code Constants.XML_VERSION_1_0} selects the XML 1.1 scanner
     */
    public void startDocumentParsing(XMLEntityHandler scanner, short version){

        // XML 1.0 is the only special case; every other value falls back to 1.1.
        fEntityManager.setScannerVersion(
                version == Constants.XML_VERSION_1_0
                        ? Constants.XML_VERSION_1_0
                        : Constants.XML_VERSION_1_1);

        // The error reporter must locate errors through the entity scanner that
        // the entity manager is using after the version switch above.
        fErrorReporter.setDocumentLocator(fEntityManager.getEntityScanner());

        // Register the scanner as entity handler before notifying it, so that its
        // startEntity callback runs against the already-updated entity manager.
        fEntityManager.setEntityHandler(scanner);

        scanner.startEntity(
                fXMLSymbol,
                fEntityManager.getCurrentResourceIdentifier(),
                fEncoding,
                null);
    }

上面最后一行的scanner.startEntity(...)，实际执行的是com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl中的实现：

/**
     * Callback invoked when an entity starts. The superclass is notified
     * first; afterwards the document entity (named {@code "[xml]"}) and all
     * other entities are handled differently:
     * <ul>
     *   <li>document entity: the document handler, if one is set, receives
     *       {@code startDocument};</li>
     *   <li>any other entity that the entity scanner reports as external:
     *       the scanner state is switched to {@code SCANNER_STATE_TEXT_DECL}.</li>
     * </ul>
     *
     * @param name       The name of the entity.
     * @param identifier The resource identifier.
     * @param encoding   The auto-detected IANA encoding name of the entity
     *                   stream; forwarded to the superclass and to
     *                   {@code startDocument}.
     * @param augs       Augmentations forwarded to the superclass only; the
     *                   document handler is called with {@code null} instead.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void startEntity(String name,
                            XMLResourceIdentifier identifier,
                            String encoding, Augmentations augs) throws XNIException {

        super.startEntity(name, identifier, encoding, augs);

        final boolean documentEntity = name.equals("[xml]");

        if (documentEntity) {
            // start of the document itself: tell the handler, if there is one
            if (fDocumentHandler != null) {
                fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
            }
        } else if (fEntityScanner.isExternal()) {
            // external general entity: a TextDecl may come first
            setScannerState(SCANNER_STATE_TEXT_DECL);
        }

    } // startEntity(String,identifier,String)

仔细看这句：fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);它会调用com.sun.org.apache.xerces.internal.parsers.AbstractDOMParser类中的方法：

/**
     * The start of the document. Creates the {@code Document} instance that the
     * rest of the parse populates.
     *
     * <p>Without deferred node expansion the document class is chosen from
     * {@code fDocumentClassName} (default class, PSVI class, or a class loaded
     * by name) and becomes the current node. With deferred node expansion a
     * {@code DeferredDocumentImpl} is created and its document index becomes
     * the current node index.
     *
     * @param locator  Locator whose expanded system id is stored as the
     *                 document URI. May be {@code null}, in which case no
     *                 document URI is set.
     * @param encoding The auto-detected IANA encoding name of the entity
     *                 stream. This value will be null in those situations
     *                 where the entity encoding is not auto-detected (e.g.
     *                 internal entities or a document entity that is
     *                 parsed from a java.io.Reader).
     * @param namespaceContext
     *                 The namespace context in effect at the
     *                 start of this document. Not used by this method.
     * @param augs     Additional information that may include infoset
     *                 augmentations. Not used by this method.
     *
     * @throws XNIException Thrown by handler to signal an error.
     * @throws RuntimeException if a custom document class cannot be
     *                 instantiated; the original failure is attached as the cause.
     */
    public void startDocument (XMLLocator locator, String encoding,
    NamespaceContext namespaceContext, Augmentations augs)
    throws XNIException {

        if (!fDeferNodeExpansion) {
            if (fDocumentClassName.equals (DEFAULT_DOCUMENT_CLASS_NAME)) {
                fDocument = new DocumentImpl ();
                fDocumentImpl = (CoreDocumentImpl)fDocument;
                // REVISIT: when DOM Level 3 is REC rely on Document.support
                //          instead of specific class
                // set DOM error checking off
                fDocumentImpl.setStrictErrorChecking (false);
                // set actual encoding
                fDocumentImpl.setInputEncoding (encoding);
                // set documentURI (guarded like the custom-class branch below)
                if (locator != null) {
                    fDocumentImpl.setDocumentURI (locator.getExpandedSystemId ());
                }
            }
            else if (fDocumentClassName.equals (PSVI_DOCUMENT_CLASS_NAME)) {
                fDocument = new PSVIDocumentImpl();
                fDocumentImpl = (CoreDocumentImpl)fDocument;
                fStorePSVI = true;
                // REVISIT: when DOM Level 3 is REC rely on Document.support
                //          instead of specific class
                // set DOM error checking off
                fDocumentImpl.setStrictErrorChecking (false);
                // set actual encoding
                fDocumentImpl.setInputEncoding (encoding);
                // set documentURI (guarded like the custom-class branch below)
                if (locator != null) {
                    fDocumentImpl.setDocumentURI (locator.getExpandedSystemId ());
                }
            }
            else {
                // use specified document class
                try {
                    ClassLoader cl = ObjectFactory.findClassLoader();
                    Class documentClass = ObjectFactory.findProviderClass (fDocumentClassName,
                        cl, true);
                    fDocument = (Document)documentClass.newInstance ();

                    // if subclass of our own class that's cool too
                    Class defaultDocClass =
                    ObjectFactory.findProviderClass (CORE_DOCUMENT_CLASS_NAME,
                        cl, true);
                    if (defaultDocClass.isAssignableFrom (documentClass)) {
                        fDocumentImpl = (CoreDocumentImpl)fDocument;

                        Class psviDocClass = ObjectFactory.findProviderClass (PSVI_DOCUMENT_CLASS_NAME,
                            cl, true);
                        if (psviDocClass.isAssignableFrom (documentClass)) {
                            fStorePSVI = true;
                        }

                        // REVISIT: when DOM Level 3 is REC rely on
                        //          Document.support instead of specific class
                        // set DOM error checking off
                        fDocumentImpl.setStrictErrorChecking(false);
                        // set actual encoding
                        fDocumentImpl.setInputEncoding(encoding);
                        // set documentURI
                        if (locator != null) {
                            fDocumentImpl.setDocumentURI(locator.getExpandedSystemId());
                        }
                    }
                }
                catch (ClassNotFoundException e) {
                    // won't happen we already checked that earlier
                }
                catch (Exception e) {
                    // Keep the underlying failure as the cause so it is not lost.
                    throw new RuntimeException (
                        DOMMessageFormatter.formatMessage(
                        DOMMessageFormatter.DOM_DOMAIN,
                        "CannotCreateDocumentClass",
                        new Object [] {fDocumentClassName}), e);
                }
            }
            fCurrentNode = fDocument;
        }
        else {
            fDeferredDocumentImpl = new DeferredDocumentImpl(fNamespaceAware);
            fDocument = fDeferredDocumentImpl;
            fDocumentIndex = fDeferredDocumentImpl.createDeferredDocument();
            // REVISIT: strict error checking is not implemented in deferred dom.
            //          Document.support instead of specific class

            // set actual encoding
            fDeferredDocumentImpl.setInputEncoding(encoding);
            // set documentURI (guarded like the custom-class branch above)
            if (locator != null) {
                fDeferredDocumentImpl.setDocumentURI(locator.getExpandedSystemId());
            }
            fCurrentNodeIndex = fDocumentIndex;

        }

    } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)

再看看这句：fDeferredDocumentImpl = new DeferredDocumentImpl(fNamespaceAware); JDK自带的DOM解析器默认开启了延迟节点展开（defer-node-expansion特性，对应这里的fDeferNodeExpansion为true），所以走的是最后的else分支。到这里为止才算是新建了Document对象，所以我们代码里面：Document document = builder.parse(inputStream);

返回的真正对象是DeferredDocumentImpl

哎，找一段代码不容易啊

我看了IBM上面的一篇文章，可以帮助理解：DOM 文档操作和 XML 文件互相转换的 Java 实现

Java需要了解的东西太多了，在不同的阶段对同一段代码的解读可能会不一样，这块内容等后期再碰到时回头看看。