[xmlparser]Code-XMLParser

此博客展示了一个Java实现的XML解析器代码。定义了XML解析的文法,包含doc、xml、doctype等规则。通过Lexer类进行词法分析,将节点存储在列表中,避免解析回溯。实现了多个方法来解析不同类型的节点,如text、remark、cdata等,最终完成XML文档的解析。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package com.xml;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * @author zfzheng
 *
 * //doctype不对root节点进行匹配判断
 * //文法未细化(文法节点的text不同,开始与结尾未划分),大致可以就行了(程序里是区分的)
 * 文法:
 * doc	->	xml {doctype} root
 * xml	-> <?xml version {encoding} ?>
 * doctype	-> <!DOCTYPE {text}+ >
 * root	-> node
 * node	->	(doctype | tag | text | remark | cdata) node
 * tag	-> <name {attr}>
 * remark	-> <!-- text -->
 * cdata	-> <![CDATA[ text ]]>
 * attr -> name = ("text" |'text')
 * name -> [^/s]+
 * text -> charset
 *
 */
public class XMLParser {

    private Lexer lex;
    private List nodeList=null;//先解析所有节点,避免解析回溯。

    private int cur=0;
    private int size=0;

    private XMLDocument parse(StringBuffer xml)throws Exception{
        lex=new Lexer(xml);
        nodeList=new ArrayList();
        while(!lex.eof()){
            nodeList.add(lex.nextToken());
        }
//        dumpNodeList();
        XMLDocument doc=doc();

        doc.dump();

        return doc;
    }

    private XMLDocument doc()throws InnerEndException{
        XMLDocument doc=new XMLDocument();
        cur=0;
        size=nodeList.size();
        //xml文档
        Node xml=xml();
        doc.setVersion(Float.parseFloat(xml.getAttributeValue("version")));
        doc.setEncoding(xml.getAttributeValue("encoding"));
        xml=null;
        //文档类型
        doc.setDocType(doctype());
        //内容
        doc.setRoot(root());
        return doc;
    }

    private Node text()throws InnerEndException{
        StringBuffer sb=new StringBuffer();
        String v=next();
        while(!v.startsWith("<")){
            sb.append(v);
            v=next();
        }
        back();

        Node node=new Node();
        node.setValue(sb.toString());
        node.setType(Node.NODE_TEXT);
        return node;
    }

    private Node remark()throws InnerEndException{
        StringBuffer sb=new StringBuffer();
        String v=next();
        while(!v.endsWith("-->")){
            sb.append(v);
            v=next();
        }
        sb.append(v);

        Node node=new Node();
        node.setValue(sb.toString());
        node.setType(Node.NODE_REMARK);
        return node;
    }

    private Node cdata()throws InnerEndException{
        StringBuffer sb=new StringBuffer();
        String v=next();
        while(!v.endsWith("]]>")){
            sb.append(v);
            v=next();
        }
        sb.append(v);

        Node node=new Node();
        node.setValue(sb.toString());
        node.setType(Node.NODE_CDATA);
        return node;
    }

    private Node node()throws InnerEndException{
        boolean closed=false;
        Node root=new Node();
        String v=nextNotSpace();
        if(v.charAt(0)=='<'){
            if(v.endsWith("/>")){
                closed=true;
                root.setValue(takeOff(v,1,2));
            }else if(v.endsWith(">")){
                root.setValue(takeOff(v));
            }else{
                root.setValue(v.substring(1));
                v=nextNotSpace();
                while(!v.endsWith(">")){//属性
                    back();
                    root.addAttribute(attribute());
                    v=nextNotSpace();
                }
                if(v.endsWith("/>")){
                   closed=true;
                }
            }
        }

        if(!closed){
            String matchEnd="</"+root.getValue()+'>';
            v = next();
            Node subNode;
            while(!matchEnd.equals(v)){//closed
                subNode=null;

                if(v.startsWith("<!--")){
                    back();
                    subNode=remark();
                }else if(v.startsWith("<![CDATA[")){
                    back();
                    subNode=cdata();
                }else if(v.charAt(0)=='<'){
                        back();
                        subNode=node();
                }else{
                    back();
                    subNode=text();
                }
                root.addChild(subNode);
                if(eof()){
                    break;
                }
                v=next();
            }
        }
        root.setType(Node.NODE_TAG);
        return root;
    }

    private Node root()throws InnerEndException{
        return node();
    }

    private String doctype()throws InnerEndException{
        String v=nextNotSpace();
        StringBuffer sb=new StringBuffer();
        if("<!DOCTYPE".equals(v)){
            sb.append(v);
            v=next();
            while(!">".equals(v)){
                sb.append(v);
                v=next();
            }
            sb.append(v);
        }else{
            backToNotSpace();
        }
        return sb.toString();
    }

    private Node xml()throws InnerEndException{
        String v=nextNotSpace();
        Node node=new Node();
        if("<?xml".equals(v)){
            node.setValue("xml");
            v=nextNotSpace();
            while(!"?>".equals(v)){
                back();
                node.addAttribute(attribute());
                v=nextNotSpace();
            }
        }
        node.setType(Node.NODE_XML);
        return node;
    }


    private Attribute attribute()throws InnerEndException{
        Attribute att=new Attribute();
        att.setName(nextNotSpace());
        nextNotSpace();//=
        att.setValue(takeOff(nextNotSpace()));
        return att;
    }

    private String nextNotSpace()throws InnerEndException{
        String v=next();
        while(isEmpty(v)){
            v=next();
        }
        return v;
    }

    private void backToNotSpace()throws InnerEndException{
        String v=(String)nodeList.get(--cur);
        while(isEmpty(v)){
            v=(String)nodeList.get(--cur);
        }
    }

    private void back(){
        if(cur>0){
            cur--;
        }
    }

    private boolean eof(){
        return cur>=size;
    }

    private String next()throws InnerEndException{
        if(cur>=size){
            throw new InnerEndException();
        }
        return (String)nodeList.get(cur++);
    }

    private boolean isEmpty(String v){
        return v.replaceAll("//s","").length()==0;
    }

    private String takeOff(String s,int beginLen,int endLen){
        return s.substring(1,s.length()-endLen);
    }

    private String takeOff(String s){
        if(s==null||s.length()<2){
            return s;
        }
        return s.substring(1,s.length()-1);
    }

    private void dumpNodeList(){
        for(int i=0,n=nodeList.size();i<n;i++){
            System.out.println(nodeList.get(i));
        }
    }

    class InnerEndException extends Exception{
    }

    public static XMLDocument parseFromFile(String fileName)throws Exception{
        BufferedReader br=new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
        StringBuffer sb=new StringBuffer();
        String line;
        while((line=br.readLine())!=null){
            sb.append(line).append('/n');
        }
        br.close();
        return parseFrom(sb);
    }

    public static XMLDocument parseFrom(StringBuffer xml)throws Exception{
        return new XMLParser().parse(xml);
    }


    public static void main(String[] args) throws Exception{
        if(args.length!=2){
            System.out.println("Usage: java com.xml.XMLParser xmlFile");
        }else{
            XMLParser.parseFromFile(args[1]);
        }
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值