手写的一个非常小的 SAX 风格 XML 解析器,可以随意复制、修改。功能有限,只能解析格式良好(well-formed)的 UTF-8 编码的 XML 文件。考虑到实际项目中不会用到 JSR 的 SAXParser,所以手写了一个。经测试未发现 bug;这个是 demo 版,项目中使用的版本略有改动。
import java.util.Vector;



/**
 * XMLParser.java
 *
 * <p>A very small, hand-written, SAX-style push parser. {@link #parse(String)}
 * walks the document once and invokes the callback methods
 * ({@link #startDocument()}, {@link #startElement}, {@link #characters},
 * {@link #endElement}, {@link #endDocument()}) in document order. The default
 * callbacks only print to {@code System.out}; subclasses override them.
 *
 * <p>Limitations: entity references such as {@code &amp;lt;} are passed
 * through undecoded, namespaces are not resolved (the {@code uri} and
 * {@code localName} callback arguments are always {@code null}), start/end
 * tag nesting is not validated, and a DOCTYPE whose internal subset contains
 * {@code '>'} is not supported.
 *
 * <p>All parse state lives in instance fields, so a single instance must not
 * be used by several threads at once.
 *
 * @author 袁东
 * @version beta (created 2008-3-20 18:11:09)
 */
public class SAXParser {

    /** State: parsing was requested but the first tag has not been located yet. */
    public static final byte PREPARE = 0;

    /** State: the first tag was found and {@link #startDocument()} is being reported. */
    public static final byte START_DOCUMENT = 1;

    /** State: scanning between constructs (no callback is in progress). */
    public static final byte OVERHEAD = 2;

    /** State: a start tag is being reported. */
    public static final byte START_ELEMENT = 3;

    /** State: an end tag (or the end of a self-closing tag) is being reported. */
    public static final byte END_ELEMENT = 4;

    /** State: character data is being reported. */
    public static final byte TEXT = 5;

    /** State: the whole input has been consumed. */
    public static final byte END_DOCUMENT = 6;

    /** Read position inside {@link #xmlBuffer}. */
    private int pointer = 0;

    /** Current parser state; one of the state constants, or -1 before the first parse. */
    private byte parserState = -1;

    /** Characters of the document being parsed; only set while {@code parse} runs. */
    private char[] xmlBuffer = null;

    /** Number of currently open elements; text is only reported while this is positive. */
    private int depth = 0;

    /**
     * Parses the given document and fires the callbacks in document order.
     *
     * <p>Anything before the first {@code '<'} is skipped. Processing
     * instructions (including the XML declaration), comments and
     * {@code <!...>} declarations are skipped silently; CDATA sections are
     * reported through {@link #characters(String)}. Text outside the
     * outermost element is ignored.
     *
     * @param xml the complete XML document text
     * @throws IllegalArgumentException if {@code xml} is {@code null}, contains
     *         no markup at all, or contains a tag, comment, CDATA section or
     *         processing instruction that is never terminated
     */
    public void parse(String xml) {
        if (xml == null) {
            throw new IllegalArgumentException("xml must not be null");
        }
        xmlBuffer = xml.toCharArray();
        // Reset the cursor so that one instance can parse several documents.
        pointer = 0;
        depth = 0;
        setParserState(PREPARE);
        try {
            while (getParserState() != END_DOCUMENT) {
                if (getParserState() == PREPARE) {
                    prepare();
                } else if (pointer >= xmlBuffer.length) {
                    setParserState(END_DOCUMENT);
                    endDocument();
                } else if (xmlBuffer[pointer] == '<') {
                    readMarkup();
                } else {
                    readText();
                }
            }
        } finally {
            // Drop the reference to the document even when a callback throws.
            xmlBuffer = null;
        }
    }

    /** Called once when the first tag of the document has been found. */
    public void startDocument() {
        System.out.println("start doc");
    }

    /** Called once after the whole input has been consumed. */
    public void endDocument() {
        System.out.println("end doc");
    }

    /**
     * Called for every start tag (including self-closing tags).
     *
     * @param uri always {@code null}; namespaces are not resolved
     * @param localName always {@code null}; namespaces are not resolved
     * @param name the element name exactly as written in the tag
     * @param array the raw, whitespace-separated attribute tokens of the tag
     *        (for example {@code id="1"}), or {@code null} when the tag has
     *        no attributes
     */
    public void startElement(String uri, String localName, String name, String[] array) {
        System.out.println("start element:" + name);

        if (array != null) {
            System.out.println("length: " + array.length);
        }
    }

    /**
     * Called for every non-empty run of character data (or CDATA content)
     * found inside an element. Whitespace-only runs are reported as well.
     *
     * @param content the text exactly as it appears in the document
     */
    public void characters(String content) {
        System.out.println("characters: " + content);
    }

    /**
     * Called for every end tag, and right after {@code startElement} for a
     * self-closing tag.
     *
     * @param uri always {@code null}; namespaces are not resolved
     * @param localName always {@code null}; namespaces are not resolved
     * @param name the element name
     */
    public void endElement(String uri, String localName, String name) {
        System.out.println("endElement uri:" + uri + " localName:" + localName
                + " name: " + name);
    }

    /**
     * Returns the current parser state.
     *
     * @return one of the state constants, or -1 before the first parse
     */
    private byte getParserState() {
        return parserState;
    }

    /**
     * Sets the parser state. {@link #parse(String)} calls this at every
     * state transition.
     *
     * @param parserState the new state, one of the state constants
     */
    public void setParserState(byte parserState) {
        this.parserState = parserState;
    }

    /** Skips to the first '<' of the input and reports the start of the document. */
    private void prepare() {
        while (pointer < xmlBuffer.length && xmlBuffer[pointer] != '<') {
            pointer++;
        }
        if (pointer >= xmlBuffer.length) {
            throw new IllegalArgumentException("no XML markup found in input");
        }
        setParserState(START_DOCUMENT);
        startDocument();
        setParserState(OVERHEAD);
    }

    /** Consumes character data up to the next '<' and reports it when inside an element. */
    private void readText() {
        int start = pointer;
        while (pointer < xmlBuffer.length && xmlBuffer[pointer] != '<') {
            pointer++;
        }
        // Text before or after the outermost element is not content.
        if (depth > 0) {
            reportText(start, pointer);
        }
    }

    /** Dispatches on the kind of markup that starts at the current '<'. */
    private void readMarkup() {
        if (lookingAt("<?", pointer)) {
            skipPast("?>", pointer + 2, "processing instruction");
        } else if (lookingAt("<!--", pointer)) {
            skipPast("-->", pointer + 4, "comment");
        } else if (lookingAt("<![CDATA[", pointer)) {
            readCData();
        } else if (lookingAt("<!", pointer)) {
            skipPast(">", pointer + 2, "declaration");
        } else if (lookingAt("</", pointer)) {
            readEndTag();
        } else {
            readStartTag();
        }
    }

    /** Consumes a CDATA section and reports its content as character data. */
    private void readCData() {
        int start = pointer + "<![CDATA[".length();
        int end = indexOf("]]>", start);
        if (end < 0) {
            throw new IllegalArgumentException(
                    "unterminated CDATA section starting at offset " + pointer);
        }
        pointer = end + "]]>".length();
        if (depth > 0 && end > start) {
            reportText(start, end);
        }
    }

    /** Consumes a start tag (or self-closing tag) and fires the element callbacks. */
    private void readStartTag() {
        int tagStart = pointer;
        int close = findTagEnd(tagStart + 1);
        if (close < 0) {
            throw new IllegalArgumentException(
                    "unterminated tag starting at offset " + tagStart);
        }
        int contentEnd = close;
        // Only a '/' directly before '>' closes the tag; a '/' inside an
        // attribute value (e.g. a URL) must not.
        boolean selfClosing = contentEnd > tagStart + 1 && xmlBuffer[contentEnd - 1] == '/';
        if (selfClosing) {
            contentEnd--;
        }
        String[] tokens = tokenize(tagStart + 1, contentEnd);
        if (tokens.length == 0) {
            throw new IllegalArgumentException("tag without a name at offset " + tagStart);
        }
        pointer = close + 1;

        String[] attributes = null;
        if (tokens.length > 1) {
            attributes = new String[tokens.length - 1];
            System.arraycopy(tokens, 1, attributes, 0, attributes.length);
        }

        setParserState(START_ELEMENT);
        startElement(null, null, tokens[0], attributes);
        if (selfClosing) {
            setParserState(END_ELEMENT);
            endElement(null, null, tokens[0]);
        } else {
            depth++;
        }
        setParserState(OVERHEAD);
    }

    /** Consumes an end tag and fires {@code endElement}. */
    private void readEndTag() {
        int nameStart = pointer + 2;
        int close = indexOf(">", nameStart);
        if (close < 0) {
            throw new IllegalArgumentException(
                    "unterminated end tag starting at offset " + pointer);
        }
        // XML allows whitespace between the name and '>'.
        String name = new String(xmlBuffer, nameStart, close - nameStart).trim();
        pointer = close + 1;
        if (depth > 0) {
            depth--;
        }
        setParserState(END_ELEMENT);
        endElement(null, null, name);
        setParserState(OVERHEAD);
    }

    /** Reports the characters in [start, end) through {@code characters}. */
    private void reportText(int start, int end) {
        setParserState(TEXT);
        characters(new String(xmlBuffer, start, end - start));
        setParserState(OVERHEAD);
    }

    /** Moves the cursor just past the next occurrence of terminator, or fails if there is none. */
    private void skipPast(String terminator, int from, String what) {
        int found = indexOf(terminator, from);
        if (found < 0) {
            throw new IllegalArgumentException(
                    "unterminated " + what + " starting at offset " + pointer);
        }
        pointer = found + terminator.length();
    }

    /** Returns the index of the '>' that ends the tag, ignoring any '>' inside quotes, or -1. */
    private int findTagEnd(int from) {
        char quote = 0;
        for (int i = from; i < xmlBuffer.length; i++) {
            char c = xmlBuffer[i];
            if (quote != 0) {
                if (c == quote) {
                    quote = 0;
                }
            } else if (c == '"' || c == '\'') {
                quote = c;
            } else if (c == '>') {
                return i;
            }
        }
        return -1;
    }

    /** Splits [start, end) into tokens at whitespace that is not inside a quoted value. */
    private String[] tokenize(int start, int end) {
        // First pass: count the tokens so that no growable collection is needed.
        int count = 0;
        int i = skipSpaces(start, end);
        while (i < end) {
            i = skipSpaces(tokenEnd(i, end), end);
            count++;
        }

        // Second pass: extract them.
        String[] tokens = new String[count];
        i = skipSpaces(start, end);
        for (int t = 0; t < count; t++) {
            int stop = tokenEnd(i, end);
            tokens[t] = new String(xmlBuffer, i, stop - i);
            i = skipSpaces(stop, end);
        }
        return tokens;
    }

    /** Returns the index just past the token that starts at from (quote-aware). */
    private int tokenEnd(int from, int end) {
        char quote = 0;
        int i = from;
        while (i < end) {
            char c = xmlBuffer[i];
            if (quote != 0) {
                if (c == quote) {
                    quote = 0;
                }
            } else if (c == '"' || c == '\'') {
                quote = c;
            } else if (isSpace(c)) {
                break;
            }
            i++;
        }
        return i;
    }

    /** Returns the index of the first non-whitespace character in [from, end), or end. */
    private int skipSpaces(int from, int end) {
        int i = from;
        while (i < end && isSpace(xmlBuffer[i])) {
            i++;
        }
        return i;
    }

    /** Tells whether c is one of the four XML whitespace characters. */
    private static boolean isSpace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /** Tells whether the buffer contains token exactly at index at. */
    private boolean lookingAt(String token, int at) {
        if (at < 0 || at + token.length() > xmlBuffer.length) {
            return false;
        }
        for (int i = 0; i < token.length(); i++) {
            if (xmlBuffer[at + i] != token.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /** Returns the index of the first occurrence of token at or after from, or -1. */
    private int indexOf(String token, int from) {
        int last = xmlBuffer.length - token.length();
        for (int i = from; i <= last; i++) {
            if (lookingAt(token, i)) {
                return i;
            }
        }
        return -1;
    }

}
