// The CharacterReader class comes from HtmlParser and is mainly used to parse HTML tag source code; I have added some comments of my own to the code.
import java.util.Locale;
/**
 * CharacterReader consumes tokens off a string. To replace the old TokenQueue.
 *
 * <p>The reader copies the input into a character array and walks it with a single
 * cursor ({@code pos}) plus one saved position ({@code mark}). The cursor is allowed
 * to move past the end of the input ({@link #consume()} and {@link #advance()} always
 * increment it) so that a following {@link #unconsume()} restores the previous
 * position. All string-returning consume methods and {@link #toString()} therefore
 * treat "at or past the end" as "nothing left" and return an empty string.
 *
 * <p>Instances hold mutable state and perform no synchronization.
 */
public class CharacterReader {
    /** Sentinel character (U+FFFF) returned when there is no input left to read. */
    static final char EOF = (char) -1;

    private final char[] input;
    private final int length;
    private int pos = 0;
    private int mark = 0;

    /**
     * Creates a reader over a copy of the characters of {@code input}.
     *
     * @param input the text to read; must not be {@code null}
     */
    public CharacterReader(String input) {
        this.input = input.toCharArray();
        this.length = this.input.length;
    }

    /**
     * Returns the current cursor position (index into the input).
     *
     * @return the current index; may exceed the input length after consuming past the end
     */
    public int pos() {
        return pos;
    }

    /**
     * Tells whether the cursor is at or past the end of the input.
     *
     * @return {@code true} if no characters remain
     */
    public boolean isEmpty() {
        return pos >= length;
    }

    /**
     * Returns the character under the cursor without moving the cursor.
     *
     * @return the current character, or {@link #EOF} if no characters remain
     */
    public char current() {
        return pos >= length ? EOF : input[pos];
    }

    /**
     * Returns the character under the cursor and moves the cursor forward by one.
     * The cursor is incremented even when the input is exhausted.
     *
     * @return the consumed character, or {@link #EOF} if no characters remain
     */
    public char consume() {
        char val = current();
        pos++;
        return val;
    }

    /**
     * Moves the cursor back by one. No lower bound is enforced.
     */
    public void unconsume() {
        pos--;
    }

    /**
     * Moves the cursor forward by one. No upper bound is enforced.
     */
    public void advance() {
        pos++;
    }

    /**
     * Saves the current cursor position as the mark.
     */
    public void mark() {
        mark = pos;
    }

    /**
     * Returns the saved mark.
     *
     * @return the position stored by the last call to {@link #mark()}, or 0 if never marked
     */
    public int getMark() {
        return mark;
    }

    /**
     * Moves the cursor back to the saved mark.
     */
    public void rewindToMark() {
        pos = mark;
    }

    /**
     * Returns the character under the cursor as a one-character string and moves the
     * cursor forward by one.
     *
     * @return the consumed character as a string
     * @throws IndexOutOfBoundsException if no characters remain (the cursor has
     *         already been incremented when the exception is thrown)
     */
    public String consumeAsString() {
        return new String(input, pos++, 1);
    }

    /**
     * Finds the next occurrence of a character at or after the cursor.
     *
     * @param c the character to look for
     * @return the offset from the cursor to the first match, or -1 if not found
     */
    public int nextIndexOf(char c) {
        // doesn't handle scanning for surrogates
        for (int i = pos; i < length; i++) {
            if (c == input[i]) {
                return i - pos;
            }
        }
        return -1;
    }

    /**
     * Finds the next occurrence of a character sequence at or after the cursor.
     *
     * @param seq the sequence to look for
     * @return the offset from the cursor to the start of the first match, or -1 if
     *         not found; an empty sequence matches at offset 0 as long as the cursor
     *         is not past the end of the input
     */
    public int nextIndexOf(CharSequence seq) {
        // doesn't handle scanning for surrogates
        final int seqLength = seq.length();
        if (seqLength == 0) {
            // Same convention as matches(""): an empty sequence matches in place.
            return pos <= length ? 0 : -1;
        }
        char startChar = seq.charAt(0);
        for (int offset = pos; offset < length; offset++) {
            // scan to first instance of startChar:
            if (startChar != input[offset]) {
                while (++offset < length && startChar != input[offset]) {
                    // keep skipping
                }
            }
            int i = offset + 1;
            int last = i + seqLength - 1;
            if (offset < length && last <= length) {
                // compare the remainder of the sequence against the input
                for (int j = 1; i < last && seq.charAt(j) == input[i]; i++, j++) {
                    // keep comparing
                }
                if (i == last) { // found full sequence
                    return offset - pos;
                }
            }
        }
        return -1;
    }

    /**
     * Consumes up to, but not including, the next occurrence of {@code c}. If the
     * character does not occur, consumes everything that remains.
     *
     * @param c the character to stop before
     * @return the consumed text (possibly empty)
     */
    public String consumeTo(char c) {
        return consumeByOffset(nextIndexOf(c));
    }

    /**
     * Consumes up to, but not including, the next occurrence of {@code seq}. If the
     * sequence does not occur, consumes everything that remains.
     *
     * @param seq the sequence to stop before
     * @return the consumed text (possibly empty)
     */
    public String consumeTo(String seq) {
        return consumeByOffset(nextIndexOf(seq));
    }

    /**
     * Consumes up to, but not including, the first character that equals any of
     * {@code chars}, or to the end of the input if none occurs.
     *
     * @param chars the characters to stop before
     * @return the consumed text (possibly empty)
     */
    public String consumeToAny(final char... chars) {
        final int start = pos;
        while (pos < length && !contains(chars, input[pos])) {
            pos++;
        }
        return sliceFrom(start);
    }

    /**
     * Consumes everything from the cursor to the end of the input. When the cursor is
     * already at or past the end, returns an empty string and leaves the cursor
     * where it is (so a pending {@link #unconsume()} still lines up).
     *
     * @return the remaining text (possibly empty)
     */
    public String consumeToEnd() {
        if (pos >= length) {
            return "";
        }
        String data = new String(input, pos, length - pos);
        pos = length;
        return data;
    }

    /**
     * Consumes a run of ASCII letters ({@code A-Z}, {@code a-z}).
     *
     * @return the consumed letters (possibly empty)
     */
    public String consumeLetterSequence() {
        final int start = pos;
        while (pos < length && isAsciiLetter(input[pos])) {
            pos++;
        }
        return sliceFrom(start);
    }

    /**
     * Consumes a run of ASCII letters followed by a run of ASCII digits.
     *
     * @return the consumed letters and digits (possibly empty)
     */
    public String consumeLetterThenDigitSequence() {
        final int start = pos;
        while (pos < length && isAsciiLetter(input[pos])) {
            pos++;
        }
        while (pos < length && isAsciiDigit(input[pos])) {
            pos++;
        }
        return sliceFrom(start);
    }

    /**
     * Consumes a run of hexadecimal digits ({@code 0-9}, {@code A-F}, {@code a-f}).
     *
     * @return the consumed hex digits (possibly empty)
     */
    public String consumeHexSequence() {
        final int start = pos;
        while (pos < length && isHexDigit(input[pos])) {
            pos++;
        }
        return sliceFrom(start);
    }

    /**
     * Consumes a run of ASCII digits ({@code 0-9}).
     *
     * @return the consumed digits (possibly empty)
     */
    public String consumeDigitSequence() {
        final int start = pos;
        while (pos < length && isAsciiDigit(input[pos])) {
            pos++;
        }
        return sliceFrom(start);
    }

    /**
     * Tells whether the character under the cursor equals {@code c}.
     *
     * @param c the character to compare with
     * @return {@code true} if input remains and the current character equals {@code c}
     */
    public boolean matches(char c) {
        return !isEmpty() && input[pos] == c;
    }

    /**
     * Tells whether the input at the cursor starts with {@code seq} (case-sensitive).
     *
     * @param seq the sequence to compare with
     * @return {@code true} if the next {@code seq.length()} characters equal {@code seq}
     */
    public boolean matches(String seq) {
        int scanLength = seq.length();
        if (scanLength > length - pos) {
            return false;
        }
        for (int offset = 0; offset < scanLength; offset++) {
            if (seq.charAt(offset) != input[pos + offset]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the input at the cursor starts with {@code seq}, comparing each
     * pair of characters after {@link Character#toUpperCase(char)}.
     *
     * @param seq the sequence to compare with
     * @return {@code true} if the next {@code seq.length()} characters match ignoring case
     */
    public boolean matchesIgnoreCase(String seq) {
        int scanLength = seq.length();
        if (scanLength > length - pos) {
            return false;
        }
        for (int offset = 0; offset < scanLength; offset++) {
            char upScan = Character.toUpperCase(seq.charAt(offset));
            char upTarget = Character.toUpperCase(input[pos + offset]);
            if (upScan != upTarget) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the character under the cursor equals any of {@code seq}.
     *
     * @param seq the candidate characters
     * @return {@code true} if input remains and the current character is one of {@code seq}
     */
    public boolean matchesAny(char... seq) {
        if (isEmpty()) {
            return false;
        }
        return contains(seq, input[pos]);
    }

    /**
     * Tells whether the character under the cursor is an ASCII letter.
     *
     * @return {@code true} if input remains and the current character is in {@code A-Z} or {@code a-z}
     */
    public boolean matchesLetter() {
        return !isEmpty() && isAsciiLetter(input[pos]);
    }

    /**
     * Tells whether the character under the cursor is an ASCII digit.
     *
     * @return {@code true} if input remains and the current character is in {@code 0-9}
     */
    public boolean matchesDigit() {
        return !isEmpty() && isAsciiDigit(input[pos]);
    }

    /**
     * If the input at the cursor starts with {@code seq} (case-sensitive), moves the
     * cursor past it.
     *
     * @param seq the sequence to match
     * @return {@code true} if the sequence matched and was consumed
     */
    public boolean matchConsume(String seq) {
        if (!matches(seq)) {
            return false;
        }
        pos += seq.length();
        return true;
    }

    /**
     * If the input at the cursor starts with {@code seq} ignoring case, moves the
     * cursor past it.
     *
     * @param seq the sequence to match
     * @return {@code true} if the sequence matched and was consumed
     */
    public boolean matchConsumeIgnoreCase(String seq) {
        if (!matchesIgnoreCase(seq)) {
            return false;
        }
        pos += seq.length();
        return true;
    }

    /**
     * Tells whether the remaining input contains {@code seq} written entirely in
     * lower case or entirely in upper case. Mixed-case occurrences are not found.
     *
     * @param seq the sequence to look for
     * @return {@code true} if the all-lower-case or all-upper-case form occurs at or after the cursor
     */
    public boolean containsIgnoreCase(String seq) {
        // used to check presence of </title>, </style>. only finds consistent
        // case.
        String loScan = seq.toLowerCase(Locale.ENGLISH);
        String hiScan = seq.toUpperCase(Locale.ENGLISH);
        return (nextIndexOf(loScan) > -1) || (nextIndexOf(hiScan) > -1);
    }

    /**
     * Returns the input that has not been consumed yet, without moving the cursor.
     *
     * @return the remaining text, or an empty string if the cursor is at or past the end
     */
    @Override
    public String toString() {
        return pos >= length ? "" : new String(input, pos, length - pos);
    }

    /** Consumes {@code offset} characters, or everything that remains when {@code offset} is -1. */
    private String consumeByOffset(int offset) {
        if (offset == -1) {
            return consumeToEnd();
        }
        String consumed = new String(input, pos, offset);
        pos += offset;
        return consumed;
    }

    /** Returns the text between {@code start} and the cursor, or "" if the cursor has not advanced. */
    private String sliceFrom(int start) {
        // Guarding on pos > start also covers a cursor that is already past the end,
        // where constructing a zero-length String at that offset would throw.
        return pos > start ? new String(input, start, pos - start) : "";
    }

    /** Tells whether {@code c} equals any element of {@code chars}. */
    private static boolean contains(char[] chars, char c) {
        for (char candidate : chars) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }

    /** Tells whether {@code c} is in {@code A-Z} or {@code a-z}. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /** Tells whether {@code c} is in {@code 0-9}. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Tells whether {@code c} is in {@code 0-9}, {@code A-F} or {@code a-f}. */
    private static boolean isHexDigit(char c) {
        return isAsciiDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }
}