/**
 * Extracts the capturing-group contents of every match of {@code regExp} in a page source.
 *
 * <p>Carriage returns and line feeds are removed from {@code htmltext} first, then the text is
 * scanned left to right with a case-insensitive pattern. For each match, the captured groups
 * are appended to the {@code results} field in descending group order (last group first);
 * group 0 (the whole match) is never stored. A group that did not take part in a match is
 * stored as {@code null}.
 *
 * <p>After each match the scan restarts on the remaining text as if it were a fresh input:
 * anchors such as {@code ^} match at the restart position and look-behind cannot see
 * already-consumed characters.
 *
 * <p>Matching is best-effort: an invalid or {@code null} pattern, or a stack overflow inside
 * the regex engine, is reported on standard error and whatever was collected up to that point
 * is still handed to the result helper.
 *
 * @param htmltext page source to scan; may be {@code null}
 * @param regExp   regular expression; it must contain at least one capturing group for
 *                 anything to be collected
 * @param haslabel selects the helper that builds the returned array:
 *                 {@code getHtmlregexResultsLabel()} when {@code true},
 *                 {@code getHtmlregexResults()} when {@code false}
 *                 (presumably "keep tags" vs. "strip tags" - confirm against those helpers)
 * @return {@code null} when {@code htmltext} is {@code null}; otherwise the array produced by
 *         the selected helper
 */
public String[] htmlregex(String htmltext, String regExp, boolean haslabel) {
    // Reset the shared buffer before anything else, so it is cleared even on the null path.
    results = new Vector<String>();
    if (htmltext == null) {
        return null;
    }

    // Remove line breaks so a pattern can span what used to be separate lines.
    String text = htmltext.replace("\r", "").replace("\n", "");

    try {
        Matcher matcher = Pattern.compile(regExp, Pattern.CASE_INSENSITIVE).matcher(text);
        // The number of capturing groups is a property of the pattern, so read it once.
        int groupCount = matcher.groupCount();
        int length = text.length();
        int pos = 0;

        // A pattern without capturing groups has nothing to collect.
        while (groupCount > 0 && pos < length) {
            // region() resets the matcher; with the default anchoring, non-transparent bounds
            // this behaves like matching against text.substring(pos) without copying it.
            matcher.region(pos, length);
            if (!matcher.find()) {
                break;
            }
            for (int i = groupCount; i > 0; i--) {
                results.add(matcher.group(i));
            }
            int end = matcher.end();
            // A zero-length match at the restart position makes no progress; step one
            // character forward instead of looping forever on the same spot.
            pos = (end > pos) ? end : pos + 1;
        }
    } catch (RuntimeException e) {
        // Invalid pattern syntax or a null pattern: report it and fall through.
        e.printStackTrace();
    } catch (StackOverflowError e) {
        // Deeply recursive patterns can exhaust the stack; treat it like any other failure.
        e.printStackTrace();
    }

    // Pick the helper that formats the collected matches.
    return haslabel ? getHtmlregexResultsLabel() : getHtmlregexResults();
}