/**
 * Regular expression matching the start of an anchor tag up to and including the
 * closing quote of its {@code href} attribute: optional whitespace after {@code <},
 * the tag name {@code a}, at least one whitespace character, then {@code href="..."}.
 * Capture group 1 holds the raw attribute value (one or more non-quote characters).
 */
public static final String patternString="<\\s*a\\s+href=\"([^\"]+)\"";

/** Compiled form of {@link #patternString}; built once rather than on every call. */
private static final Pattern HREF_PATTERN=Pattern.compile(patternString,Pattern.DOTALL);

/**
 * Extracts hyperlink targets from an HTML page.
 *
 * <p>Every {@code <a href="...">} occurrence matched by {@link #patternString} is
 * inspected, and its href value is kept only when it begins with the letter
 * {@code h}. Each kept link is also printed to standard output, one per line.
 *
 * @param page the HTML text to scan; {@code null} is treated as an empty page
 * @return the kept href values in order of appearance; never {@code null}
 */
public static ArrayList<String> extractLink(String page)
{
    ArrayList<String> list=new ArrayList<String>();
    if(page==null)
    {
        // Nothing to scan; avoid the NullPointerException Pattern.matcher(null) would throw.
        return list;
    }
    Matcher matcher=HREF_PATTERN.matcher(page);
    while(matcher.find())
    {
        // Take the href value from the capture group rather than slicing the whole
        // match at a fixed offset: the pattern permits a variable amount of
        // whitespace inside the tag, so the prefix length is not constant.
        String href=matcher.group(1);
        // Keep only values starting with 'h'. This is a loose filter: it admits
        // "http://..." and "https://..." but also any other value beginning with 'h'.
        if(href.startsWith("h"))
        {
            list.add(href);
        }
    }
    // Echo the collected links to standard output.
    for(String link:list)
    {
        System.out.println(link);
    }
    return list;
}//extractLink