package com.hfxt;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal e-mail address crawler: downloads a web page and collects every
 * substring that looks like an e-mail address.
 */
public class Test4 {
    /** Page scanned by {@link #getMails()} when the caller supplies no URL. */
    private static final String DEFAULT_PAGE_URL = "http://email.163.com/";

    /**
     * Address shape: word characters, {@code @}, word characters, then one or
     * more {@code .word} groups. Compiled once and shared by every call.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Prints every address found on the default page, one per line.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        List<String> list = getMails();
        for (String mail : list) {
            System.out.println(mail);
        }
    }

    /**
     * Collects the addresses found on the default page.
     *
     * @return matches in page order, duplicates included; empty if none
     * @throws IOException if the page cannot be opened or read
     */
    public static List<String> getMails() throws IOException {
        return getMails(DEFAULT_PAGE_URL);
    }

    /**
     * Collects the addresses found on the given page.
     *
     * @param pageUrl absolute URL of the page to scan
     * @return matches in page order, duplicates included; empty if none
     * @throws IOException if the URL is malformed or the page cannot be
     *         opened or read
     */
    public static List<String> getMails(String pageUrl) throws IOException {
        URL url = new URL(pageUrl);
        // NOTE(review): bytes are decoded with the platform default charset,
        // exactly as before; confirm whether the target page needs an explicit one.
        BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream()));
        try {
            List<String> mails = new ArrayList<String>();
            String line;
            // The page is scanned line by line so it is never held in memory whole.
            while ((line = reader.readLine()) != null) {
                mails.addAll(extractMails(line));
            }
            return mails;
        } finally {
            // Always release the underlying network stream, even when reading fails.
            reader.close();
        }
    }

    /**
     * Finds every address-shaped substring of {@code text}.
     *
     * @param text text to scan; {@code null} is treated as empty
     * @return matches in order of appearance; never {@code null}
     */
    static List<String> extractMails(String text) {
        List<String> mails = new ArrayList<String>();
        if (text == null) {
            return mails;
        }
        Matcher matcher = MAIL_PATTERN.matcher(text);
        while (matcher.find()) {
            mails.add(matcher.group());
        }
        return mails;
    }
}
// Web e-mail address crawler
// Latest recommended article published on 2025-01-23 14:32:30