挺喜欢用Java写一些小东西的,这次的内容比较简单,是利用正则表达式提取本地文件或者URL中的邮箱信息。
正则表达式:
"[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"
代码如下:
import java.util.regex.*;
import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Extracts e-mail addresses from a local text file or from the content served
 * by a URL, appends every address found to a target file and echoes the
 * collected addresses on the console.
 *
 * <p>Input is read line by line; every address on a line is collected, not
 * only the first one. Streams are opened when {@code start} is called and are
 * always closed again, even when reading fails part-way through.
 */
public class EmailCatch {

    /**
     * E-mail pattern, compiled once for the whole class. {@code [\w[.-]]} is a
     * character-class union: word characters plus '.' and '-'.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /** Path of the local source file; {@code null} when only URLs are read. */
    private final String sourceFile;

    /** Path of the file the extracted addresses are appended to. */
    private final String targetFile;

    /** Every address collected so far, in the order it was found. */
    private final List<String> emails = new ArrayList<String>();

    /**
     * Creates an extractor without a local source file, for use with
     * {@link #start(String)}.
     *
     * @param tf path of the target file; addresses are appended to it
     * @throws NullPointerException if {@code tf} is {@code null}
     */
    public EmailCatch(String tf) {
        this(null, tf);
    }

    /**
     * Creates an extractor that reads a local file.
     *
     * @param sf path of the source file to scan; may be {@code null} when only
     *           {@link #start(String)} will be used
     * @param tf path of the target file; addresses are appended to it
     * @throws NullPointerException if {@code tf} is {@code null}
     */
    public EmailCatch(String sf, String tf) {
        this.sourceFile = sf;
        this.targetFile = Objects.requireNonNull(tf, "tf");
    }

    /**
     * Scans the local source file given at construction time.
     *
     * <p>I/O failures are reported on {@code System.err} and do not propagate.
     *
     * @throws IllegalStateException if no source file was configured
     */
    public void start() {
        if (sourceFile == null) {
            throw new IllegalStateException(
                    "No source file configured; use EmailCatch(sf, tf) or start(urlAdd)");
        }
        try {
            process(new BufferedReader(new FileReader(sourceFile)));
        } catch (IOException e) {
            System.err.println("EmailCatch: failed to process file " + sourceFile);
            e.printStackTrace();
        }
    }

    /**
     * Scans the content served by the given URL.
     *
     * <p>A malformed URL or an I/O failure is reported on {@code System.err}
     * and does not propagate.
     *
     * @param urlAdd address of the resource to scan
     */
    public void start(String urlAdd) {
        try {
            URLConnection conn = new URL(urlAdd).openConnection();
            process(new BufferedReader(new InputStreamReader(conn.getInputStream())));
        } catch (MalformedURLException e) {
            System.err.println("EmailCatch: malformed URL " + urlAdd);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("EmailCatch: failed to process URL " + urlAdd);
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code reader} to the end, appends each address found to the
     * target file, then prints the collected addresses via {@link #show()}.
     * Both the reader and the writer are closed on every exit path.
     */
    private void process(BufferedReader reader) throws IOException {
        // "in" is declared first so it is closed even if opening the target file fails.
        try (BufferedReader in = reader;
             PrintWriter out = new PrintWriter(new FileWriter(targetFile, true))) {
            String line;
            while ((line = in.readLine()) != null) {
                for (String email : extractEmails(line)) {
                    emails.add(email);
                    out.println(email);
                }
            }
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (out.checkError()) {
                System.err.println("EmailCatch: error while writing to " + targetFile);
            }
            show(); // console verification of what has been collected
        }
    }

    /**
     * Returns every e-mail address contained in {@code s}.
     *
     * @param s text to scan; may be {@code null}
     * @return the addresses in order of appearance; empty if there are none or
     *         {@code s} is {@code null}
     */
    public static List<String> extractEmails(String s) {
        List<String> result = new ArrayList<String>();
        if (s == null) {
            return result;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(s);
        while (matcher.find()) {
            result.add(matcher.group());
        }
        return result;
    }

    /**
     * Returns the first e-mail address contained in {@code s}.
     *
     * @param s text to scan; may be {@code null}
     * @return the first address, or {@code null} if there is none
     */
    public String getEmail(String s) {
        if (s == null) {
            return null;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(s);
        return matcher.find() ? matcher.group() : null;
    }

    /** Prints every collected address to the console as {@code index:address}. */
    public void show() {
        for (int i = 0; i < emails.size(); ++i) {
            System.out.println(i + ":" + emails.get(i));
        }
    }

    /** Demonstrates both modes: scanning a local file and scanning a web page. */
    public static void main(String[] args) {
        // Read from a local file.
        new EmailCatch("d:/email.txt", "d:/emailCatcher.txt").start();
        System.out.println("========================");
        // Read directly from a web page.
        new EmailCatch("d:/emailCatcherURL.txt").start("http://tieba.baidu.com/p/941471635");
    }
}
main方法里做了两个例子。
另外想问问大家对于邮箱的正则表达式是否有更好的写法?
-END