文章目录
特殊的表达式
零宽断言
// Zero-length matches: both "a?" and "a*" are allowed to match zero characters.
Pattern pattern = Pattern.compile("a?");
// The empty string is a valid zero-length match for "a*", so the result is true
p("".matches("a*")); //true
p("weEs".matches("a*")); //false
p("".matches("a?")); //true
Matcher matcher = pattern.matcher("fgs");
// "a?" can match zero characters, so find() succeeds even though the input contains no 'a'
p(matcher.find()); //true
p("".matches("")); //true
Matcher的基本用法
Pattern p = Pattern.compile("\\d{3,5}");
String s = "123-34345-234-00";
Matcher m = p.matcher(s);
// matches() requires the entire input to match the pattern
p(m.matches()); //false
m.reset(); // move the match position, which matches() advanced, back to the start of the input
p(m.find()); //true
p(m.start() + "-" + m.end()); //0-3: start() is the index of the first matched character, end() is one past the last
p(m.find()); //true
p(m.start() + "-" + m.end()); //4-9
p(m.find()); //true
p(m.start() + "-" + m.end()); //10-13
p(m.find()); //false
//p(m.start() + "-" + m.end()); calling start() after a failed find() throws an exception
p(m.lookingAt());//true: lookingAt() always tries to match from the beginning of the input
p(m.lookingAt());//true
p(m.lookingAt());//true
p(m.lookingAt());//true
空白行
// Blank line: any whitespace characters except '\n', followed by the terminating line feed
p(" \n".matches("^[\\s&&[^\\n]]*\\n$"));
// Lines obtained through readLine() no longer carry the line feed, so the \\n is dropped from the pattern
p(" \n".matches("^[\\s&&[^\\n]]*$"));
分组(group)
// Groups are numbered by their opening parenthesis: the first '(' starts group 1,
// the second '(' starts group 2. group() with no argument returns the entire match.
Pattern pattern = Pattern.compile("(\\d{3,5})([a-z]{2})");
String s = "123aa-34345bb-234cc-00";
Matcher matcher = pattern.matcher(s);
while (matcher.find()) {
    // Group 1 is the digit run only
    p(matcher.group(1));
    // over the three matches this prints
    /*123
    34345
    234*/
    // The whole match (digits plus the two letters)
    p(matcher.group());
    // over the three matches this prints
    /*123aa
    34345bb
    234cc*/
}
quantifiers
// NOTE: the three "Pattern pattern" declarations below are alternatives; keep only one active at a time.
//Greedy quantifier: take the maximum (10 characters) first; if the rest of the pattern fails, give back one character at a time and retry until it succeeds. Prints 0-10
Pattern pattern = Pattern.compile("(.{3,10})[0-9]");
//Reluctant quantifier: take the minimum (3 characters) first; if the rest of the pattern fails, consume one more character and retry until it succeeds. Prints 0-5
Pattern pattern = Pattern.compile("(.{3,10}?)[0-9]");
//Possessive quantifier: take the maximum (10 characters) and never give any back. Prints not match!
Pattern pattern = Pattern.compile("(.{3,10}+)[0-9]");
String string = "aaaa5bbbb6";
Matcher matcher = pattern.matcher(string);
if (matcher.find()) {
// start-end of the first match (end is exclusive)
p(matcher.start() + "-" + matcher.end());
} else {
p("not match!");
}
Special constructs 特殊构造 (named-capturing and non-capturing命名捕获和非捕获)
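(?=a).{3} 的意思是:从前往后找以 a 开头的 3 个字符(先行断言 (?=a) 只做检查、不消耗字符,所以 a 包含在匹配到的 3 个字符里)。
.{3}(?=a) 的意思是:从前往后找后面紧跟着 a 的 3 个字符(a 本身不包含在匹配结果中)。
如果想让匹配结果包含结尾的 a,可以用后行断言 .{3}(?<=a),意思是找以 a 结尾的 3 个字符;例如对 "444a66b" 会匹配到 44a。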
//non-capturing constructs (lookahead)
//Pattern pattern = Pattern.compile(".{3}(?=a)"); prints: 444
Pattern pattern = Pattern.compile("(?=a).{3}"); //prints: a66
String string = "444a66b";
Matcher matcher = pattern.matcher(string);
// Print every match found in the input
while(matcher.find()) {
p(matcher.group());
}
Back references
(\d(\d))\1 中的 \1 表示重复第 1 个左括号对应分组匹配到的内容:对 "1212",第 1 组 (\d(\d)) 匹配到 12,第 2 组 (\d) 匹配到 2,所以 \1 为 12。
(\d(\d))\2 中的 \2 表示重复第 2 个左括号对应分组匹配到的内容:对 "122",第 1 组匹配到 12,第 2 组匹配到 2,所以 \2 为 2。
//back references
Pattern pattern = Pattern.compile("(\\d(\\d))\\1");
String string = "1212";
Matcher matcher = pattern.matcher(string);
p(matcher.matches()); //true: group 1 matched "12" and \1 repeats it
Pattern pattern = Pattern.compile("(\\d(\\d))\\2");
String string = "122";
Matcher matcher = pattern.matcher(string);
p(matcher.matches()); //true: group 2 matched "2" and \2 repeats it
flags的简写
//Flag shorthand: the embedded flag (?i) inside the pattern is the inline form of Pattern.CASE_INSENSITIVE
Pattern pattern = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
p("Java".matches("(?i)(java)")); //true
抓取网页email邮箱
/*
* @(#)EmailSpider.java 2019年8月12日上午9:42:45
* Java
* Copyright 2019 Thuisoft, Inc. All rights reserved.
* THUNISOFT PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
*/
package regException;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line tool that prints every e-mail address found in a text/HTML file.
 *
 * <p>The file is read line by line; each substring matching the e-mail pattern is
 * written to standard output on its own line.
 *
 * @author Administrator
 * @version 1.0
 */
public class EmailSpider {

    /** File scanned when no path is given on the command line. */
    private static final String DEFAULT_FILE = "E:\\homework\\html\\email.html";

    /**
     * Loose e-mail shape: a run of word characters, dots or hyphens, an {@code @},
     * another such run, a dot, and a run of word characters.
     * Compiled once instead of once per input line.
     */
    private static final Pattern EMAIL = Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Scans a file and prints the e-mail addresses it contains.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan. When absent,
     *             the built-in default path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        // try-with-resources guarantees the reader is closed even when reading fails
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                parse(line);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open file: " + path);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed while reading: " + path);
            e.printStackTrace();
        }
    }

    /**
     * Prints every e-mail address found in one line of text, one address per output line.
     *
     * @param line the text to search
     */
    private static void parse(String line) {
        Matcher matcher = EMAIL.matcher(line);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
统计代码行数
/*
* @(#)CodeCounter.java 2019年8月12日上午9:55:40
* Java
* Copyright 2019 Thuisoft, Inc. All rights reserved.
* THUNISOFT PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
*/
package regException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
/**
 * Counts normal (code), comment and blank lines in the {@code .java} files that sit
 * directly inside one directory (sub-directories are not visited).
 *
 * <p>Classification is a line-prefix heuristic, not a Java parser: a line is a comment
 * line when it starts with {@code //} or {@code /*}, or when it lies inside a block
 * comment opened by an earlier line that started with {@code /*}. Comment markers that
 * appear after code on the same line, or inside string literals, are not recognised.
 *
 * <p>The totals accumulate in static fields, so repeated calls to {@link #main} add up.
 *
 * @author Administrator
 * @version 1.0
 */
public class CodeCounter {

    /** Directory scanned when no path is given on the command line. */
    private static final String DEFAULT_DIR =
            "E:\\homework\\zhangyi\\src\\com\\thunisoft\\zhangyi\\logic";

    /** Lines that are neither blank nor comment. */
    static long normalLines = 0;
    /** Lines classified as comment. */
    static long commentLines = 0;
    /** Lines that are empty after trimming whitespace. */
    static long whiteLines = 0;

    /**
     * Counts the lines of every {@code .java} file in a directory and prints the totals.
     *
     * @param args optional; {@code args[0]} is the directory to scan. When absent, the
     *             built-in default directory is used.
     */
    public static void main(String[] args) {
        File dir = new File((args != null && args.length > 0) ? args[0] : DEFAULT_DIR);
        // listFiles() returns null when the path is not a directory or cannot be read
        File[] codeFiles = dir.listFiles();
        if (codeFiles == null) {
            System.err.println("Not a readable directory: " + dir);
            return;
        }
        for (File child : codeFiles) {
            if (child.isFile() && child.getName().endsWith(".java")) {
                parse(child);
            }
        }
        System.out.println("normalLines : " + normalLines);
        System.out.println("commentLines : " + commentLines);
        System.out.println("whiteLines : " + whiteLines);
    }

    /**
     * Classifies each line of one source file and adds it to the static counters.
     *
     * @param file the source file to read
     */
    private static void parse(File file) {
        // true while the current position is inside an unterminated block comment
        boolean comment = false;
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() already strips the line terminator; trim() removes the rest
                line = line.trim();
                if (line.isEmpty()) {
                    whiteLines++;
                } else if (comment) {
                    // Checked before the prefix tests so that a line such as "/* x */" or
                    // "// x */" inside an open block comment still terminates it.
                    commentLines++;
                    if (line.contains("*/")) {
                        comment = false;
                    }
                } else if (line.startsWith("//")) {
                    commentLines++;
                } else if (line.startsWith("/*")) {
                    commentLines++;
                    // Search after the opener so "/*/" is not mistaken for a closed comment,
                    // and so "/* x */ code" does not leave the comment state switched on.
                    comment = line.indexOf("*/", 2) < 0;
                } else {
                    normalLines++;
                }
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open file: " + file);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed while reading: " + file);
            e.printStackTrace();
        }
    }
}