正则表达式
作用
初体验
/**
 * Compares two ways of validating a QQ number: a hand-written character scan
 * and a regular expression. A value is accepted when it is 6 to 20 characters
 * long and every character is a digit.
 */
public class RegexDemo {
    public static void main(String[] args) {
        // The same four samples go through both validators. Note that "null"
        // here is a four-character string, not a null reference.
        String[] samples = {"23456781", "123", "23ww34555", "null"};
        for (String sample : samples) {
            System.out.println(checkQQ(sample));
        }
        for (String sample : samples) {
            System.out.println(checkQQ2(sample));
        }
    }

    /**
     * Regex-based validation.
     *
     * @param s candidate QQ number, may be null
     * @return true when {@code s} is non-null and matches {@code \d{6,20}}
     */
    private static boolean checkQQ2(String s) {
        if (s == null) {
            return false;
        }
        return s.matches("\\d{6,20}");
    }

    /**
     * Manual validation without regular expressions.
     *
     * @param s candidate QQ number, may be null
     * @return true when {@code s} is non-null, 6 to 20 characters long and
     *         made up only of the characters '0' through '9'
     */
    private static boolean checkQQ(String s) {
        if (s == null) {
            return false;
        }
        int len = s.length();
        if (len < 6 || len > 20) {
            return false;
        }
        // Reject on the first non-digit so no else branches are needed.
        for (char ch : s.toCharArray()) {
            boolean asciiDigit = ch >= '0' && ch <= '9';
            if (!asciiDigit) {
                return false;
            }
        }
        return true;
    }
}
匹配规则
规则
例子
/**
 * Prints the result of {@code String.matches} for a series of small patterns
 * that illustrate character classes, predefined classes and quantifiers.
 * The expected result of each call is noted at the end of its line.
 *
 * <p>Reminder: union and intersection ({@code &&}) only have their special
 * meaning inside a bracketed character class. Outside of {@code [...]},
 * {@code &&} is simply two literal ampersand characters.
 */
public class RegexRole {
    public static void main(String[] args) {
        // Simple and negated character classes: exactly one character is matched.
        System.out.println("a".matches("[abc]"));//true
        System.out.println("z".matches("[abc]"));//false
        System.out.println("a".matches("[^abc]"));//false
        System.out.println("z".matches("[^abc]"));//true
        System.out.println("-------------------------");
        System.out.println("we".matches("[^abc]"));//false: two characters, the class matches one
        System.out.println("a".matches("[a-zA-Z]"));//true
        // Intersection must be written inside the brackets.
        System.out.println("a".matches("[a-zA-Z&&[^ab]]"));//false: a letter, but 'a' is excluded
        System.out.println("n".matches("[a-zA-Z&&[^m-p]]"));//false: 'n' lies in the excluded m-p range
        System.out.println("-------------------------");
        // Predefined classes: \d digit, \D non-digit.
        System.out.println("2".matches("\\d"));//true
        System.out.println("2".matches("\\D"));//false
        System.out.println("23".matches("\\d"));//false: \d matches a single character
        System.out.println("-------------------------");
        // \w word character, \W non-word character, \s whitespace.
        System.out.println("a".matches("\\w"));//true
        System.out.println("23_".matches("\\w"));//false: three characters
        System.out.println("我".matches("\\W"));//true
        System.out.println(" ".matches("\\s"));//true
        System.out.println("-------------------------");
        // Quantifiers: {n} exactly n times, {n,} at least n times.
        System.out.println("1234".matches("\\d{4}"));//true
        System.out.println("12_ww".matches("\\w{5,}"));//true
        System.out.println("-------------------------");
        // Intersection: word characters except underscore, five or more of them.
        System.out.println("12ww21".matches("[\\w&&[^_]]{5,}"));//true
        // No brackets: ONE word character followed by five or more non-underscore characters.
        System.out.println("12ww1嗨".matches("\\w[^_]{5,}"));//true
        System.out.println("-------------------------");
        // Union: word character OR non-underscore, which accepts every character.
        System.out.println("12ww1".matches("[\\w[^_]]{5,}"));//true
        // Same sequence pattern as above; only four characters remain after the first one.
        System.out.println("12ww1".matches("\\w[^_]{5,}"));//false
        System.out.println("12ww1".matches("[a-zA-Z\\d]{5,}"));//true
    }
}
常见案例
手机号码
/**
 * Keeps prompting on standard output and reading tokens from standard input
 * until a token matches the mobile-number pattern {@code 1[3-9]\d{9}}
 * (a '1', then one of 3-9, then nine digits), printing a verdict for
 * every attempt.
 */
public static void checkPhone(){
    Scanner input = new Scanner(System.in);
    boolean accepted = false;
    while (!accepted) {
        System.out.println("请输入您要注册的手机号:please");
        String candidate = input.next();
        accepted = candidate.matches("1[3-9]\\d{9}");
        if (accepted) {
            System.out.println("手机号码格式正确,注册完成!yes phone");
        } else {
            System.out.println("手机号码格式不正确!no phone");
        }
    }
}
邮箱
/**
 * Keeps prompting on standard output and reading tokens from standard input
 * until a token looks like an e-mail address, printing a verdict for every
 * attempt.
 *
 * <p>Accepted shape: one or more word characters, an '@', a 2-20 character
 * alphanumeric label, then one or two ".label" suffixes of 2-20 alphanumeric
 * characters each. Examples that pass: {@code 3468593107@qq.com.cn},
 * {@code 3456@163.com}, {@code 12_wwww2@1234www.com}.
 */
public static void checkEmail(){
    Scanner sc = new Scanner(System.in);
    while (true) {
        System.out.println("请输入您要注册的邮箱:please email");
        String email = sc.next();
        // [\w&&[^_]] is the intersection "word character but not underscore",
        // equivalent to [a-zA-Z\d]. Writing [\w[^_]] instead would be a union
        // that accepts every character.
        // The repeated ".label" part must be a group (...), not a character
        // class [...]: inside brackets the quantifier braces are literal
        // characters and the whole thing matches a single character.
        if(email.matches("\\w+@[\\w&&[^_]]{2,20}(\\.[\\w&&[^_]]{2,20}){1,2}")){
            System.out.println("邮箱格式正确,注册完成!yes email");
            break;
        }else {
            System.out.println("邮箱格式不正确!no email");
        }
    }
}
电话号码
/**
 * Keeps prompting on standard output and reading tokens from standard input
 * until a token matches the landline pattern {@code 0\d{2}-?\d{7}}: a
 * three-digit area code starting with 0, an optional hyphen, then seven
 * digits (for example {@code 020-2223490} or {@code 0202223490}).
 * A verdict is printed for every attempt.
 */
public static void checkTel(){
    Scanner sc = new Scanner(System.in);
    while (true) {
        System.out.println("请输入您要注册的电话号码:please tel");
        String tel = sc.next();
        // '?' makes the preceding hyphen optional: zero or one occurrence.
        if(tel.matches("0\\d{2}-?\\d{7}")){
            System.out.println("电话号码格式正确,注册完成!yes tel");
            // Stop once a valid number has been entered; without this the
            // loop would keep prompting forever.
            break;
        }else {
            System.out.println("电话号码格式不正确!no tel");
        }
    }
}
在方法中的使用
在 String 的 replaceAll 和 split 方法中,正则表达式的使用
/**
 * Shows regular expressions used as arguments to {@code String.split} and
 * {@code String.replaceAll}.
 */
public class RegexInMethod {
    public static void main(String[] args) {
        String namesInit = "小路2222wwww李四ooo张三1王二麻子000pp";

        // Cut the text at every run of word characters and print each piece
        // on its own line.
        // NOTE(review): if the console shows garbled text, the console
        // encoding probably differs from the source encoding - confirm.
        String[] pieces = namesInit.split("\\w+");
        for (int i = 0; i < pieces.length; i++) {
            System.out.println(pieces[i]);
        }

        // Replace every run of word characters with the replacement string.
        System.out.println(namesInit.replaceAll("\\w+", " "));
    }
}
爬取
这是应该有的效果:
这是我的代码和结果:
/**
 * Extracts contact details (mobile numbers, e-mail addresses, landline
 * numbers and 400 hotline numbers) from a block of text and prints each
 * match on its own line, in order of appearance.
 */
public class RegexCrawler {
    public static void main(String[] args) {
        String str = "来黑马程序学习java,电话020-2222345,或者" +
                "联系邮箱 itcase@itcase.cn,电话18762832633," +
                "0203333456 ,邮箱boza@itcast.com,400-100-3233," +
                "4001003233";
        // Extraction rule: four alternatives joined with '|'.
        //  1. mobile number: 1, then 3-9, then nine digits
        //  2. e-mail: word characters, '@', then alphanumeric labels separated
        //     by dots. The intersection [\w&&[^_]] (word character except
        //     underscore) only works inside brackets; written as (\w&&[^_])
        //     the && would be two literal ampersands and no address would
        //     ever match.
        //  3. landline: 0 plus area code, optional hyphen, local number
        //  4. 400 hotline with optional hyphens
        String regex = "(1[3-9]\\d{9})|" +
                "(\\w{1,30}@[\\w&&[^_]]{2,20}" +
                "(\\.[\\w&&[^_]]{2,20}){1,2})|" +
                "(0\\d{2,6}-?\\d{5,20})|" +
                "(400-?\\d{3,9}-?\\d{3,9})";
        // Compile the rule into a reusable Pattern.
        Pattern pattern = Pattern.compile(regex);
        // Bind the pattern to the text to obtain a matcher.
        Matcher matcher = pattern.matcher(str);
        // find() advances to the next match; group() returns the matched text.
        while (matcher.find()){
            System.out.println(matcher.group());
        }
    }
}
找到错误了!
[\\w&&[^_]] 和 [\\w[^_]] 不一样:前者是交集(且),后者是并集(或)!注意 && 只有写在方括号 [] 里面才表示交集;写成 \\w&&[^_] 时,&& 只是两个普通的 & 字符。
改掉就好啦(上面代码已改动)