linux上
1.echo http://www.feedsky.com |awk '/^http:\/\/www.*\.com$/'
2.$ echo '
http://asdfl.la.cn/
http://www.abc.com
http://wer12.23.33.net
http://asdf.com
'|awk '/^http:\/\/([^.\/]+\.)*([^.\/]+)\.([^.\/]+)\/?$/'
Java 里的正则表达式
1. 转义符是 \,但在 Java 字符串字面量中 \ 本身也需要转义,所以要写成 \\(2 个反斜杠)
2. 正则不需要像 awk 那样用 /.../ 包起来:开头和结尾直接写 ^ 和 $,两侧没有 /
3. []里的.不需要转义
如 :
- "^(http:\\/\\/([^.\\/]+\\.)*[^.\\/]+\\/?)$"
一个简单demo:
java 代码
- package identify.blog.work;
-
- import org.apache.log4j.Logger;
- import org.apache.regexp.RE;
-
- /**
-  * Small demo: decides whether a URL matches one of a fixed set of
-  * "blog home page" patterns, using Jakarta Regexp ({@code org.apache.regexp.RE}).
-  */
- public class RegTest {
-     private static final Logger logger = Logger.getLogger(RegTest.class);
-
-     /**
-      * Matchers compiled once at class-load time from the pattern strings below.
-      * NOTE(review): these RE instances are shared by every caller; confirm the
-      * Jakarta Regexp thread-safety guarantees before using this class from
-      * several threads.
-      */
-     private static final RE[] pattersBlog;
-
-     static {
-         // In a Java string literal every regex backslash must be doubled.
-         final String[] possibleBlogFormats = new String[]{
-             "^(http:\\/\\/[^.\\/]+\\.blog\\.hexun\\.com)\\/?$"
-         };
-
-         // Build into a local array first so the final field is assigned exactly once.
-         final RE[] compiled = new RE[possibleBlogFormats.length];
-         for (int i = 0; i < compiled.length; i++) {
-             compiled[i] = new RE(possibleBlogFormats[i], RE.MATCH_CASEINDEPENDENT);
-         }
-         pattersBlog = compiled;
-     }
-
-     /** Creates a tester; all state is static, so instances carry no data. */
-     public RegTest() {
-     }
-
-     /**
-      * Runs the demo on two sample URLs and prints one result per line.
-      *
-      * @param args ignored
-      */
-     public static void main(String[] args) {
-         RegTest regTest = new RegTest();
-         String url1 = "http://nrbfb.blog.hexun.com/test.html/adsfds";
-         String url2 = "http://nrbfb.blog.hexun.com/";
-
-         System.out.println(regTest.isBlogByReg(url1));
-         System.out.println(regTest.isBlogByReg(url2));
-     }
-
-     /**
-      * Tests the URL against every configured blog pattern (case-insensitive).
-      *
-      * @param url the URL to test; {@code null} is treated as "not a blog"
-      * @return {@code true} if at least one pattern matches, {@code false} otherwise
-      */
-     public boolean isBlogByReg(String url) {
-         // Guard against null instead of handing it to the regex engine.
-         if (url == null) {
-             return false;
-         }
-
-         for (RE pattern : pattersBlog) {
-             if (pattern.match(url)) {
-                 return true;
-             }
-         }
-         return false;
-     }
- }
-
- 输出结果为:
- false
- true
":\\d*0\\d*$"