python学习--正则表达式

最新推荐文章于 2024-09-13 16:31:55 发布

decan5958

最新推荐文章于 2024-09-13 16:31:55 发布

阅读量575

点赞数

CC 4.0 BY-SA版权

分类专栏： python学习

本文链接：https://blog.youkuaiyun.com/decan5958/article/details/76640549

python学习专栏收录该内容

7 篇文章

订阅专栏

本文通过多个实例展示了Python中正则表达式的使用方法，包括基本的匹配模式、特殊符号的使用、贪婪与非贪婪匹配、分组及替换等高级功能。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

看到一个华南理工大学的大牛写的 Python 代码，非常漂亮，然后膜拜了一下。正则表达式太强大了，以后还得继续好好学，参考文档：https://docs.python.org/2/library/re.html#re.compile 。

import re
# ^ anchors the match at the start of the string (not "the first line" in general),
# so only the leading "hello" is found even though "hello" occurs twice.
rule = r"^hello"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "hello cow cat hello"))    # ['hello']

# A plain literal pattern matches exactly that sequence of characters
import re
rule = r'123'
rule1 = r'hdb'
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "12345123321"))      # ['123', '123']
print(re.findall(rule1, "aahdbaahdb"))      # ['hdb', 'hdb']

# [...] defines a character set: [123] matches any single one of 1, 2 or 3
import re
rule = r"1[123]3"
rule1 = r"h[hdb]b"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "123 113 133 321"))      # ['123', '113', '133']
print(re.findall(rule1, "aah hdb hhb hbb"))     # ['hdb', 'hhb', 'hbb']

# [^...] is a negated set: it matches any single character NOT listed in the brackets
import re
rule = r"1[^12]3"
rule1 = r"h[^db]b"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "123 113 133 321"))      # ['133']
print(re.findall(rule1, "aah hdb hhb hbb"))     # ['hhb']

# ^pattern matches only at the start of the string
import re
rule = r"^123"
rule1 = r"^hdb"
# The results were previously computed and thrown away; print them so the
# example actually shows its output, like the other snippets do.
print(re.findall(rule, "123 113 133 321"))      # ['123']
print(re.findall(rule1, "aah hdb hhb hbb"))     # []  ("hdb" is not at the start)

# pattern$ matches only at the end of the string
import re
rule = r"123$"
rule1 = r"hdb$"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "123 113 133 321"))      # []  (the string ends with "321")
print(re.findall(rule1, "aah hdb hhb hdb"))     # ['hdb']

# x* allows the character before * to repeat zero or more times
import re
rule = r"12*"
rule1 = r"hd*"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "1 12 1212 123"))    # ['1', '12', '12', '12', '12']
print(re.findall(rule1, " h hd hdhd"))      # ['h', 'hd', 'hd', 'hd']

# x+ allows the character before + to repeat one or more times
import re
rule = r"12+"
rule1 = r"hd+"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "1 12 1212 123"))    # ['12', '12', '12', '12']  (the lone "1" needs at least one "2")
print(re.findall(rule1, " h hd hdhd"))      # ['hd', 'hd', 'hd']

# Inside a set, a-b matches any single character in the range a..b
import re
rule = r"h[a-z]b"
rule1 = r"h[1-9]b"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "hdb hab hzb"))      # ['hdb', 'hab', 'hzb']
print(re.findall(rule1, "h1b h5b h9b"))     # ['h1b', 'h5b', 'h9b']

# *? is the non-greedy (lazy) form of *: it matches as FEW repetitions as possible
import re
rule = r"ab*?"
# Nothing follows b*? in the pattern, so the lazy quantifier is satisfied with
# zero "b"s and every match is just "a" (not "ab").
print(re.findall(rule, "abbbbbbbab ddab abc"))   # ['a', 'a', 'a', 'a']

# {m,n} matches from m to n repetitions of the preceding character (as many as possible)
import re
rule = r"a{3,4}"
rule1 = r"a{1,2}"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.findall(rule, "aaabbbbbbaabaaaa"))     # ['aaa', 'aaaa']  (the run "aa" is too short)
print(re.findall(rule1, "aaabbbbbbbbbbbaa"))    # ['aa', 'a', 'aa']


# re.I (re.IGNORECASE) makes matching case-insensitive
import re
rule = re.compile(r"hdb", re.I)
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(rule.findall("hdb"))           # ['hdb']
print(rule.findall("HDB"))           # ['HDB']
print(rule.findall("HdB"))           # ['HdB']


# \w matches any word character including the underscore; for ASCII-only matching
# (Python 2 str patterns, or Python 3 with re.ASCII) it is equivalent to [A-Za-z0-9_].
# re.match() only tries to match the pattern at the beginning of the string.
import re
m = re.match(r"(\w+) (\w+)", "hdb huang, de")

# group() returns the text matched by the whole pattern (0) or by a given group
print(m.group(0))                                  # hdb huang
print(m.group(1))                                  # hdb
print(m.group(2))                                  # huang

# start() returns the index where the given group's match begins
print(m.start(1))                                  # 0
print(m.start(2))                                  # 4

# end() returns the index just past the end of the given group's match
print(m.end(1))                                    # 3
print(m.end(2))                                    # 9

# span() returns the (start, end) pair for the given group
print(m.span(1))                                   # (0, 3)
print(m.span(2))                                   # (4, 9)

# split() cuts the string at every match of the pattern; maxsplit limits the number of cuts
import re
some_text = 'alpha, beta,,,,gamma delta'
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.split('[, ]+', some_text))                  # ['alpha', 'beta', 'gamma', 'delta']
print(re.split('[, ]+', some_text, maxsplit=1))      # ['alpha', 'beta,,,,gamma delta']   one split
print(re.split('[, ]+', some_text, maxsplit=2))      # ['alpha', 'beta', 'gamma delta']   two splits


# sub() replaces every match of rule with 'python';
# subn() does the same but returns (new_string, number_of_replacements)
import re
rule = r"h..b"
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(re.sub(rule, 'python', 'hhdb wo hhbb you'))     # python wo python you
print(re.subn(rule, 'python', 'hhdb wo hhbb you'))    # ('python wo python you', 2)

# a|b matches either a or b; here the group accepts any one of three suffixes
import re
rule = r"\d{4}#\w+(\.dog|\.cow|\.cat)"
# The match objects were previously discarded; print the matched text so the
# example shows that all three inputs match in full.
print(re.match(rule, "1234#my.dog").group())     # 1234#my.dog
print(re.match(rule, "1234#my.cow").group())     # 1234#my.cow
print(re.match(rule, "1234#my.cat").group())     # 1234#my.cat


#其余常用正则表达式
\w  		匹配字母数字以及下划线
\W  		匹配非字母数字以及下划线
\s  		匹配任意空白字符
\S  		匹配任意非空白字符
\d  		匹配任意数字
\D  		匹配任意非数字
\A 	 	只匹配字符串的开始
\z  		匹配字符串结束（Perl/Java 等引擎的写法；本文使用的 Python 2 的 re 模块不支持 \z，请使用 \Z）
\Z  		只匹配字符串的结束；在 Python 中 \Z 不会匹配末尾换行符之前的位置（在末尾换行符之前也能匹配的是 $）
\b  		匹配一个单词边界
\B  		匹配一个非单词边界
[0-9] 		匹配任意数字
[^0-9] 		匹配任意非数字
[a-z]   	匹配任意小写字母
[A-Z]   	匹配任意大写字母
[a-zA-Z0-9]     匹配任意大小写字母以及数字
re.M 		多行匹配
re.S  		让 . 匹配包括换行在内的所有字符