3.1.1 正则替换
# 格式:
re.sub("匹配原则","替换内容",string)
默认会替换字符串中所有匹配到的子串(可通过 count 参数限制替换次数)
# Example: re.sub replaces every non-overlapping match of the pattern.
import re

str1 = "hello ,i'm learning python python"
# "p.{2}" matches "p" plus the next two characters, i.e. "pyt" in each "python".
print(re.sub(r"p.{2}", "xixi", str1))  # hello ,i'm learning xixihon xixihon
# 3.1.2 Regex compilation
import re

msg = "hello my name is Jack Cali"
msg2 = "hello my name is Rose"

# Search directly: the pattern string is handed to re.findall on every call.
# Raw strings keep "\s" from being treated as a (invalid) string escape.
result = re.findall(r"is\s(.*)$", msg)
result2 = re.findall(r"is\s(.*)$", msg2)
print(result, result2)

# Compile first, then search with the reusable pattern object.
reg = re.compile(r"is\s(.*)$")
print(reg.findall(msg))
print(reg.findall(msg2))

# Difference: both forms return the same matches. re.compile builds the
# pattern object once so it can be stored and reused explicitly, while the
# module-level functions have to look the pattern up again on each call.
3.2 正则表达式
区间
3.2.1 区间
# Ranges: [] defines a range by character code, so the lower code point must
# come first, e.g. [a-z] (a -> 97, z -> 122).
# [A-z] is also legal (A -> 65, z -> 122), but note that it additionally
# covers the punctuation between "Z" and "a" (codes 91-96: [ \ ] ^ _ `).
# A bracket expression matches exactly one character out of those listed.
# Examples
import re

# One of "1", "2", "p", "P" followed by "x" (two characters in total).
ret = re.findall(r"[12pP]x", "2px 1xx Python pxoo")
print(ret)

# Any single digit 0-9.
ret = re.findall(r"[0-9]", "124354raskgreg-grme23")
print(ret)

# Any single uppercase letter A-Z.
ret = re.findall(r"[A-Z]", "124354raskgreg-grme23")
print(ret)

# Any single lowercase letter a-z.
ret = re.findall(r"[a-z]", "124354raskgreg-grme23")
print(ret)

# Any single letter (either case) or digit.
ret = re.findall(r"[A-Za-z0-9]", "124354raskgreg-grme23")
print(ret)

# Any single letter, digit, or "-" (a trailing "-" is taken literally).
ret = re.findall(r"[a-z0-9A-Z-]", "124354raskgreg-grme23")
print(ret)

# 3.2.2 Negated ranges
# A character that is NOT in A-z, followed by a digit 0-9.
ret = re.findall(r"[^A-z][0-9]", "few2345rg43h6n5jdth")
print(ret)  # ['23', '45', '43']
符号
3.2.3 匹配或
import re

msg = "四是四,十是十 十四是十四 四十是四十"

# "|" means "or": search returns the first match of either alternative.
ret = re.search(r"四|十", msg)
print(ret.group())

# findall returns every match of either alternative, in order.
ret2 = re.findall(r"四|十", msg)
print(ret2)
# 3.2.4 The "." placeholder
# "." stands for any single character except the newline "\n".
import re

# "pthon" has no character between "p" and "thon", and "p\nthon" has a
# newline there, so neither of them matches.
ret = re.findall(r"p.thon", "pXthon python p-thon pthon p\nthon p thon")
print(ret)  # ['pXthon', 'python', 'p-thon', 'p thon']
# 3.2.5 ^ and $
import re

# "^" anchors the match at the start of the string.
ret = re.findall(r"^python", "python1 is python")
print(ret)  # ['python']

# "$" anchors the match at the end of the string.
ret = re.findall(r"python$", "python1 is python")
print(ret)  # ['python']

# Both anchors: the whole string must be exactly "python".
ret = re.findall(r"^python$", "python1 is python")
print(ret)  # []
# 3.2.6 Repetition
# Quantifiers (each one applies to the item immediately before it):
#   ?      zero or one time
#   +      one or more times
#   *      any number of times, including zero
#   {n,m}  between n and m times; {n,}, {,m} and {n} are also allowed
# Examples
import re

ret = re.findall(r"py?", "pythonppyy")
print(ret)  # ['py', 'p', 'py']

ret = re.findall(r"py+", "pythonppyy")
print(ret)  # ['py', 'pyy']

ret = re.findall(r"py*", "pythonppyy")
print(ret)  # ['py', 'p', 'pyy']

ret = re.findall(r"py{1,3}", "pythonppyy")
print(ret)  # ['py', 'pyy']

# {1,} repeats the previous item one or more times ...
ret = re.findall(r"py{1,}", "pythonppyyyy")
print(ret)  # ['py', 'pyyyy']

# ... which is exactly what "+" does.
ret = re.findall(r"py+", "pythonppyyyy")
print(ret)  # ['py', 'pyyyy']

# {,3} repeats the previous item at most three times (zero allowed).
ret = re.findall(r"py{,3}", "pythonppyyyy")
print(ret)  # ['py', 'p', 'pyyy']

# {3} repeats the previous item exactly three times.
ret = re.findall(r"py{3}", "pythonppyyyy")
print(ret)  # ['pyyy']
快捷标识
3.2.7 快捷标识
\A 匹配字符串开始
\b 匹配词边界
\B 匹配非词边界
\w 匹配单词字符:字母、数字、下划线,例如 a A 123 _(对 str 模式还包括中文等 Unicode 文字)
\W 匹配非单词字符
\d 匹配数字
\D 匹配非数字
\s 匹配空白
\S 匹配非空白
# Examples
import re

# \A: match only at the start of the string.
ret = re.findall(r"\Ahello", "hello world hello")
print(ret)  # ['hello']

# \b / \B: "word" must start at a word boundary and must NOT end at one.
ret = re.findall(r"\bword\B", "word123 word# abcword #word你好")
print(ret)  # ['word', 'word']

# \w / \W: a word character before "word" and a non-word character after it.
ret1 = re.findall(r"\wword\W", "word123 word# abcword #word你好")
print(ret1)  # ['cword ']
ret2 = re.findall(r"\wword\W", "中国word*56w A1word# abcword #word你好")
print(ret2)  # ['国word*', '1word#', 'cword ']

# \d / \D: a digit before "word" and a non-digit after it.
ret = re.findall(r"\dword\D", "word123 1word# abcword #word你好")
print(ret)  # ['1word#']

# \s / \S: whitespace before "word" and non-whitespace after it.
ret = re.findall(r"\sword\S", "word123 word# abcword #word你好")
print(ret)  # [' word#']
^ $
import re

# Matches "python" only when the string starts with it.
ret = re.findall(r"^python", "python1 is python")
print(ret)  # ['python']

# Matches "python" only when the string ends with it.
ret = re.findall(r"python$", "python1 is python")
print(ret)  # ['python']

# Matches only when the string both starts and ends with the same "python",
# i.e. when the whole string is "python".
ret = re.findall(r"^python$", "python1 is python")
print(ret)  # []
正则重复
# Quantifiers, applied to the preceding item:
#   ?  -> 0 or 1 repetition
#   +  -> 1 or more repetitions
#   *  -> 0 or more repetitions
#   {n,m} -> n to m repetitions; variants: {n,}  {,m}  {n}
import re

ret = re.findall(r"py?", "pythonppyy")
print(ret)  # ['py', 'p', 'py']

ret = re.findall(r"py+", "pythonppyy")
print(ret)  # ['py', 'pyy']

ret = re.findall(r"py*", "pythonppyy")
print(ret)  # ['py', 'p', 'pyy']

ret = re.findall(r"py{1,3}", "pythonppyy")
print(ret)  # ['py', 'pyy']

# One or more repetitions of the preceding item.
ret = re.findall(r"py{1,}", "pythonppyyyy")
print(ret)  # ['py', 'pyyyy']

# One or more repetitions of the preceding item (same as {1,}).
ret = re.findall(r"py+", "pythonppyyyy")
print(ret)  # ['py', 'pyyyy']

# At most three repetitions of the preceding item.
ret = re.findall(r"py{,3}", "pythonppyyyy")
print(ret)  # ['py', 'p', 'pyyy']

# Exactly three repetitions of the preceding item.
ret = re.findall(r"py{3}", "pythonppyyyy")
print(ret)  # ['pyyy']
贪婪模式和非贪婪模式
# Greedy mode: a quantifier matches as long a string as it can.
# Non-greedy mode (+? *? {2,4}?): scanning left to right, the quantifier stops
# as soon as the match can succeed, then matching continues with the rest of
# the string.
# Greedy mode is the default.
import re

ret = re.findall(r"py*", "pythonpyyyyy")
print(ret)  # ['py', 'pyyyyy']

# Non-greedy mode: ".*?" stops at the first "s" it can reach.
msg = "cats and dogs,cats1 and dogs1"
print(re.findall(r"c.*?s", msg))  # ['cats', 'cats']

# "y*?" prefers zero repetitions, so only the "p" is matched.
ret = re.findall(r"py*?", "pythonpyyyyy")
print(ret)  # ['p', 'p']
正则分组
# group() takes 0 as its default argument and returns the whole match;
# group(n) with n > 0 returns the text matched by the n-th group.
msg = "tel:173-7572-2991"
ret = re.search(r"(\d{3})-(\d{4})-(\d{4})", msg)
print(ret.group())   # the whole match
print(ret.group(0))  # the whole match
print(ret.group(1))  # text of the first group
print(ret.group(2))  # text of the second group
print(ret.group(3))  # text of the third group
分组向后引用
# Text matched by an earlier group is remembered, and a later backreference
# has to match that same text again.
# With this first message the pattern below finds nothing (the last segment
# "2991" differs from group 1, "173"): re.search would return None, and
# calling .group() on it raises
# AttributeError: 'NoneType' object has no attribute 'group'
# because only match objects have a group() method.
msg = "tel:173-7572-2991"
# This message repeats "173" at the end, so the backreference succeeds.
msg = "tel:173-7572-173"
ret = re.search(r"(\d{3})-(\d{4})-\1", msg)  # \1 = text of the first group
print(ret.group())  # 173-7572-173
捕获分组和非捕获分组
# Capturing group: the matched text is stored and given an index starting
# at 1, so it can be used in a backreference.
# Non-capturing group, (?:pattern): groups without storing the matched text,
# so it gets no index and cannot be backreferenced.
import re

msg = "tel:173-7572-7572"
# (?:\d{3}) is not numbered, so (\d{4}) is group 1 and \1 refers to it.
ret = re.search(r"(?:\d{3})-(\d{4})-\1", msg)
print(ret.group(0))  # the whole match: 173-7572-7572
print(ret.group(1))  # the first capturing group: 7572

# When the pattern has a capturing group, findall returns only the text
# captured by the group instead of the whole match.
msg = "tel:173-7572-7572"
ret = re.findall(r"(?:\d{3})-(\d{4})-\1", msg)
print(ret)  # ['7572']
命名分组
# Named group: (?P<name>pattern)
import re

msg = "tel:173-7572-7572"
# Named groups are still numbered, so (\d{4}) is group 2 and \2 refers to it.
ret = re.search(r"(?P<first>\d{3})-(\d{4})-\2", msg)
print(ret.group())         # 173-7572-7572
print(ret.group("first"))  # 173
print(ret.groupdict())     # {'first': '173'}
正则标记
# Flags change the default matching rules of a pattern, for example whether
# matching is case-sensitive.
msg = """YOU JUMP, I GO
you jump, i go"""

# re.I: ignore case.
ret = re.findall(r"JUMP", msg, re.I)
print(ret)  # ['JUMP', 'jump']

# re.M: multi-line mode, "^" and "$" also match at the start and end of each
# line. Combined here with re.I.
ret = re.findall(r"^YOU JUMP, I GO$", msg, re.I | re.M)
print(ret)  # ['YOU JUMP, I GO', 'you jump, i go']

# re.S: "." matches any character, including the newline. Combined with re.I,
# the single match spans both lines.
ret = re.findall(r"^you.*go$", msg, re.I | re.S)
print(ret)  # ['YOU JUMP, I GO\nyou jump, i go']