首先是Python实际全测试例子然后是正则表达式使用规则全理解
首先是Python实际全测试
不同编程语言规则基本相同
正则表达式具有匹配不重叠的特点,已经匹配过的部分不再参与后续匹配
下面的代码可直接复制运行
import re
from re import match
# Print every match of a regular expression: (text, pattern, sequence number)
def to_match(words, pattern, No):
    """Print every non-overlapping match of ``pattern`` found in ``words``.

    Args:
        words: The source string to search.
        pattern: The regular expression to apply.
        No: Sequence number used to label each printed line.

    Each hit is printed on its own line as ``No.<No>:  <hit>``.  Because the
    search uses ``re.findall``, a pattern with capture groups yields the group
    contents (a tuple when there are several groups) instead of the whole
    match.  When nothing matches, a single ``No.<No>: No Match`` line is
    printed.
    """
    hits = re.findall(pattern, words)
    if not hits:
        print("No." + str(No) + ": No Match")
        return
    # The loop variable is not called ``match`` so that the file-level
    # ``from re import match`` is not hidden inside this function.
    for hit in hits:
        print("No." + str(No) + ": ", hit)
"""
成功人chen某
24.9
"""
"""
建议将看过的代码注释
便于查看
"""
# 1. Single-character matching
print("\n\n1,单个匹配")

# "." stands for any one character (other than a newline by default).
to_match("nuosu", r".....", 1)
to_match("abcdefg", r"..", 2)
# No.1: nuosu
# No.2: ab
# No.2: cd
# No.2: ef

# A literal character (or run of characters) matches itself.
to_match("nuosu", r"u", 1)
to_match("abcdefg", r"cde", 2)
# No.1: u
# No.1: u
# No.2: cde
# 2. Position matching
print("\n\n2,位置匹配")

# "^" anchors the pattern to the start of the string.
to_match("nuosu", r"^nuo", 1)
to_match("nuosu", r"^abc", 2)
to_match("nuosu", r"^uo", 3)  # "uo" occurs, but not at the start
# No.1: nuo
# No.2: No Match
# No.3: No Match

# "$" anchors the pattern to the end of the string.
to_match("nuosu", r"su$", 1)
to_match("nuosu", r"abc$", 2)
to_match("nuosu", r"nuo$", 3)  # "nuo" occurs, but not at the end
# No.1: su
# No.2: No Match
# No.3: No Match
# 3. Quantifiers
# The section header is printed once; the original repeated it before "*".
print("\n\n3,个数匹配")

# "?" -- zero or one of the preceding item.
to_match("nuosu", r"nuo?", 1)
to_match("nuosu", r"nuoo?", 2)
to_match("nuosu", r"nuoos?", 3)
# No.1: nuo
# No.2: nuo
# No.3: No Match

# "*" -- zero or more of the preceding item.
to_match("nuosu", r"nuo*", 1)
to_match("nuosu", r"nuoo*", 2)
to_match("nuosu", r"nuoos*", 3)
# No.1: nuo
# No.2: nuo
# No.3: No Match

# "+" -- one or more of the preceding item.
to_match("nuosu", r"nuo+", 1)
to_match("nuosu", r"nuoo+", 2)
# No.1: nuo
# No.2: No Match
# 4. Escaping
print("4,转义匹配")

# A backslash turns the metacharacter "." into a literal dot.
to_match("nuo..su", r"nuo\.", 1)
to_match("nuo..su", r"\.", 2)
# No.1: nuo.
# No.2: .
# No.2: .
# 5. Bracket constructs
print("\n\n5,括号操作")

# {n} -- exact repetition count: a{2} means "a" exactly twice.
print("No.A")
to_match("nuosu", "u", 1)
to_match("nuuuuosuu", "u{2}", 2)
# No.1: u
# No.1: u
# No.2: uu
# No.2: uu
# No.2: uu

# {m,n} -- between m and n repetitions (as many as possible are taken).
print("No.B")
to_match("nuosu", "u{2,3}", 1)
to_match("nuuuuosuu", "u{2,3}", 2)
# No.1: No Match
# No.2: uuu
# No.2: uu

# [...] -- character class: matches one character out of those listed.
print("No.C")
to_match("nuosu", "[A-Z]uo", 1)
to_match("Nuosu", "[A-Z]uo", 2)
# No.1: No Match
# No.2: Nuo
to_match("nuosu", "[Nu]", 3)
to_match("Nuosu", "[Nu]", 4)
# No.3: u
# No.3: u
# No.4: N
# No.4: u
# No.4: u
# [^...] -- negated character class: matches one character that is NOT listed.
# The patterns now actually contain "^"; previously they repeated the No.C
# patterns and never exercised negation.
print("No.D")
to_match("nuosu", r"[^A-Z]uo", 1)
to_match("Nuosu", r"[^A-Z]uo", 2)
# No.1: nuo
# No.2: No Match
# (...) -- capturing group: findall reports the group's text, not the whole match.
print("No.E")
to_match("nuosu", r"(nuo)", 1)
to_match("nuosu", r"(nuo)su", 2)
# No.1: nuo
# No.2: nuo

# (a|b) -- capturing group with alternatives separated by "|".
print("No.F")
to_match("nuosu", r"(nuo|uos)", 1)
to_match("nuosu", r"(nu|os|u)", 2)
# No.1: nuo
# No.2: nu
# No.2: os
# No.2: u

# (?:...) -- NON-capturing group: it only groups, so findall reports the
# whole match.
print("No.G")
to_match("nuosu", r"(?:uo)", 1)
to_match("nuosu", r"(?:uo)su", 2)
to_match("nuosu", r"n(?:uo)", 3)
to_match("nuosu", r"n(?:uo)su", 4)
# No.1: uo
# No.2: uosu
# No.3: nuo
# No.4: nuosu
# xx(?=abc) -- positive lookahead: matches xx only where abc follows;
# abc itself is not part of the match.
print("No.H")
to_match("nuosu", r"(?=os)", 1)  # zero-width: the match is an empty string
to_match("nuosu", r"nu(?=os)", 2)
# No.1:
# No.2: nu

# xx(?!abc) -- negative lookahead: matches xx only where abc does NOT follow;
# abc itself is not part of the match.
print("No.I")
to_match("nuosu", r"(?!abc)", 1)  # zero-width: an empty match at every position
to_match("nuosu", r".(?!abc)", 2)
to_match("nuosu", r"nu(?!abc)", 3)
to_match("nuosu", r"nu(?!osu)", 4)
# No.1:
# No.1:
# No.1:
# No.1:
# No.1:
# No.1:
# No.2: n
# No.2: u
# No.2: o
# No.2: s
# No.2: u
# No.3: nu
# No.4: No Match
# (?<=abc)xx -- positive lookbehind: matches xx only where abc comes right
# before it; abc itself is not part of the match.
print("No.J")
to_match("nuosu", r"(?<=nu)", 1)  # zero-width: the match is an empty string
to_match("nuosu", r"(?<=nu).", 2)
to_match("nuosu", r"(?<=nu)osu", 3)
# No.1:
# No.2: o
# No.3: osu

# (?<!abc)xx -- negative lookbehind: matches xx only where abc does NOT come
# right before it; abc itself is not part of the match.
print("No.K")
to_match("nuosu", r"(?<!nu)", 1)  # zero-width: empty match wherever "nu" does not precede
to_match("nuosu", r"(?<!nu).", 2)
to_match("nuosu", r"(?<!nu)osu", 3)
# No.1:
# No.1:
# No.1:
# No.1:
# No.1:
# No.2: n
# No.2: u
# No.2: s
# No.2: u
# No.3: No Match
# (?=(abc)de|fg) -- a lookahead whose body is an alternation.  It succeeds,
# without consuming text, wherever "abcde" or "fg" follows.  findall reports
# group 1, which is empty when the group-less alternative matched.
# NOTE: this is not a true conditional; Python spells that (?(id)yes|no).
print("No.L")
to_match("nuosu", r"(?=(nuo)su|abc)", 1)
to_match("nuosu", r"(?=(o)su|u)", 2)
# No.1: nuo
# No.2:
# No.2: o
# No.2:
# 6. Miscellaneous
print("\n\n6,其他")

# \d matches a digit; \w matches a word character.
to_match("nuosu", r"\d", 1)
to_match("nuosu", r"\w\w\w", 2)
# No.1: No Match
# No.2: nuo

# "|" chooses between alternatives.
to_match("nuosu", r"su|us", 1)
to_match("nuous", r"su|us", 2)
# No.1: su
# No.2: us
# 7. Nested groups combined
print("\n\n7,嵌套组合")

# With several capture groups findall yields one tuple per match, holding the
# groups in order of their opening parenthesis; a group that did not take
# part in the match shows up as ''.
to_match("nuonuouououonaoaonsu", r"(n)((uo)+|(ao)+)", 1)
# No.1: ('n', 'uo', 'uo', '')
# No.1: ('n', 'uouououo', 'uo', '')
# No.1: ('n', 'aoao', '', 'ao')
# 8,命名捕获组
import re
print("\n\n8,命名捕获组")
# re.search takes its arguments as (pattern, string).
# (?P<name>...) names a group so it can be read back by name rather than by
# position.  The result is kept in ``named_match`` rather than ``match`` so
# the file-level ``from re import match`` is not overwritten.
named_match = re.search(r"(?P<No1>osu)--(?P<No2>AY)", "nuosu--AYI")
if named_match:
    No1 = named_match.group("No1")
    No2 = named_match.group("No2")
    print(f"No1 is {No1},No2 is {No2}")
    # No1 is osu,No2 is AY
else:
    print("No Match")
# 9. Greedy vs. lazy quantifiers
# Print only the first match of a pattern: (text, pattern, sequence number).
# The helper is defined BEFORE the demo calls below; calling it first raised
# NameError when the script was run top to bottom.
def single_to_match(words, pattern, No):
    """Print the first match of ``pattern`` in ``words``.

    Args:
        words: The source string to search.
        pattern: The regular expression to apply.
        No: Sequence number used to label the printed line.

    Prints ``No.<No>:  <text>`` where ``<text>`` is the first capture group
    if the pattern has one, otherwise the whole match.  Prints
    ``No.<No>: No Match`` when the pattern is not found.
    """
    found = re.search(pattern, words)
    if found is None:
        print("No." + str(No) + ": No Match")
        return
    # group(1) raises IndexError on a pattern without groups, so fall back to
    # the whole match in that case.
    captured = found.group(1) if found.re.groups else found.group(0)
    print("No." + str(No) + ": ", captured)


single_to_match("nuoooosuoosu", r"(.+?)su", 1)  # lazy: stops at the first "su"
single_to_match("nuoooosuoosu", r"(.+)su", 2)  # greedy: runs to the last "su"
# No.1: nuoooo
# No.2: nuoooosuoo
正则表达式使用规则全理解
1,单个匹配
匹配一个字符:. . 匹配一个字符
匹配一个自己:a a 匹配一个自己a aa匹配两个自己aa
2,位置匹配
在开头匹配:^ ^abc 取以abc开头
在结尾匹配:$ abc$ 取以abc结尾
3,个数匹配
零个或一个:? a? 零个或一个a
大于等于零个:* a* 大于等于零个a
大于等于一个:+ a+ 大于等于一个a
4,转义匹配
匹配转义字符:\ \. 匹配一个.(点)
5,括号操作
{ }:a{2} 限制a出现两次
{ , }:a{m,n} 限制a出现的次数在m到n之间(含m和n)
[ ]:[abc] [a-z0-9] 匹配的字符在[]中
[^ ]:[^abc] [^a-z0-9] 排除在[]中出现的字符
( ):(abc) 捕获 把abc作为一组提取出来
( | ):(abc|cde) 捕获 把abc或cde作为一组提取出来
(?: ):(?:abc) 非捕获 只分组不捕获,abc参与匹配但不会单独作为一组提取
(?= ):xx(?=abc) 非捕获 把后面紧跟abc的xx提取出来不包括abc
(?! ):xx(?!abc) 非捕获 把后面不紧跟abc的xx提取出来不包括abc
(?<= ):(?<=abc)xx 非捕获 把前面紧跟abc的xx提取出来不包括abc
(?<! ):(?<!abc)xx 非捕获 把前面不紧跟abc的xx提取出来不包括abc
(?=() | ):带“或”的前瞻 (?=(abc)de|fg) 后面紧跟abcde(同时捕获abc)或fg时匹配成功,本身不消耗字符;注意这不是条件匹配,真正的条件匹配写作 (?(1)de|fg),即第1组已匹配则匹配de,否则匹配fg
6,其他
\d:匹配数字
\w:匹配大小写字母、数字和下划线
|:或 abc|cd 匹配abc或cd
7,嵌套组合
(n)((uo)+|(ao)+)
....
8,命名捕获组
见代码中
9,贪心greedy
.*? 尽可能短地匹配/非贪心
.* 尽可能长地匹配/贪心