import re
# Tour of the main ``re`` entry points, all driven by one pattern:
# group 1 is an optional run of upper-case letters, group 2 a run of digits.
pattern = '([A-Z]*)([0-9]+)'
# Keep the compiled object: calling re.compile() and discarding the result
# has no effect on the later calls.
regex = re.compile(pattern)
print(regex.match('AAAKJSI899'))  # a Match object; use .group() for the text
print(regex.match('AAAKJSI899').group(2))  # '899'
print(regex.search('AAAKJSI899'))  # a Match object; use .group() for the text
print(regex.findall('AAAKJSI899'))  # [('AAAKJSI', '899')]
# finditer() yields Match objects lazily; materialise them so the output
# shows the matches instead of an iterator repr.
print([found.group() for found in regex.finditer('AAAKJSI899')])
print(regex.split('___AAAKJSI899'))  # ['___', 'AAAKJSI', '899', '']
print(regex.sub('XXX9', '___AAAKJSI899'))  # '___XXX9'
# Difference between search() and match():
# match() only succeeds at the very start of the string, while search()
# scans forward for the first position where the pattern matches.
m = re.match('foo', 'afoodkof')
if m is not None:  # not taken: the string starts with 'a', so m is None
    print(m.group())
m = re.search('foo', 'afoodkof')
if m is not None:  # taken: 'foo' is found at index 1
    print(m.group())
# With alternation, search() still returns only the first (leftmost) match.
print(re.search('foo|abc', 'jifoojiabcji').group())  # 'foo'
p = '.end'  # '.' matches any single character except '\n'
print(re.match(p, '\nend'))  # None: '.' cannot match the leading newline
print(re.match(p, 'bend').group())  # 'bend'
print(re.match('[ab][12][cd]', 'a2ddd').group())  # 'a2d'
# Raw string so that \w and \. reach the regex engine untouched instead of
# being parsed as (invalid) string escape sequences.
mail = r'\w+@(\w+\.)?\w+\.com'
print(re.match(mail, 'panjicai@xxx.gmail.com').group())
# Capturing groups: three word characters, a dash, three digits.
# Raw string keeps \w and \d intact for the regex engine.
p = r'(\w\w\w)-(\d\d\d)'
m = re.match(p, 'abc-123')
print(m.group())  # the whole match: 'abc-123'
print(m.group(1))  # group 1: 'abc'
print(m.group(2))  # group 2: '123'
print(m.groups())  # every group: ('abc', '123')
# Anchors and word boundaries, each tried with search():
#   '^the'   - '^' pins the match to the start of the string
#   r'\bthe' - '\b' demands a word boundary before 'the' (raw string required)
#   r'\Bthe' - '\B' demands that there is NO word boundary before 'the'
for p, subject in (
    ('^the', 'the boy'),
    (r'\bthe', 'bite the dog'),
    (r'\Bthe', 'bitethe dog'),
):
    m = re.search(p, subject)
    print(m.group())
p = 'car'
m = re.findall(p, 'car my car is blue car')  # findall() returns a list
print(m)  # ['car', 'car', 'car']
# Raw string keeps \w intact for the regex engine.
p = r'(th\w+) and (th\w+)'
# With several groups, each list element is a tuple of the groups.
m = re.findall(p, 'This and That', re.I)
print(m)  # [('This', 'That')] - re.I ignores case but keeps the original text
m = re.finditer(p, 'This and that', re.I)
for g in m:
    print(g.group())  # 'This and that'
p = r'(th\w+)'
m = re.finditer(p, 'This and that', re.I)
print(next(m).group())  # 'This'
print(next(m).group())  # 'that'
# sub() returns just the rewritten string; subn() returns a
# (new_string, number_of_replacements) tuple.
p, s = 'X', 'X:jicai:X-jisd'
for replace in (re.sub, re.subn):
    m = replace(p, 'pan', s)
    print(m)
# Convert MM/DD/YY{,YY} to DD/MM/YY{,YY}; \N in the replacement is group N.
# The four-digit year alternative is listed first: alternation is tried left
# to right, so with \d{2} first group 3 would capture only '20' of '2020'
# (the printed result merely looked right because the unmatched tail of the
# string is copied through unchanged).
p = r'(\d{1,2})/(\d{1,2})/(\d{4}|\d{2})'
s = '1/20/91'
m = re.sub(p, r'\2/\1/\3', s)
print(m)  # '20/1/91'
s = '2/1/2020'
m = re.sub(p, r'\2/\1/\3', s)
print(m)  # '1/2/2020'
print(re.split(':', 'str1:str2:str3'))  # ['str1', 'str2', 'str3']
data = (
    'Monutain View, CA 94040',
    'Sunnyvale, CA',
    'Los Altos, 94023',
    'Cupertino 95014',
    'Palo Alto CA',
)
# Split on ', ' or on a single space that is immediately followed by a
# five-digit number or two upper-case letters. The lookahead (?= ...) only
# peeks, so the trailing ' ' is the character actually consumed.
# Compiled once outside the loop; raw string keeps \d intact.
address_sep = re.compile(r', |(?= (?:\d{5}|[A-Z]{2})) ')
for datum in data:
    print(address_sep.split(datum))
# Inline flags: a leading (?i) acts like passing re.I.
yes_any_case = re.compile(r'(?i)yes')
m = yes_any_case.findall('Yes:yeS:YES')
print(m)
s = """
This line is the fisrt,
another line,
that line, it's the best
"""
# (?im) combines re.I and re.M, so '^' matches at the start of every line
# and the search works across the lines of the text.
line_start_th = re.compile(r'(?im)^th[\w ]+')
m = line_start_th.findall(s)
print(m)
# (?s) / re.S lets '.' match '\n' as well.
# (?x) / re.X allows whitespace and '#' comments inside the pattern (a literal
# '#' must then be escaped as '\#'), which keeps long patterns readable.
# (?P<name>...) names a group; (?P=name) re-matches it inside the pattern and
# \g<name> refers to it in a replacement string.
p = r'(?P<one>\d{3})-(?P<two>\w{3})'
m = re.search(p, '123-abc')
print(m.groupdict())  # {'one': '123', 'two': 'abc'}
p = r'(?P<one>\d{3})-(?P<two>\w{3}) (?P=one)---(?P=two)'
m = re.search(p, '123-abc 123---abc')
print(m.group())  # '123-abc 123---abc'
# Substitute on a string the pattern really matches: on plain '123-abc' the
# back-references cannot match, so sub() would return its input unchanged and
# \g<name> would never be exercised.
m = re.sub(p, r'\g<two>-\g<one>', '123-abc 123---abc')
print(m)  # 'abc-123'
# (?:...) groups without capturing, so only the trailing '<name>.com' part
# (group 1) is collected for each match.
p = r'(?:\w+\.)*(\w+\.com)'
hosts = 'google.com www.pan.com www.baidu.jicai.com'
m = [hit.group(1) for hit in re.finditer(p, hosts)]
print(m)
# (?=...) is a positive and (?!...) a negative lookahead assertion; both only
# test what follows without consuming it.
# The pattern below uses a conditional instead: (?(1)y|x) requires 'y' when
# group 1 (the 'x') took part in the match and 'x' otherwise, so the two
# characters must differ.
for candidate in ('xy', 'yx', 'xx'):
    print(bool(re.search(r'(?:(x)|y)(?(1)y|x)', candidate)))
# Greedy vs. non-greedy quantifiers (raw strings keep \d intact).
# Greedy '.+' swallows as much as it can, leaving the group a single digit.
p = r'.+(\d+-\d+-\d+)'
m = re.match(p, 'www123456379-10-23')
print(m.group(1))  # '9-10-23'
# A trailing '?' makes the quantifier match as little as possible.
p = r'.+?(\d+-\d+-\d+)'
m = re.match(p, 'www123456379-10-23')
print(m.group(1))  # '123456379-10-23'
Python核心编程 - 正则表达式
最新推荐文章于 2025-08-10 20:39:24 发布
本文深入探讨了正则表达式的各种应用场景,包括匹配、查找、替换、分组和断言等高级技巧,通过实例演示如何高效处理字符串,是程序员不可多得的正则表达式实践宝典。
4万+

被折叠的 条评论
为什么被折叠?



