Python核心编程 - 正则表达式_python核心编程正则-CSDN博客

本文链接：https://blog.csdn.net/MathisonPan/article/details/104137658

本文深入探讨了正则表达式的各种应用场景，包括匹配、查找、替换、分组和断言等高级技巧，通过实例演示如何高效处理字符串，是程序员不可多得的正则表达式实践宝典。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import re

# Two capture groups: an optional run of capital letters, then one or more
# digits.
pattern = '([A-Z]*)([0-9]+)'
# Keep the compiled object. re.compile() has no side effect, so calling it
# and throwing the result away accomplishes nothing.
regex = re.compile(pattern)

print(regex.match('AAAKJSI899'))  # Match object; call .group() for the text
print(regex.match('AAAKJSI899').group(2))  # second group: '899'
print(regex.search('AAAKJSI899'))  # Match object; call .group() for the text
print(regex.findall('AAAKJSI899'))  # [('AAAKJSI', '899')]
# finditer() returns a lazy iterator; list the matched text so the output is
# readable instead of an opaque iterator repr.
print([found.group() for found in regex.finditer('AAAKJSI899')])
print(regex.split('___AAAKJSI899'))  # captured groups are kept in the result
print(regex.sub('XXX9', '___AAAKJSI899'))  # '___XXX9'

# Difference between match() and search(): match() only tries at the very
# start of the string, while search() scans forward for the first hit.
for finder in (re.match, re.search):
    m = finder('foo', 'afoodkof')
    if m:  # match() yields None here; search() finds 'foo' at index 1
        print(m.group())

# With alternation, only the first (leftmost) hit is returned.
first_hit = re.search('foo|abc', 'jifoojiabcji')
print(first_hit.group())

p = '.end'  # '.' stands for any single character except '\n'
newline_case = re.match(p, '\nend')
print(newline_case)  # None, because '.' refuses the leading newline
letter_case = re.match(p, 'bend')
print(letter_case.group())
# One character from each bracketed set, in order.
char_classes = re.compile('[ab][12][cd]')
print(char_classes.match('a2ddd').group())

# Raw string: '\w' and '\.' are not valid Python string escapes, so a plain
# literal triggers an invalid-escape warning on current interpreters.
# Matches name@domain.com with at most one optional sub-domain label.
mail = r'\w+@(\w+\.)?\w+\.com'
print(re.match(mail, 'panjicai@xxx.gmail.com').group())

# Raw string keeps '\w' and '\d' as regex escapes rather than (invalid)
# Python string escapes.
p = r'(\w\w\w)-(\d\d\d)'
m = re.match(p, 'abc-123')
print(m.group())  # the whole match
print(m.group(1))  # subgroup 1
print(m.group(2))  # subgroup 2
print(m.groups())  # tuple of all subgroups

# Anchors: '^' pins the match to the start (it applies under search() too),
# r'\b' demands a word boundary and r'\B' demands the absence of one.
# The boundary patterns must be raw strings: in a normal literal '\b' is a
# backspace character.
for p, subject in (
    ('^the', 'the boy'),
    (r'\bthe', 'bite the dog'),
    (r'\Bthe', 'bitethe dog'),
):
    m = re.search(p, subject)
    print(m.group())

p = 'car'
car_finder = re.compile(p)
# findall() collects every non-overlapping hit into a list.
m = car_finder.findall('car my car is blue car')
print(m)

# Raw string so '\w' reaches the regex engine instead of being treated as an
# (invalid) Python string escape.
p = r'(th\w+) and (th\w+)'
# With several groups, findall() returns one tuple of groups per match.
m = re.findall(p, 'This and That', re.I)
print(m)    # [('This', 'That')] - re.I ignores case but keeps the original text
# finditer() yields Match objects lazily; here there is a single match.
m = re.finditer(p, 'This and that', re.I)
for g in m:
    print(g.group())
p = r'(th\w+)'
m = re.finditer(p, 'This and that', re.I)
# The iterator can also be advanced by hand: two words start with "th".
print(next(m).group())
print(next(m).group())

p = 'X'
s = 'X:jicai:X-jisd'
placeholder = re.compile(p)
# sub() hands back just the rewritten text ...
m = placeholder.sub('pan', s)
print(m)
# ... whereas subn() returns a (new_text, replacement_count) tuple.
m = placeholder.subn('pan', s)
print(m)

# Convert MM/DD/YY{,YY} into DD/MM/YY{,YY}.
# In the replacement, \N refers to capture group number N.
# The 4-digit alternative is listed first: alternation takes the first branch
# that succeeds, so with \d{2} in front a year such as 2020 would only ever
# be captured as '20'.
p = r'(\d{1,2})/(\d{1,2})/(\d{4}|\d{2})'
s = '1/20/91'
m = re.sub(p, r'\2/\1/\3', s)
print(m)
s = '2/1/2020'
m = re.sub(p, r'\2/\1/\3', s)
print(m)

print(re.split(':', 'str1:str2:str3'))  # a plain one-character delimiter

data = ('Monutain View, CA 94040',
        'Sunnyvale, CA',
        'Los Altos, 94023',
        'Cupertino 95014',
        'Palo Alto CA')
# Split on ", " or on a single space that is immediately followed by a
# five-digit number or two capital letters (the lookahead checks what comes
# after the space without consuming it).  Compiled once, outside the loop,
# and written as a raw string so '\d' is not an invalid string escape.
field_sep = re.compile(r', |(?= (?:\d{5}|[A-Z]{2})) ')
for datum in data:
    print(field_sep.split(datum))

# An inline (?i) flag has the same effect as passing re.I.
yes_any_case = re.compile(r'(?i)yes')
m = yes_any_case.findall('Yes:yeS:YES')
print(m)
s = """
This line is the fisrt,
another line,
that line, it's the best
"""
# (?im) combines re.I and re.M: case-insensitive, and '^' anchors at the
# start of every line, so the search works line by line.
line_starts = re.compile(r'(?im)^th[\w ]+')
m = line_starts.findall(s)
print(m)

# (?s) / re.S: '.' also matches '\n'.
# (?x) / re.X: verbose mode - the pattern may contain '#' comments (a literal
#              '#' must be escaped as '\#'), which makes it easier to read.
# (?P<name>...) names a group; (?P=name) re-matches it inside the pattern and
#              \g<name> refers to it in a replacement string.
p = r'(?P<one>\d{3})-(?P<two>\w{3})'
m = re.search(p, '123-abc')
print(m.groupdict())   # {'one': '123', 'two': 'abc'}
p = r'(?P<one>\d{3})-(?P<two>\w{3}) (?P=one)---(?P=two)'
m = re.search(p, '123-abc 123---abc')
print(m.group())
# The back-reference pattern only matches the full "123-abc 123---abc" text;
# running sub() on just '123-abc' would find nothing and return the input
# untouched, so the \g<name> replacement is applied to the matching text.
m = re.sub(p, r'\g<two>-\g<one>', '123-abc 123---abc')
print(m)   # abc-123

# (?:...) groups without capturing, so the optional leading labels are
# matched but not stored; only the captured "<label>.com" tail is collected.
p = r'(?:\w+\.)*(\w+\.com)'
hosts = 'google.com www.pan.com www.baidu.jicai.com'
m = [hit.group(1) for hit in re.finditer(p, hosts)]
print(m)

# (?=...) is a positive lookahead and (?!...) a negative one; both only
# assert and consume no characters.

# (?(1)y|x) is a conditional: if group 1 (the 'x') took part in the match the
# next character must be 'y', otherwise it must be 'x'.
for sample in ('xy', 'yx', 'xx'):
    print(bool(re.search(r'(?:(x)|y)(?(1)y|x)', sample)))

# Greedy vs. non-greedy matching.
# Raw strings keep '\d' as a regex escape instead of an invalid string escape.
p = r'.+(\d+-\d+-\d+)'
m = re.match(p, 'www123456379-10-23')
# The greedy '.+' swallows as much as it can, leaving only one digit for the
# first \d+.
print(m.group(1))   # 9-10-23
p = r'.+?(\d+-\d+-\d+)'    # '?' after a quantifier: match as little as possible
m = re.match(p, 'www123456379-10-23')
print(m.group(1))   # 123456379-10-23