import re
# Overview of the module-level re functions, all driven by one pattern:
# group 1 is a (possibly empty) run of capital letters, group 2 a run of digits.
pattern = r'([A-Z]*)([0-9]+)'
# Keep the compiled object instead of discarding the result of re.compile().
regex = re.compile(pattern)

print(regex.match('AAAKJSI899'))  # a Match object; call .group() for the text
print(re.match(pattern, 'AAAKJSI899').group(2))  # 899
print(re.search(pattern, 'AAAKJSI899'))  # a Match object; call .group() for the text
print(re.findall(pattern, 'AAAKJSI899'))  # [('AAAKJSI', '899')]
print(re.finditer(pattern, 'AAAKJSI899'))  # an iterator yielding Match objects
print(re.split(pattern, '___AAAKJSI899'))  # ['___', 'AAAKJSI', '899', '']
print(re.sub(pattern, 'XXX9', '___AAAKJSI899'))  # ___XXX9

# Difference between search and match
# match() only tries at the very start of the string, so 'foo' is not found.
m = re.match('foo', 'afoodkof')
if m is not None:  # m is None here, so nothing is printed
    print(m.group())

# search() scans through the string and returns the first occurrence.
m = re.search('foo', 'afoodkof')
if m is not None:  # m is a Match object here
    print(m.group())  # foo

# Even with alternation, search() returns only the first hit.
print(re.search('foo|abc', 'jifoojiabcji').group())  # foo
# '.' matches any single character except '\n'.
p = '.end'
print(re.match(p, '\nend'))  # None
print(re.match(p, 'bend').group())  # bend
print(re.match('[ab][12][cd]', 'a2ddd').group())  # a2d

# Raw strings keep \w, \d and \. intact for the regex engine and avoid
# Python's invalid-escape-sequence warnings.
mail = r'\w+@(\w+\.)?\w+\.com'
print(re.match(mail, 'panjicai@xxx.gmail.com').group())  # panjicai@xxx.gmail.com

p = r'(\w\w\w)-(\d\d\d)'
m = re.match(p, 'abc-123')
print(m.group())  # the whole match
print(m.group(1))  # subgroup 1
print(m.group(2))  # subgroup 2
print(m.groups())  # all subgroups as a tuple
p = '^the'
m = re.search(p, 'the boy')  # ^ anchors search() to the start of the string
print(m.group())  # the

p = r'\bthe'  # must be a raw string: in a plain literal '\b' is a backspace
m = re.search(p, 'bite the dog')  # 'the' at a word boundary
print(m.group())  # the

p = r'\Bthe'  # raw string needed here as well
m = re.search(p, 'bitethe dog')  # 'the' NOT at a word boundary
print(m.group())  # the
p = 'car'
m = re.findall(p, 'car my car is blue car')  # findall() returns a list
print(m)  # ['car', 'car', 'car']

p = r'(th\w+) and (th\w+)'
# With subgroups, each list element is a tuple of the captured groups.
m = re.findall(p, 'This and That', re.I)
print(m)  # [('This', 'That')]

# finditer() yields Match objects lazily instead of building a list.
m = re.finditer(p, 'This and that', re.I)
for g in m:
    print(g.group())  # This and that

p = r'(th\w+)'
m = re.finditer(p, 'This and that', re.I)
print(next(m).group())  # This
print(next(m).group())  # that
p = 'X'
s = 'X:jicai:X-jisd'
m = re.sub(p, 'pan', s)
print(m)  # pan:jicai:pan-jisd
m = re.subn(p, 'pan', s)  # subn() also reports the number of substitutions
print(m)  # ('pan:jicai:pan-jisd', 2)

# Convert MM/DD/YY{,YY} to DD/MM/YY{,YY}.
# \N in the replacement refers to subgroup N.
# The four-digit alternative is listed first: alternation takes the first
# branch that matches, so '\d{2}|\d{4}' would capture only '20' of '2020'.
p = r'(\d{1,2})/(\d{1,2})/(\d{4}|\d{2})'
s = '1/20/91'
m = re.sub(p, r'\2/\1/\3', s)
print(m)  # 20/1/91
s = '2/1/2020'
m = re.sub(p, r'\2/\1/\3', s)
print(m)  # 1/2/2020

print(re.split(':', 'str1:str2:str3'))  # ['str1', 'str2', 'str3']
data = (
    'Monutain View, CA 94040',
    'Sunnyvale, CA',
    'Los Altos, 94023',
    'Cupertino 95014',
    'Palo Alto CA',
)
# Split on ', ' or on a single space that is immediately followed by five
# digits or two capital letters; the lookahead (?=...) only checks what comes
# next, the trailing ' ' is the part that is consumed as the separator.
# Compiled once outside the loop; raw string so \d is passed through as-is.
splitter = re.compile(r', |(?= (?:\d{5}|[A-Z]{2})) ')
for datum in data:
    print(splitter.split(datum))
m = re.findall(r'(?i)yes', 'Yes:yeS:YES')  # inline (?i) is equivalent to re.I
print(m)  # ['Yes', 'yeS', 'YES']

s = """
This line is the fisrt,
another line,
that line, it's the best
"""
# (?im) combines re.I and re.M, so ^ matches at the start of every line.
m = re.findall(r'(?im)^th[\w ]+', s)
print(m)  # ['This line is the fisrt', 'that line']

# (?s) / re.S: '.' also matches '\n'.
# (?x) / re.X: allows comments (# ...) inside the pattern to make it more
#   readable; a literal # must then be escaped as \#.
# (?P<name>...) names a group, (?P=name) reuses it inside the pattern, and
#   \g<name> refers to it in a replacement string.
# Named groups: (?P<name>...) defines one, groupdict() returns them by name.
named = r'(?P<one>\d{3})-(?P<two>\w{3})'
p = named
m = re.search(p, '123-abc')
print(m.groupdict())  # {'one': '123', 'two': 'abc'}

# (?P=name) matches the same text the named group captured earlier.
p = r'(?P<one>\d{3})-(?P<two>\w{3}) (?P=one)---(?P=two)'
m = re.search(p, '123-abc 123---abc')
print(m.group())  # 123-abc 123---abc

# \g<name> refers to a named group in the replacement. This uses the plain
# named-group pattern: the back-reference pattern above cannot match
# '123-abc', so substituting with it would leave the string unchanged.
m = re.sub(named, r'\g<two>-\g<one>', '123-abc')
print(m)  # abc-123

# (?:...) groups without saving the matched text.
p = r'(?:\w+\.)*(\w+\.com)'
m = re.findall(p, 'google.com www.pan.com www.baidu.jicai.com')
print(m)  # ['google.com', 'pan.com', 'jicai.com']

# (?=...) is a positive lookahead and (?!...) a negative lookahead; neither
# consumes any input.
# (?(1)y|x): if group 1 took part in the match require 'y', otherwise 'x'.
print(bool(re.search(r'(?:(x)|y)(?(1)y|x)', 'xy')))  # True
print(bool(re.search(r'(?:(x)|y)(?(1)y|x)', 'yx')))  # True
print(bool(re.search(r'(?:(x)|y)(?(1)y|x)', 'xx')))  # False

# Greedy matching
# Greedy: '.+' grabs as much as it can, leaving the group only the minimum
# it needs in order to match.
p = r'.+(\d+-\d+-\d+)'
m = re.match(p, 'www123456379-10-23')
print(m.group(1))  # 9-10-23

# Non-greedy: the trailing ? makes '.+' match as little as possible.
p = r'.+?(\d+-\d+-\d+)'
m = re.match(p, 'www123456379-10-23')
print(m.group(1))  # 123456379-10-23