正则表达式:re 模块是 Python 内置的标准库
re.findall() 查找所有匹配的数据,以列表的形式返回;匹配不到数据时返回空列表
re.search() 匹配第一个符合规则的数据,返回一个匹配对象;匹配不到时返回 None
re.match() 从字符串的开头匹配符合规则的数据(必须位于字符串的开头),返回一个匹配对象;匹配不到时返回 None
re.sub() 匹配内容并替换,返回替换后的新字符串
import re
# -------------------- Single-character classes (metacharacters) --------------------
# re.findall() returns every non-overlapping match as a list.
# All patterns are raw strings so that regex escapes such as \d reach the
# regex engine unchanged instead of being parsed as (invalid) string escapes.

# .  matches any single character except "\n"
res = re.findall(r'.', '123456!@#$%^&*()P{_+ljhgAS}|\\,./?><\n')
print(res)
# ['1', '2', '3', '4', '5', '6', '!', '@', '#', '$', '%',
# '^', '&', '*', '(', ')', 'P', '{', '_', '+', 'l', 'j',
# 'h', 'g', 'A', 'S', '}', '|', '\\', ',', '.', '/', '?', '>', '<']

# []  lists the characters that may match; the set must not be empty
# [12a]  matches 1, 2 or a -> ['1', '2', 'a']
# [1-3]  matches 1, 2 or 3 -> ['1', '2', '3']
# [0-9a-zA-Z/.,]  ranges and literal characters can be combined
res = re.findall(r'[1-3]', '123asAQ,.,.')
print(res)
# ['1', '2', '3']

# \d  matches any single digit
s = 'aaQQ..1a269o\n'
res = re.findall(r'\d', s)
print(res)
# ['1', '2', '6', '9']

# \D  matches any single non-digit
res = re.findall(r'\D', s)
print(res)
# ['a', 'a', 'Q', 'Q', '.', '.', 'a', 'o', '\n']

# \s  matches any single whitespace character (space, tab, "\n", ...)
res = re.findall(r'\s', '123 as AQ,.,.\n')
print(res)
# [' ', ' ', '\n']

# \S  matches any single character that is NOT whitespace
res = re.findall(r'\S', '123 as AQ,.,.\n')
print(res)
# ['1', '2', '3', 'a', 's', 'A', 'Q', ',', '.', ',', '.']

# \w  matches any single word character (letter, digit or underscore)
res = re.findall(r'\w', '123 a_s AQ,.,.\n')
print(res)
# ['1', '2', '3', 'a', '_', 's', 'A', 'Q']

# \W  matches any single non-word character
res = re.findall(r'\W', '123 a_s AQ,.,.\n')
print(res)
# [' ', ' ', ',', '.', ',', '.', '\n']
# -------------------- Quantifiers (how many characters) --------------------
# Patterns are raw strings so that \d, \s, \w ... are passed to the regex
# engine unchanged instead of being parsed as (invalid) string escapes.
s = 'aaQQ..1a269o\n'

# \d{n}  matches exactly n consecutive digits (n = 3 here)
res = re.findall(r'\d{3}', s)
print(res)
# ['269']

# \D{n}  matches exactly n consecutive non-digits
res = re.findall(r'\D{3}', s)
print(res)
# ['aaQ', 'Q..']

# \s{n}  matches exactly n consecutive whitespace characters
res = re.findall(r'\s{2}', '123 as AQ,.,.\n')
print(res)
# []  (this sample never contains two whitespace characters in a row)

# \S{n}  matches exactly n consecutive non-whitespace characters
res = re.findall(r'\S{2}', '123 as AQ,.,.\n')
print(res)
# ['12', 'as', 'AQ', ',.', ',.']

# \w{n}  matches exactly n consecutive word characters
res = re.findall(r'\w{2}', '123 a__s AQ,.,.\n')
print(res)
# ['12', 'a_', '_s', 'AQ']

# \W{n}  matches exactly n consecutive non-word characters
res = re.findall(r'\W{2}', '123 a_s AQ,.,.\n')
print(res)
# [',.', ',.']

s = 'aaQQ..11a269o1113\n'

# \d{2,4}  matches 2 to 4 consecutive digits (2, 3 or 4 all qualify)
res = re.findall(r'\d{2,4}', s)
print(res)
# ['11', '269', '1113']

# \d{2,}  matches at least 2 consecutive digits
res = re.findall(r'\d{2,}', s)
print(res)
# ['11', '269', '1113']

# *  the preceding item may repeat 0 or more times
res = re.findall(r'.*', s)
print(res)
# ['aaQQ..11a269o1113', '', '']

# +  the preceding item must repeat 1 or more times
res = re.findall(r'.+', s)
print(res)
# ['aaQQ..11a269o1113']  greedy matching takes the whole line

# NOTE: quantifiers are greedy by default, i.e. they take as much as they can.
# Appending ? to a quantifier ({n,m}, {n,}, * or +) makes it non-greedy, so
# it takes as little as possible.
res = re.findall(r'\d{2,4}?', s)
print(res)
# ['11', '26', '11', '13']
res = re.findall(r'\d{3,}?', s)
print(res)
# ['269', '111']
res = re.findall(r'\d*?', s)
print(res)
# ['', '', '', '', '', '', '', '1', '', '1', '', '', '2', '', '6', '', '9', '', '', '1', '', '1', '', '1', '', '3', '', '']
res = re.findall(r'\d+?', s)
print(res)
# ['1', '1', '2', '6', '9', '1', '1', '1', '3']
# --------------------字符边界的表示-------------------------
import re
# ^test  matches "test" only at the start of the string
res = re.findall(r'^test', 'test-1-2-oo-test')
print(res)
# ['test']

# test$  matches "test" only at the end of the string
res = re.findall(r'test$', 'test-1-2-oo-test')
print(res)
# ['test']

# \b  word boundary. In an ordinary string literal '\b' is the backspace
# character, so the pattern has to be a raw string.
res = re.findall(r'\btest', 'test1-1-2-oo-1test')
print(res)
# ['test']

# \B  any position that is NOT a word boundary (raw string, like every
# other regex escape)
res = re.findall(r'test\B', '1test-1-2-oo-test1')
print(res)
# ['test']
import re
# |  alternation: a match succeeds when any one of the alternatives matches
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'
# Raw string so that \d is passed to the regex engine unchanged.
res = re.findall(r'[a-z]{3}|\d{3}', data)
print(res)
# ['mem', 'ber', 'mem', 'ber', 'pwd', 'pwd', 'use', 'use', 'loa', 'loa']

# ()  grouping
# Without a group, findall returns the whole match ...
res = re.findall(r'#.+?#', data)
print(res)
# ['#member_id#', '#pwd#', '#user#', '#loan#']
# ... with one group, findall returns only the text captured by the group.
res = re.findall(r'#(.+?)#', data)
print(res)
# ['member_id', 'pwd', 'user', 'loan']

# Several groups in one pattern: each match becomes a tuple of the groups.
res = re.findall(r'#(.+?)#.+?#(.+?)#.+?#(.+?)#.+?#(.+?)#', data)
print(res)
# [('member_id', 'pwd', 'user', 'loan')]
import re
# Sample text shared by every demo below: placeholders are written as #name#.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'

# re.search(): scans the string and returns a match object for the first hit.
res = re.search(pattern='#.+?#', string=data)
print(res)  # <re.Match object; span=(14, 25), match='#member_id#'>
print(res[0])  # #member_id#

res = re.search(pattern='#(.+?)#', string=data)
print(res)  # <re.Match object; span=(14, 25), match='#member_id#'>
print(res[0])  # #member_id#   (index 0 is the whole match)
print(res[1])  # member_id     (index 1 is the first parenthesised group)

# re.match(): like search, but the match must begin at position 0;
# returns None otherwise.
res = re.match(pattern='#.+?#', string=data)
print(res)  # None: the string starts with '{', so only something like re.match(r'{', data) would succeed

# re.sub(pattern, repl, string, count=0): count=0 (the default) replaces
# every match, a positive count limits the number of replacements.
res = re.sub(pattern='#.+?#', repl='123', string=data)
print(res)  # {"member_id":"123","pwd":"123","user":"123","loan":"123"}
res = re.sub(pattern='#.+?#', repl='123', string=data, count=1)
print(res)  # {"member_id":"123","pwd":"#pwd#","user":"#user#","loan":"#loan#"}
import re
class Test:
    """Sample values used to fill the ``#name#`` placeholders of the demo.

    The attribute names equal the placeholder names, so the demo code can
    fetch a value with ``getattr(Test, name)``.
    """

    member_id = 123
    pwd = '123456'
    user = 'daxigua'
    loan = 5
# Template whose #name# placeholders are filled in one at a time below.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'

# Round 1: the first placeholder found is #member_id#.
res = re.search('#(.+?)#', data)[1]
print(res)  # member_id
attr = getattr(Test, res)
print(attr)  # 123
# The replacement must be a str: passing the int itself makes re.sub raise
# "TypeError: decoding to str: need a bytes-like object, int found".
data = re.sub(pattern='#(.+?)#', repl=str(attr), string=data, count=1)
print(data)  # {"member_id":"123","pwd":"#pwd#","user":"#user#","loan":"#loan#"}

# Round 2: #member_id# is gone, so the first placeholder is now #pwd#.
res = re.search('#(.+?)#', data)[1]
print(res)  # pwd
attr = getattr(Test, res)
print(attr)  # 123456
data = re.sub(pattern='#(.+?)#', repl=str(attr), string=data, count=1)
print(data)  # {"member_id":"123","pwd":"123456","user":"#user#","loan":"#loan#"}
使用 while 循环实现所有占位符的替换
import re
class Test:
    """Sample values used to fill the ``#name#`` placeholders of the demo.

    The attribute names equal the placeholder names, so the loops that
    follow can fetch a value with ``getattr(Test, name)``.
    """

    member_id = 123
    pwd = '123456'
    user = 'daxigua'
    loan = 5
# Template whose #name# placeholders are all replaced by the loops below.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'

# Approach 1: replace with re.sub.
# The loop keeps running for as long as a #name# placeholder is left.
while re.search('#(.+?)#', data):
    # Name of the first remaining placeholder, e.g. member_id.
    res = re.search('#(.+?)#', data).group(1)
    # Value stored on Test under that name.
    key = getattr(Test, res)
    # re.sub needs a str replacement; count=1 touches only the first
    # placeholder, i.e. the one that was just looked up.
    data = re.sub('#(.+?)#', str(key), data, count=1)
print(data)
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}

data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'

# Approach 2: replace with str.replace.
while re.search('#(.+?)#', data):
    res = re.search('#(.+?)#', data)
    key = getattr(Test, res.group(1))
    # res.group() is the full "#name#" text; str.replace swaps every
    # occurrence of that exact text.
    data = data.replace(res.group(), str(key))
print(data)
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}
将替换逻辑封装成函数
import re
class Test:
    """Sample values used to fill the ``#name#`` placeholders of the demo.

    The attribute names equal the placeholder names, so a value can be
    fetched with ``getattr(Test, name)``.
    """

    member_id = 123
    pwd = '123456'
    user = 'daxigua'
    loan = 5
# Template whose #name# placeholders are to be replaced.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'


# Approach 1: substitute with re.sub
def re_data(data, cls):
    """Replace every ``#name#`` placeholder in *data* and return the result.

    Placeholders are processed one at a time, always starting again from
    the beginning of the string, until none is left.

    Args:
        data: Text containing zero or more ``#name#`` placeholders.
        cls: Object (typically a class) whose attribute ``name`` supplies
            the value for ``#name#``; the value is converted with ``str``.

    Returns:
        *data* with every placeholder replaced.

    Raises:
        AttributeError: If *cls* has no attribute for a placeholder name.
    """
    while True:
        found = re.search('#(.+?)#', data)
        if found is None:
            return data
        value = str(getattr(cls, found.group(1)))
        # A callable replacement is inserted verbatim. A plain string would
        # be treated as a template, so a value containing a backslash
        # (e.g. a Windows path) would raise re.error or be expanded as a
        # group reference.
        data = re.sub('#(.+?)#', lambda _match: value, data, count=1)
# Fill the template from the attributes of Test using approach 1 and print it.
res1 = re_data(data,Test)
print(res1)
# Output recorded in the original notes:
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}
# Template whose #name# placeholders are to be replaced.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'


# Approach 2: substitute with str.replace
def re_data1(data, cls):
    """Replace every ``#name#`` placeholder in *data* and return the result.

    Args:
        data: Text containing zero or more ``#name#`` placeholders.
        cls: Object (typically a class) whose attribute ``name`` supplies
            the value for ``#name#``; the value is converted with ``str``.

    Returns:
        *data* with every placeholder replaced.

    Raises:
        AttributeError: If *cls* has no attribute for a placeholder name.
    """
    while True:
        found = re.search('#(.+?)#', data)
        if found is None:
            return data
        # Look the value up on the object passed by the caller, not on a
        # hard-coded global class.
        value = getattr(cls, found.group(1))
        # found.group() is the full "#name#" text; str.replace swaps every
        # occurrence of that exact text in one go.
        data = data.replace(found.group(), str(value))
# Fill the template from the attributes of Test using approach 2 and print it.
res1 = re_data1(data,Test)
print(res1)
# Output recorded in the original notes:
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}
# Next variant: part of the replacement data (e.g. pwd) lives in a configuration file.
import re
from common.handle_conf import handle_yaml
class Test:
    """Sample values used to fill the ``#name#`` placeholders of the demo.

    The attribute names equal the placeholder names, so a value can be
    fetched with ``getattr(Test, name)``.
    """

    member_id = 123
    # `pwd` is intentionally not defined here: in this variant it is
    # expected to come from the configuration file instead of the class.
    user = 'daxigua'
    loan = 5
# Template whose #name# placeholders are to be replaced.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'


# Approach 1: substitute with re.sub
def re_data(data, cls):
    """Replace every ``#name#`` placeholder in *data* and return the result.

    The value for ``name`` is looked up in ``handle_yaml`` first; if that
    lookup fails, the attribute ``name`` of *cls* is used instead.

    Args:
        data: Text containing zero or more ``#name#`` placeholders.
        cls: Fallback object (typically a class) whose attribute ``name``
            supplies the value when the configuration has none.

    Returns:
        *data* with every placeholder replaced by ``str(value)``.

    Raises:
        AttributeError: If neither the configuration nor *cls* provides a
            value for a placeholder name.
    """
    while True:
        found = re.search('#(.+?)#', data)
        if found is None:
            return data
        name = found.group(1)
        # Configuration first, class attribute as the fallback.
        # NOTE(review): handle_yaml is presumably a mapping of config
        # values; its failure mode is not visible here, so every ordinary
        # exception triggers the fallback. Unlike a bare `except:`, this no
        # longer swallows KeyboardInterrupt / SystemExit.
        try:
            key = handle_yaml[name]
        except Exception:
            key = getattr(cls, name)
        value = str(key)
        # A callable replacement is inserted verbatim. A plain string would
        # be treated as a template, so a value containing a backslash would
        # raise re.error or be expanded as a group reference.
        data = re.sub('#(.+?)#', lambda _match: value, data, count=1)
# Fill the template using approach 1 (configuration first, then Test) and print it.
res1 = re_data(data,Test)
print(res1)
# Output recorded in the original notes (pwd presumably comes from the
# configuration file, since Test does not define it — confirm):
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}
# Template whose #name# placeholders are to be replaced.
data = '{"member_id":"#member_id#","pwd":"#pwd#","user":"#user#","loan":"#loan#"}'


# Approach 2: substitute with str.replace
def re_data1(data, cls):
    """Replace every ``#name#`` placeholder in *data* and return the result.

    The value for ``name`` is looked up in ``handle_yaml`` first; if that
    lookup fails, the attribute ``name`` of *cls* is used instead.

    Args:
        data: Text containing zero or more ``#name#`` placeholders.
        cls: Fallback object (typically a class) whose attribute ``name``
            supplies the value when the configuration has none.

    Returns:
        *data* with every placeholder replaced by ``str(value)``.

    Raises:
        AttributeError: If neither the configuration nor *cls* provides a
            value for a placeholder name.
    """
    while True:
        found = re.search('#(.+?)#', data)
        if found is None:
            return data
        name = found.group(1)
        # Configuration first, class attribute as the fallback.
        # NOTE(review): handle_yaml is presumably a mapping of config
        # values; its failure mode is not visible here, so every ordinary
        # exception triggers the fallback. Unlike a bare `except:`, this no
        # longer swallows KeyboardInterrupt / SystemExit.
        try:
            key = handle_yaml[name]
        except Exception:
            # Fall back to the object passed by the caller, not to a
            # hard-coded global class.
            key = getattr(cls, name)
        # found.group() is the full "#name#" text; str.replace swaps every
        # occurrence of that exact text in one go.
        data = data.replace(found.group(), str(key))
# Fill the template using approach 2 (configuration first, then Test) and print it.
res1 = re_data1(data,Test)
print(res1)
# Output recorded in the original notes (pwd presumably comes from the
# configuration file, since Test does not define it — confirm):
# {"member_id":"123","pwd":"123456","user":"daxigua","loan":"5"}