随笔:正则表达式(二)
一、方法封装:
import re
def regular_expression(value, start_value='', end_value='', match_type=0):
    """Extract the part of ``value`` bounded by ``start_value`` / ``end_value``.

    Every run of whitespace is removed from ``value`` first, and all matching
    is performed against that whitespace-free text.

    ``start_value`` and ``end_value`` are inserted into the regular expression
    verbatim, so they are interpreted as regex fragments. Escape special
    characters (for example with ``re.escape``) to match them literally.

    :param value: text to search
    :param start_value: delimiter that opens the wanted span ('' = none)
    :param end_value: delimiter that closes the wanted span ('' = none)
    :param match_type: only consulted when both delimiters are given;
        0 returns the text between the delimiters (delimiters excluded),
        any other value returns the span including both delimiters
    :return: the extracted string; the whole whitespace-free text when no
        delimiter is given; ``None`` when the pattern does not match
    """
    # All modes work on the same normalized text (whitespace removed).
    text = re.sub(r'\s+', '', value)

    has_start = start_value != ''
    has_end = end_value != ''

    if not has_start and not has_end:
        # No delimiters: there is nothing to cut away.
        return text

    if has_start and has_end:
        if match_type == 0:
            # Shortest span between the delimiters, delimiters excluded.
            pattern, group = start_value + '(.*?)' + end_value, 1
        else:
            # Shortest span from start to end, delimiters included.
            pattern, group = start_value + '.*?' + end_value, 0
    elif has_end:
        # Greedy: everything before the last occurrence of end_value.
        pattern, group = '(.*)' + end_value, 1
    else:
        # Everything after the first occurrence of start_value.
        pattern, group = start_value + '(.*)', 1

    match = re.search(pattern, text)
    # Report a miss as None instead of failing on ``None.group``.
    return match.group(group) if match else None
二、示例
# Sample text plus the two delimiters shared by all four calls below.
value = '1tagtask_ogdata_c2...'
start_value, end_value = '1', '2'

# Both delimiters with the default match_type (0): delimiters excluded.
s = regular_expression(value, start_value=start_value, end_value=end_value)
print(s)

# Only an end delimiter: the text in front of it.
s1 = regular_expression(value, '', end_value)
print(s1)

# Only a start delimiter: the text behind it.
s2 = regular_expression(value, start_value)
print(s2)

# match_type=1: the matched span including both delimiters.
s3 = regular_expression(value, start_value, end_value, 1)
print(s3)
三、结果
tagtask_ogdata_c
1tagtask_ogdata_c
tagtask_ogdata_c2...
1tagtask_ogdata_c2