简单python的自带库re和正则匹配_python re匹配数组-优快云博客

本文链接：https://blog.youkuaiyun.com/tlp_zzm/article/details/134453072
最近在学习 Python 安全编程,正好写到 re 模块,这里整理了一些常用的函数。
不做过多详解,希望有助于大家快速入门。

import re

# re.findall(pattern, string) returns every non-overlapping match as a list.
# Patterns are raw strings so that escapes such as \d reach the regex engine
# untouched instead of being parsed as (invalid) string escape sequences.
print(re.findall(r"hello", "heloodsdshello"))  # plain literal text
print(re.findall(r"hel.o", "heloodsdshello"))  # . matches any single character except a newline
print(re.findall(r"\d", "012121415heloodsdshello"))  # \d matches one digit ([0-9] for ASCII text)
print(re.findall(r"\D", "012121415heloodsdshello"))  # \D matches one non-digit ([^0-9] for ASCII text)
print(re.findall(r"hel[asd]o", "heloodsdshello"))  # [asd] matches exactly one of a, s, d -> no match here
print(re.findall(r"hel[^asd]o", "heloodsdshello"))  # [^asd] negates the set: any one character except a, s, d
print(re.findall(r"hel[a-z]o", "heloodsdshello"))  # [a-z] matches any one lowercase ASCII letter

# X?   匹配前面的子表达式X零次或一次，要匹配 ? 字符使用 \?
# X*   匹配前面的子表达式X零次或多次，要匹配 * 字符使用 \*
# X+   匹配前面的子表达式X一次或多次,要匹配 + 字符使用 \+
# X{n} 匹配前面的子表达式X n次
# X{n,}    匹配前面的子表达式X最少n次
# X{n,m}   匹配前面的子表达式X最少n次,不超过m次

# Quantifier and anchor demos; each call prints the list of matches found.

# * allows zero or more of the preceding character.
print(re.findall(r"hel*oword", "hahwiehelowordhahah"))
# + requires one or more of the preceding character.
print(re.findall(r"hel+oword", "hahwiehelowordhahah"))
# ? allows zero or one; the text has two consecutive l's, so nothing matches.
print(re.findall(r"hel?oword", "hahwiehellowordhahah"))
# {3} requires exactly three of the preceding character.
print(re.findall(r"hel{3}oword", "hahwiehelllowordhahah"))
# ^ anchors at the start of the string (or of a line in multiline mode).
print(re.findall(r"^helloword", "hahwiehellowordhahah"))
# $ anchors at the end of the string (or of a line in multiline mode).
print(re.findall(r"helloword$", "hahwiehellowordhahah"))
# \b matches at a word boundary.
print(re.findall(r"\bhello", "hello hahah"))

#*, +, ?, {m,n} 限定符都是贪婪的;它们匹配尽可能多的文本，比如 <.*> 与 '<a>b<c>' 匹配时，将匹配到整个 <a>b<c>，而不仅仅匹配到 <a>
#在限定符后添加 ? 将以非贪婪模式进行匹配，匹配尽可能少的字符，比如 <.*?> 将只匹配到 <a>

# Compile a pattern with one capturing group around a single digit, then
# look for its first occurrence anywhere in the sample text.
a = re.compile(r'(\d)')
search = a.search('0325446asdsd')
print(search)           # the Match object (shows span and matched text)
print(search.groups())  # tuple holding the text captured by each group

# compile(pattern, flags=0)
# pattern: 字符串形式的正则表达式
# flags: 正则表达式修饰符，匹配模式，比如忽略大小写，多行模式
# flags:
# 正则表达式可以包含一些可选标志修饰符来控制匹配的模式
# 多个标志可以通过按位 OR(|) 它们来指定。如 re.I | re.M 被设置成 I 和 M 标志


# The same regex applied two ways. match() only succeeds when the pattern
# matches starting at the beginning of the string.

# 1) Through a precompiled Pattern object.
pattern = re.compile('hello*word')
result = pattern.match('helloooooword')
print(result)

# 2) Through the module-level helper, which takes the pattern text directly.
result = re.match('hello*word', 'hellooooword')
print(result)

# Pattern.search(string,pos,endpos)
# Pattern.match(string,pos,endpos)
# Pattern.fullmatch(string,pos,endpos)
# Pattern.findall(string,pos,endpos)
# string: 必填参数，表示待查找的字符串,
# pos: 可选参数，从指定的pos索引位置开始搜索，默认为0,
# endpos:可选参数，指定截至位置

# pattern.split(string, maxsplit=0)
# string: 必填参数，表示待匹配的字符串
# maxsplit: 分隔次数，maxsplit = 1表示分隔一次，默认为0，不限制次数

# pattern.sub(repl, string, count=0)   (已编译的 Pattern 对象的 sub 方法没有 flags 参数,标志在 compile 时指定)
# repl: 替换的字符串，可以是函数
# string: 要被查找替换的原始字符串
# count: 模式匹配后替换的最大次数，默认0表示替换所有的匹配

# Pattern.search() takes an optional start index as its second argument.
# Starting at index 5 skips the leading "hello", so the remaining text cannot
# match and search() returns None.
pattern = re.compile('hello*word', flags=0)  # the keyword is `flags`; `flag` raises TypeError
result = pattern.search("helloooooooword", 5)
print(result)

# pattern.flags
# 1、re.IGNORECASE (re.I) 忽略大小写
# 2、re.ASCII (re.A) 只匹配ASCII   让 \w, \W, \b, \B, \d, \D, \s 和 \S 只匹配ASCII，不匹配Unicode
# 3、re.MULTILINE (re.M) 多行模式 ^表示匹配行的开头，默认模式下它只能匹配字符串的开头；而在多行模式下，它还可以匹配每个换行符\n之后的行首位置
# 正则语法中^匹配行开头、\A匹配字符串开头，默认模式下两者效果一致；多行模式下\A仍然只匹配整个字符串的开头，不会匹配\n之后的行首
# 4、re.DOTALL (re.S)  .号可以匹配包括换行符\n在内的所有字符 (默认模式下不能匹配换行符\n)
# 5、re.VERBOSE (re.X)  可识别注释信息,自动忽略注释信息
# 6、re.LOCALE (re.L)做本地化识别（locale-aware）匹配
# 7、re.UNICODE (re.U)表示特殊字符集 \w, \W, \b, \B, \d, \D, \s, \S 依赖于 Unicode 字符属性


# re自带的函数,跟pattern差不多
# re.search(pattern, string, flags=0)
# re.match(pattern, string, flags=0)
# re.fullmatch(pattern, string, flags=0)
# re.findall(pattern, string, flags=0)
# re.split(pattern, string, maxsplit=0, flags=0)
# re.sub(pattern, repl, string, count=0, flags=0)

# Greedy form, searched over the whole text with no position limits.
pattern = re.compile('hello*word')
result = pattern.search("hellooooooooword")

# Non-capturing group with a lazy quantifier, restricted to the window
# [6, 8): two characters cannot hold the pattern, so this yields None.
patterns = re.compile('hello(?:o)*?word')
resource = patterns.search('hellooooooooword', 6, 8)

# Rebind `pattern` to the lazy form and stop the search at index 20.
pattern = re.compile('hello(?:o)*?word')
searchResult = pattern.search('hahwiehelloooooowordhahah', endpos=20)

# One value per line, in the order: searchResult, result, resource.
print(searchResult, result, resource, sep="\n")