# 第十九天 总结 (Day 19 summary)
from re import findall, fullmatch

# Subject 'asd12asd58grw89grw13': the group "three lowercase letters followed
# by two digits" has to repeat exactly four times.
result = fullmatch(
    r'([a-z]{3}\d{2}){4}',
    'asd12asd58grw89grw13',
)
print(result)

# \1 must repeat the digit captured by the first group, with 'a' in between.
result = fullmatch(r'(\d)a\1', '9a9')
print(result)

# Because the pattern has one capture group, findall returns only the digit
# runs that directly follow a lowercase letter.
str1 = 'wq151as65d16as1d65a1sd1a32sd16as16as51d'
result = findall(r'[a-z](\d+)', str1)
print(result)

# Alternation inside a group: 'abc' followed by three digits or by three
# uppercase letters.
result = fullmatch(r'abc(\d{3}|[A-Z]{3})', 'abc123')
print(result)
from re import fullmatch
from re import findall

# The group "three lowercase letters + two digits" repeated four times.
result = fullmatch(r'([a-z]{3}\d{2}){4}', 'asd12asd58grw89grw13')
print(result)

# Backreference \1 repeats the digit captured by group 1.
result = fullmatch(r'(\d)a\1', '9a9')
print(result)

# With a capture group, findall yields just the captured digit runs.
str1 = 'wq151as65d16as1d65a1sd1a32sd16as16as51d'
result = findall(r'[a-z](\d+)', str1)
print(result)

# 'abc' followed by either three digits or three uppercase letters.
result = fullmatch(r'abc(\d{3}|[A-Z]{3})', 'abc123')
print(result)
# This import was fused onto the end of the print statement above (a syntax
# error); it is kept as its own statement.
from re import fullmatch
# Quantifiers: how many times the preceding item may repeat.
#   *  - zero or more times
#        a*b   - any number of 'a' characters before the b
#        \d*b  - any number of digits before the b
result = fullmatch(r'a*b', 'aab')
print(result)
result = fullmatch(r'\d*b', '67b')
print(result)
result = fullmatch(r'[xyz]*b', 'xyxb')
print(result)

#   +  - one or more times
result = fullmatch(r'a+b', 'aaaab')
print(result)

#   ?  - zero or one time
result = fullmatch(r'\d?abc', '4abc')
print(result)
# Optional leading plus sign, then an integer that does not start with 0.
result = fullmatch(r'[+]?[1-9]\d*', '+213123')
print(result)

#   {M,N} - between M and N times
#   {M,}  - at least M times
#   {N}   - exactly N times
#   {,N}  - at most N times
result = fullmatch(r'a{3}b', 'aaab')
print(result)
from re import *
def get_description():
    """Print every ``<span class="inq">...</span>`` element in ./top250.html.

    The file is read as UTF-8 from the current working directory. The list
    of matched spans (tags included, since the pattern has no capture group)
    is printed; nothing is returned.

    Raises:
        OSError: if ./top250.html cannot be opened (e.g. it does not exist).
    """
    # The context manager closes the file handle even if reading fails.
    with open('./top250.html', encoding='utf-8') as page:
        html = page.read()
    # Non-greedy .+? stops at the first closing </span> after each opening tag.
    result = findall(r'<span class="inq">.+?</span>', html)
    print(result)
# Run the extraction as soon as this part of the script executes.
get_description()
# The re module is Python's built-in module for regular-expression operations.
# fullmatch(pattern, string) checks whether the whole string matches the
# pattern; the result is None when it does not.
from re import fullmatch

# Ordinary characters match themselves.
result = fullmatch(r'abc', 'abc')
print(result)
# . matches any single character (newline excluded unless a flag says so).
result = fullmatch(r'a..b', 'arib')
print(result)
# \d matches one digit.
result = fullmatch(r'a\db', 'a2b')
print(result)
# \s matches one whitespace character, here a newline.
result = fullmatch(r'abc\s123', 'abc\n123')
print(result)
# \D matches one non-digit, \S one non-whitespace character.
result = fullmatch(r'\Dabc\S123', 'Mabc=123')
print(result)

# Character sets:
#   Case 1: [xyz] matches x or y or z.
#   Case 2: a backslash-prefixed matcher inside the set keeps its meaning.
#   Case 3: a minus sign between two characters denotes a range.
#       [a-z]            any one lowercase letter
#       [A-Z]            any one uppercase letter
#       [a-zA-Z]         any one letter of either case
#       [\u4e00-\u9fa5]  Chinese characters
result = fullmatch(r'a[xy\d]c', 'a2c')
print(result)
result = fullmatch(r'1[a-z]2', '1k2')
print(result)
result = fullmatch(r'1[A-Z\dmn]2', '1n2')
print(result)
# A leading ^ negates the set: any one character except x, y or z.
result = fullmatch(r'1[^xyz]2', '1是2')
print(result)
from re import *

# When the repetition count is not fixed, matching is either greedy or
# non-greedy (greedy is the default).
#   greedy:     *, +, {M,N}, {M,}, {,N}, ?
#   non-greedy: *?, +?, {M,N}?, {M,}?, {,N}?, ??
# Note: in Python, every function other than fullmatch can be affected by
# the greedy / non-greedy distinction.
#
# First pattern is greedy (extends to the last possible 'b'); second is
# non-greedy (stops at the first 'b' that completes a match).
for _pattern in ('.+b', '.+?b'):
    result = search(_pattern, '试试bshbsbj823')
    print(result)
import requests
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-browser User-Agent header. Before the
    text is decoded, the response encoding is switched to the encoding that
    requests detects from the body (``apparent_encoding``).

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely.

    Returns:
        The decoded response body (``r.text``). The HTTP status code is not
        checked, so an error page is returned as text as well.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    # Decode with the encoding guessed from the content rather than the one
    # taken from the response headers.
    r.encoding = r.apparent_encoding
    return r.text
# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    html = get_html('https://movie.douban.com/top250')
    # Capture the text of every alt="..." attribute in the page; the
    # non-greedy group stops at the first closing quote.
    result = findall(r'alt="(.+?)"', html)
    print(result)
from re import fullmatch, findall

# Grouping: parentheses turn part of a pattern into one unit that can be
# repeated as a whole.
result = fullmatch(r'([a-z]{3}\d{2}){3}', "abn23msk78skh89")
print(result)
result = fullmatch(r'(ab){3}', 'ababab')
print(result)

# Backreferences: \N repeats the text captured by group N.
result = fullmatch(r'(\d)a\1', '9a9')
print(result)
result = fullmatch(r'(\d)([a-z])\2\1', '1mm1')
print(result)
# A quantifier after a backreference repeats the captured text.
result = fullmatch(r'(\d)([a-z])\2\1{3}', '1mm111')
print(result)

# Capturing with findall: one group -> list of the captured strings.
str1 = 'sf262数据=22是12389mn89ksJK283'
result = findall(r'[a-z](\d+)', str1)
print(result)
# Several groups -> list of tuples, one item per group.
result = findall(r'([a-z]{2})=(\d{2})', 'ahjs=2536,话计算机的3223环境-=yu=是mns=239-失败')
print(result)

# Alternation: without a group, | splits the entire pattern in two.
result = fullmatch(r'abc\d{3}|abc[A-Z]{3}', 'abc231')
print(result)
result = fullmatch(r'\w{4}|\w{4}-\w{3}','back')
print(result)
from re import fullmatch

# Each entry is (pattern, subject); every pair is matched with fullmatch and
# the resulting match object (or None) is printed, in this order.
for _pattern, _subject in (
    (r'a\.b', 'a.b'),            # backslash makes the dot literal
    (r'\+?[1-9]\d*', '+23'),     # escaped plus sign, optional
    (r'\(\d{3}\)', '(234)'),     # escaped parentheses are literal
    (r'[+]?[1-9]\d*', '+23'),    # inside a set, + needs no escape
    (r'[-ab^]123', '-123'),      # leading - and non-leading ^ are literal in a set
    (r'(?i)abc', 'AbC'),         # inline flag i: ignore case
    (r'a.b', 'a\nb'),            # . does not match a newline by default -> None
    (r'(?is)a.b', 'a\nB'),       # flags i and s: ignore case, . matches newline
):
    result = fullmatch(_pattern, _subject)
    print(result)

# The last pair matched, so the match object exposes the matched text.
print(result.group())
from re import fullmatch, match, search, findall, finditer, split, sub