>>> import re
>>> m = re.match('f..','food') # 匹配到返回对象,匹配开头
>>> print(re.match('f..','seafood')) #匹配不到返回None
None
>>> m.group() #返回匹配的值
'foo'
>>> m = re.search('f..','seafood') #匹配没有限制
>>> m.group()
'foo'
>>> re.findall('f..','seafood is food') #返回所有匹配项组成的列表
['foo', 'foo']
>>> result = re.finditer('f..','seafood is food') # 返回匹配对象组成的迭代器
>>> for m in result: # 从迭代器中逐个取出匹配对象
... print(m.group())
...
foo
foo
>>> re.sub('f..','abc','fish is food') #相当于匹配后替换
'abch is abcd'
>>> re.split('\.|-','hello-word-.tar.gz') #切割,用.和-做切割符号
['hello', 'word', '', 'tar', 'gz']
>>> patt = re.compile('f..') # 先把要匹配的模式编译,提升效率
>>> m = patt.search('seafood') # 指定在哪个字符串中匹配
>>> m.group()
'foo'
# Count how often a given pattern is matched in the lines of a file
import re
def count_patt(fname, patt):
    """Count the lines of a file that match a pattern, keyed by matched text.

    Each line is searched once, so only the first match on a line is counted.

    Args:
        fname: Path of the text file to scan.
        patt: Regular expression to search for in every line.

    Returns:
        dict mapping each matched string to the number of lines on which it
        was found. Empty when no line matches.
    """
    cpatt = re.compile(patt)  # compile once, outside the per-line loop
    result = {}
    # errors='replace' keeps the scan going when a line contains bytes that
    # cannot be decoded, instead of raising UnicodeDecodeError part-way
    # through the file.
    with open(fname, errors='replace') as fobj:
        for line in fobj:
            m = cpatt.search(line)  # None when the line has no match
            if m:
                key = m.group()
                # get() supplies 0 the first time a key is seen
                result[key] = result.get(key, 0) + 1
    return result
if __name__ == '__main__':
    # Example run against an Apache access log in the working directory.
    fname = 'access_log'
    # Raw string: without the r prefix, '\d' and '\.' are invalid string
    # escapes and trigger a warning on current Python versions.
    ip = r'^(\d+\.){3}\d+'  # dotted IP address at the start of a log line
    print(count_patt(fname, ip))
    br = 'Firefox|MSIE|Chrome'  # client browser name anywhere in the line
    print(count_patt(fname, br))
# Advanced version of the pattern counter: a class that tallies with collections.Counter
import re
from collections import Counter # Counter is a dict subclass for tallying; it adds helpers such as most_common()
class CountPatt:
    """Count regular-expression matches in the lines of one file.

    The file path is fixed at construction; count_patt() can then be called
    repeatedly with different patterns against that same file.
    """

    def __init__(self, fname):
        """Store the path of the file to scan.

        Args:
            fname: Path of the text file that count_patt() will read.
        """
        self.fname = fname

    def count_patt(self, patt):
        """Count the lines that match a pattern, keyed by matched text.

        Each line is searched once, so only the first match on a line is
        counted.

        Args:
            patt: Regular expression to search for in every line.

        Returns:
            collections.Counter mapping each matched string to the number of
            lines on which it was found. Empty when no line matches.
        """
        cpatt = re.compile(patt)  # compile once, outside the per-line loop
        result = Counter()
        # The path is the instance attribute set in __init__.
        # errors='replace' keeps the scan going when a line contains bytes
        # that cannot be decoded, instead of raising UnicodeDecodeError.
        with open(self.fname, errors='replace') as fobj:
            for line in fobj:
                m = cpatt.search(line)  # None when the line has no match
                if m:
                    result[m.group()] += 1  # missing keys count from 0
        return result
if __name__ == '__main__':
    # Example run against an Apache access log in the working directory.
    c = CountPatt('access_log')
    # Raw string: without the r prefix, '\d' and '\.' are invalid string
    # escapes and trigger a warning on current Python versions.
    ip = r'^(\d+\.){3}\d+'  # dotted IP address at the start of a log line
    br = 'Firefox|MSIE|Chrome'  # client browser name anywhere in the line
    a = c.count_patt(ip)
    print(a)
    print(a.most_common(3))  # the three most frequent addresses
    print(c.count_patt(br))