match()
import re

# One or more consecutive digits.
pattern = re.compile(r'\d+')

# match() only succeeds when the pattern matches at the start of the string.
m = pattern.match('12one')

# Single-argument print(...) produces the same output on Python 2 and 3.
print(m)          # the match object itself
print(m.group())  # the matched text
print(m.start())  # index where the match begins
print(m.end())    # index just past the end of the match
print(m.span())   # (start, end) tuple
import re

# Two words separated by one space; re.I makes the match case-insensitive.
pattern = re.compile(r'([a-z]+) ([a-z]+)', re.I)

# Only the first two words are consumed; the rest of the string is ignored.
m = pattern.match('hello world wide web')

# Single-argument print(...) produces the same output on Python 2 and 3.
print(m.group())   # whole match
print(m.group(1))  # first captured word
print(m.group(2))  # second captured word
print(m.groups())  # all captured groups as a tuple
search()
import re

# One or more consecutive digits.
pattern = re.compile(r'\d+')

# search() scans the whole string, so the digits need not be at the start.
m = pattern.search('one12')

print(m.group())
import re

# One or more consecutive digits.
pattern = re.compile(r'\d+')

# search() returns only the first match in the string (or None).
m = pattern.search('hello 123 456')

if m:
    # %-formatting reproduces the "label value" output of the Python 2
    # print statement while remaining valid on Python 3.
    print('matching string: %s' % m.group())
    print('position: %s' % (m.span(),))
findall()
import re

# One or more consecutive digits.
pattern = re.compile(r'\d+')

# findall() returns every non-overlapping match as a list of strings.
result1 = pattern.findall('hello 123 456')
result2 = pattern.findall('one12two34')

print(result1)
print(result2)
import re

# Digits, a literal dot, then digits: numbers with a fractional part.
pattern = re.compile(r'\d+\.\d+')

# '232312' (no dot) and the trailing '3.' (no digits after the dot)
# do not satisfy the pattern.
result = pattern.findall("123.141593, 'bigcat', 232312, 3.15, 3.")

for item in result:
    print(item)
finditer()
import re

# One or more consecutive digits.
pattern = re.compile(r'\d+')

# finditer() yields match objects lazily instead of building a list.
result_iter1 = pattern.finditer('hello 123456 789')
# The extra arguments are pos=0 and endpos=10: only the first ten
# characters of the string are scanned.
result_iter2 = pattern.finditer('one1two2three3four4', 0, 10)

print(type(result_iter1))
print(type(result_iter2))

# %-formatting reproduces the space-separated output of the Python 2
# print statement while remaining valid on Python 3.
for m1 in result_iter1:
    print('%s %s' % (m1.group(), m1.span()))

for m2 in result_iter2:
    print('%s %s' % (m2.group(), m2.span()))
split()
import re

# A delimiter is any run of whitespace, commas and/or semicolons.
p = re.compile(r'[\s\,\;]+')

# Consecutive delimiters collapse into a single split point.
print(p.split('a,b;; c d'))
sub()
# coding: utf-8
import re

# Two "words" separated by one space, each captured as a group.
p = re.compile(r'(\w+) (\w+)')
s = 'hello 123, hello 456'

# Replace every match with a fixed string.
print(p.sub(r'hello world', s))
# Swap the two captured groups using backreferences.
print(p.sub(r'\2 \1', s))
# A callable replacement receives the match object and returns the new text.
print(p.sub(lambda m: 'h1' + ' ' + m.group(2), s))
# count=1 limits the substitution to the first match only.
print(p.sub(lambda m: 'h1' + ' ' + m.group(2), s, count=1))
匹配中文
# coding: utf-8
import re

title = u'你好,hello,世界'

# The character class spans U+4E00..U+9FA5. A plain u'' literal is used
# because the ur'' prefix is a syntax error on Python 3; the \u escapes
# are still resolved, so the compiled pattern is the same.
pattern = re.compile(u'[\u4e00-\u9fa5]+')

# Each run of characters inside that range becomes one list item.
result = pattern.findall(title)

print(result)
非贪婪
import re

line = "boooooobbby123"

# The leading '.*?' and the inner '.*?' are non-greedy, so the group
# captures from the first 'b' to the nearest following 'b'.
regex_str = ".*?(b.*?b).*"

match_obj = re.match(regex_str, line)
if match_obj:
    print(match_obj)
    print(match_obj.group(1))