正则表达式
正则模式建议使用原始字符串(如 r'\t')来书写,这样反斜杠不会先被 Python 转义(r'\t' 等价于 '\\t'),模式元素会匹配相应的特殊字符。 re.S 使 . 匹配包括换行符在内的所有字符。 (.*?) 是一个捕获分组,.* 后面多了一个问号,代表非贪婪模式,也就是说只匹配符合条件的最少字符;模式中含有分组时,re.findall 返回的是由分组内容组成的列表
ss = re.findall(r"</strong>(.*?)<br/><br/>",strpp,re.S)
实例1:
itemleft = re.search(r'(Item\s+offered)',desc,re.I|re.S) fitleft = re.search(r'(Fits?\s+models?\s+&\s+years?)',desc,re.I) # re.search 返回匹配对象(未匹配时为 None),可用 .start() 获得匹配的第一个字符的索引
实例2:
#将描述的多空格换成空格,|换成空格 将-前后空格去掉
desc = re.sub(r"\s+", " ", desc) #\s匹配一个空白符
desc = re.sub(r"\|", " ", desc)
desc = re.sub(r"\s*-\s*", "-", desc, flags=re.I) #标志位必须用 flags= 传入;re.sub 的第四个位置参数是 count(替换次数)
# print(desc)
desc = re.sub("(folder)|(folding)", "folding", desc, flags=re.I) #re.I忽略大小写
desc = re.sub("(Manually)|(Manual)", "Manual", desc, flags=re.I)
desc = re.sub("(telescopic)|(Telescoping)", "Telescoping", desc, flags=re.I)
se = re.search(r"(Manual folding)|(Power Folding: No)|(Manual [\s\w]*? folding)", desc, re.I) #\w匹配任意字母数字字符或下划线字符
替换
df = df.replace({"desc":{"\|":"\\n"}},regex=True) df = df.replace({"fits":{"\|":"\\n"}},regex=True)
#数据库里的描述 & 和 &amp; 两种形式的数据都有,强制将&amp;转换为& df = df.replace({"description":{"\B&amp;\B":"&"}},regex=True) # html转义字符
model = str(model).replace("&amp;", "&") #因为model town & country的特殊性,需要特殊处理 res = "-\d{4} "+ str(model)
实例3:a = re.search(r'(door\b)|(cab\b)|(sedan\b)|(doors\b)',fitment) #\b 边界
s_fitment = s_fitment.replace("|","\n") #换行,方便后面正则表达式匹配
实例4:desc = str(df.loc[i,"new_description_new"]) sql转义字符 desc = desc.replace("\\","\\\\") desc = desc.replace("\'","\\\'") desc = desc.replace("\"","\\\"") desc = desc.replace("\n\n","\\n") desc = desc.replace("\r\n","\\n") desc = desc.replace("\n","\\n") desc = desc.replace("\t","\\t")
正则表达式实例1
最新推荐文章于 2024-05-21 17:44:22 发布