#python
import urllib.request,re
# Fetch the Baidu "top" page, report the HTTP status, and decode the body.
# The response object is only needed for the status line and the read, so the
# connection is released as soon as the text has been pulled into memory.
with urllib.request.urlopen('https://top.baidu.com/') as file:
    print("网页状态:", file.status, file.reason)
    data = file.read().decode('utf-8')

# Page title. re.S lets '.' match newlines, so a tag whose text wraps across
# lines is still captured.
reg = r'<title>(.*?)</title>'
title = re.findall(reg, data, re.S | re.M)
print("title:", title)

# Hot-search entries.
reg = r'<div class="c-single-text-ellipsis">(.*?)</div>'
content = re.findall(reg, data, re.S | re.M)
print("热搜:\n")
# Print every second match among the first 20 (indices 0, 2, ..., 18).
# Slicing instead of indexing means a page with fewer matches prints what is
# available rather than raising IndexError.
# NOTE(review): the step of 2 presumably skips a duplicate of each entry in
# the markup -- verify against the live page.
for entry in content[:20:2]:
    print(entry)

# Board (ranking list) names.
reg = r'<span class="title_jDbBV c-theme-color">(.*?)</span>'
榜单名 = re.findall(reg, data, re.S | re.M)
print("榜单名:\n", 榜单名)

# Game ranking.
reg = r'rsv_dl=fyb_hp_game" class="c-single-text-ellipsis name_3SMKh" target="_blank">(.*?)</a>'
游戏排行榜 = re.findall(reg, data, re.S | re.M)
print("游戏排行榜:\n", 游戏排行榜)

# Car ranking.
reg = r'rsv_dl=fyb_hp_car" class="c-single-text-ellipsis name_3SMKh" target="_blank">(.*?)</a>'
汽车 = re.findall(reg, data, re.S | re.M)
print("汽车排行榜:\n", 汽车)
正则表达式匹配百度热搜
最新推荐文章于 2024-08-26 17:00:25 发布