import re
import urllib.request
import ssl
def gethtml(url):
    """Download *url* and return the response body decoded as UTF-8.

    The request is sent with a desktop-browser ``User-Agent`` header instead
    of urllib's default one.

    Args:
        url: Address to fetch; any URL ``urllib.request.urlopen`` can open.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'
    }
    request = urllib.request.Request(url, headers=header)
    # Use the response as a context manager so the underlying connection is
    # released even when read() or decode() raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")
def gettitle(hh):
    """Extract title -> " <rating> <pl text>" pairs from chart-page HTML.

    Three independent regex scans are run over the markup and their results
    are paired by position:

    * ``<span style="font-size:13px;">`` contents become the dict keys,
    * ``<span class="rating_nums">`` contents are the first value field,
    * ``<span class="pl">`` contents are the second value field.

    Args:
        hh: HTML source of the page as a string.

    Returns:
        A dict mapping each title string to ``" " + rating + " " + pl_text``.
        If the three scans yield different numbers of matches, pairing stops
        at the shortest one instead of raising ``IndexError``. A title that
        occurs more than once keeps the values of its last occurrence.
    """
    # re.S lets "." span newlines, so a span's content may cover several lines.
    title_re = re.compile('<span style="font-size:13px;">(.*?)</span>', re.S)
    rating_re = re.compile('<span class="rating_nums">(.*?)</span>', re.S)
    pl_re = re.compile('<span class="pl">(.*?)</span>', re.S)

    titles = title_re.findall(hh)
    ratings = rating_re.findall(hh)
    pl_texts = pl_re.findall(hh)

    # zip() pairs the three lists positionally and ends at the shortest, which
    # replaces the two hand-maintained counters and their out-of-range risk.
    return {
        title: " " + rating + " " + pl_text
        for title, rating, pl_text in zip(titles, ratings, pl_texts)
    }
# NOTE(review): this replaces the process-wide default HTTPS context factory
# with the unverified one, so certificate checks appear to be disabled for the
# request below (and any other urllib HTTPS call) -- confirm this is intended.
ssl._create_default_https_context = ssl._create_unverified_context

# Download the chart page and build the title -> " rating pl-text" mapping.
url = "https://movie.douban.com/chart"
hh = gethtml(url)
html = gettitle(hh)

# Header row (movie title, rating, number of people), then one row per entry.
print("电影名称", " 评分", " 人数")
for x, details in html.items():
    print(x, details)
# Python web crawler
# Latest recommended article published on 2024-10-04 10:19:07