# from bs4 import BeautifulSoup  # bs4 is not used
import requests
from pyquery import PyQuery as pq
def open_url(url_link, headers, timeout=10):
    """Download a page and hand its HTML text to ``find_ue``.

    Args:
        url_link: URL of the page to download.
        headers: Mapping of HTTP request headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely on an unresponsive
            host.

    Returns:
        Whatever ``find_ue`` returns for the downloaded text.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    response = requests.get(url_link, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    return find_ue(response.text)
def find_ue(r):
    """Extract one record per ``<dd>`` element of an HTML document and print it.

    Args:
        r: HTML text to parse.

    Returns:
        A list of dicts, one per ``<dd>`` element, in document order. Each
        dict has the keys ``'Ranking'``, ``'title'``, ``'actor'`` and
        ``'time'``, holding the text of the element's ``.board-index``,
        ``.name``, ``.star`` and ``.releasetime`` descendants respectively.
    """
    doc = pq(r)
    # Build the records directly; avoids shadowing the builtin ``list``.
    movies = [
        {
            'Ranking': entry('.board-index').text(),
            'title': entry('.name').text(),
            'actor': entry('.star').text(),
            'time': entry('.releasetime').text(),
        }
        for entry in doc('dd').items()
    ]
    for movie in movies:
        print(movie)
    # Return the records so callers can use them, not only see them printed.
    return movies
if __name__ == '__main__':
    # Request headers: a desktop-browser User-Agent string.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/68.0.3440.106 Safari/537.36'
        ),
    }
    # Page to download and parse.
    url_link = "https://maoyan.com/board"
    open_url(url_link=url_link, headers=headers)
# Tags: Maoyan, now-showing board
# Latest recommended article published on 2024-06-02 12:22:21