import requests
from fake_useragent import UserAgent
from lxml import etree
# Request headers passed to every requests.get call in this file. The
# User-Agent value is taken from fake_useragent once, when this statement runs.
headers = {'User-Agent': UserAgent().random}
def get_MovieUrl(index_url, timeout=10):
    """Collect the detail-page URLs of the movies listed on an index page.

    Fetches ``index_url`` using the module-level ``headers``, parses the
    response body as HTML, and reads the ``href`` attribute of every ``<a>``
    that is a direct child of a ``<div class="movie-item">``.

    Args:
        index_url: URL of the listing page to fetch.
        timeout: Seconds Requests waits for the server before giving up.
            Requests applies no timeout unless one is passed, so without
            this a stalled connection could block the call indefinitely.

    Returns:
        A list of URL strings, each formed by prefixing
        ``https://maoyan.com`` to one extracted ``href``. The list is empty
        when the page contains no matching links.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(index_url, headers=headers, timeout=timeout)
    page = etree.HTML(response.text)
    movie_hrefs = page.xpath('//div[@class="movie-item"]/a/@href')
    # Prefix each extracted href with the site origin to get a full URL.
    return ['https://maoyan.com{}'.format(href) for href in movie_hrefs]
def get_MovieInfo(movie_url):
response = requests.get(movie_url, headers=headers)
e = etree.HTML(response.text)
name = e.xpath('//h3[@class="name"]/text()')[0]
type=e.xpath('//li[@class="ellipsis"]/text()')[0]
#celebrity属性后面有空格,不能忽略
director=e.xpath('normalize-space(//div[@class=&#
xpath爬取猫眼电影
最新推荐文章于 2021-09-06 16:08:30 发布