Scraping Maoyan movie ratings with Python

```python
##########################
# author: Conan_ft
# Email: 634598660@qq.com
import requests
from lxml import etree

url = "https://maoyan.com/films"
headers = {
    'Referer': 'https://maoyan.com/films',
    'Connection': 'keep-alive',
    'Host': 'maoyan.com',
    'Cookie': 'uuid_n_v=v1; uuid=340E2D802BED11EB82D08318B644F2A4A83B9D90FA1F48BDA4C833A58466F5A9; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1605958336,1605959248; _lxsdk_cuid=175ea93803db-0165966f617246-4c3f2678-1fa400-175ea93803ec8; _lxsdk_s=175ea938040-f87-df9-32d%7C%7C9; _lxsdk=340E2D802BED11EB82D08318B644F2A4A83B9D90FA1F48BDA4C833A58466F5A9; __mta=209639085.1605958336700.1605959257465.1605960054434.4; _csrf=792cc35eeb4498786e5dd80b985d35ae4c0147ecfff3719d7a84a6fbd4f286b1; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1605960054',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0'
}

class Spider(object):
    def start_requests(self):
        response = requests.get(url, headers=headers)
        response.encoding = 'utf-8'
        xml = etree.HTML(response.text)

        # Film titles and detail-page links
        film_tit_list = xml.xpath('//div[@class="channel-detail movie-item-title"]/a/text()')
        film_src_list = xml.xpath('//div[@class="channel-detail movie-item-title"]/a/@href')

        # Maoyan splits each score into an integer part and a fraction part,
        # e.g. <i class="integer">9.</i><i class="fraction">0</i>.
        # (Films still marked "暂无评分" have no <i> children, which can
        # shift the zip alignment below.)
        a_score = xml.xpath('//div[@class="channel-detail channel-detail-orange"]/i[@class="integer"]/text()')
        b_score = xml.xpath('//div[@class="channel-detail channel-detail-orange"]/i[@class="fraction"]/text()')

        for tit_list, first_score, second_score, src_list in zip(film_tit_list, a_score, b_score, film_src_list):
            final_url = "https://maoyan.com" + src_list
            score = first_score + second_score  # e.g. "9." + "0" -> "9.0"
            print(tit_list, final_url, score)
            # self.next_file(tit_list, src_list)

spider = Spider()
spider.start_requests()
```
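The commented-out `self.next_file(tit_list, src_list)` call suggests a follow-up step that visits each film's detail page. A minimal sketch of what such a method could look like, assuming the same `headers` work on detail pages (only the name `next_file` comes from the original code; the body and file name are hypothetical):

```python
import time

def next_file(self, title, src):
    """Hypothetical helper (add to the Spider class): fetch one film's
    detail page and append its title and URL to a local file."""
    detail_url = "https://maoyan.com" + src
    response = requests.get(detail_url, headers=headers)
    response.encoding = 'utf-8'
    with open('films.txt', 'a', encoding='utf-8') as f:  # file name is illustrative
        f.write(title + '\t' + detail_url + '\n')
    time.sleep(1)  # pause briefly so detail-page requests stay polite
```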

**Note:** Maoyan has anti-scraping protection, so the request must include headers (in particular a valid Cookie and User-Agent).
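One way to tell whether the headers were accepted: a blocked client typically gets redirected to a verification page instead of the film list. A minimal sketch, assuming the redirect target URL contains `verify` (the exact behavior may vary):

```python
response = requests.get(url, headers=headers)
# If Maoyan rejects the client it usually redirects to a captcha/verification
# page; the "verify" substring check below is an assumption about that URL.
if response.status_code != 200 or "verify" in response.url:
    print("Blocked by anti-scraping check; refresh the Cookie in headers")
```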


Scraping Maoyan movie information with Python generally relies on web-crawling libraries such as BeautifulSoup or Scrapy. A simplified workflow:

1. **Install the required libraries**: you need `requests` and `beautifulsoup4`, and probably `lxml` as well, since these are the usual tools for fetching and parsing HTML.

```bash
pip install requests beautifulsoup4 lxml
```

2. **Write the crawler script**:
   - Use `requests.get()` to fetch the page source.
   - Parse the HTML and locate the sections containing each film's ranking, actors, genre, and score. This usually means finding specific HTML tags (such as `<div class="movie-info">...</div>`) and extracting the data inside them.

```python
import requests
from bs4 import BeautifulSoup

def get_movie_info(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')

    # Replace these placeholder CSS selectors with ones that match
    # the actual page structure
    rank_div = soup.select('.movie-ranking')              # ranking section
    actor_div = soup.select('.actor-info')                # actor information
    type_and_score_div = soup.select('.type-and-score')   # genre and score

    # Extract the data (further parsing may be needed)
    ranking = [div.text for div in rank_div]
    actors = [div.text for div in actor_div]
    types_and_scores = [(info['type'], info['score']) for info in type_and_score_div]

    return ranking, actors, types_and_scores

# Example URL; replace it with your actual target page
url = "https://maoyan.com/board/4"
rankings, actors, details = get_movie_info(url)
```

Note: in practice a site may throttle or block frequent requests, so follow the site's robots.txt rules and try to pace requests the way a human user would.
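To follow that pacing advice, one common pattern is a small wrapper that sleeps for a randomized interval before each fetch. A sketch (the delay bounds here are arbitrary choices, not values from this article):

```python
import random
import time

import requests

def polite_get(url, min_delay=1.0, max_delay=3.0, **kwargs):
    """Fetch a URL after a randomized pause, roughly mimicking human pacing."""
    time.sleep(random.uniform(min_delay, max_delay))
    return requests.get(url, timeout=10, **kwargs)

# Usage: response = polite_get("https://maoyan.com/board/4", headers=headers)
```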