Python爬取豆瓣电影Top250排行并写入Excel


import requests
from lxml import etree
import xlwt

# Listing-page URL template; ``%d`` is filled with the offset of the first
# movie shown on the page.
url = 'https://movie.douban.com/top250?start=%d&filter='

# Headers sent with every page request. ``User-Agent`` and ``Cookie`` are
# empty strings here.
headers = {
    'User-Agent': '',
    'Cookie': '',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.7'
    ),
}


def get_info(start, timeout=10):
    """Download one listing page and return its HTML as text.

    Args:
        start: Offset of the first movie on the page; substituted into the
            ``%d`` placeholder of the module-level ``url`` template.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    resp = requests.get(url % start, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the parser, which would silently yield zero movies.
    resp.raise_for_status()
    return resp.content.decode('utf-8')


def parse_content(content, resultlist):
    """Parse one listing page and add its movies to ``resultlist``.

    Args:
        content: HTML text of a listing page.
        resultlist: List that receives one dict per movie entry, as built
            by ``parse_info``. It is modified in place.
    """
    tree = etree.HTML(content)
    # Each match is the <div> directly under one child of the ordered list.
    movie_nodes = tree.xpath('//*[@id="content"]/div/div[1]/ol/*/div')
    resultlist.extend(map(parse_info, movie_nodes))


def parse_info(element):
    """Collect the raw fields of one movie entry.

    Args:
        element: Node of a single movie entry, as selected by
            ``parse_content``.

    Returns:
        A dict with the keys ``link``, ``imgUrl``, ``title``, ``alias1``,
        ``alias2``, ``detail``, ``score``, ``num`` and ``hotcomment``.
        Each value is the unmodified result of the matching XPath query;
        nothing is unwrapped or cleaned here.

    Raises:
        IndexError: If the entry lacks its ``pic``, ``info``, ``hd`` or
            ``bd`` container.
    """
    pic_node = element.xpath('./div[@class="pic"]')[0]
    info_node = element.xpath('./div[@class="info"]')[0]
    head_node = info_node.xpath('./div[@class="hd"]')[0]
    body_node = info_node.xpath('./div[@class="bd"]')[0]

    # (output key, node to query, XPath relative to that node)
    queries = (
        ('link', pic_node, './a/@href'),                    # movie page link
        ('imgUrl', pic_node, './a/img/@src'),               # poster image link
        ('title', head_node, './a/span[1]/text()'),         # title
        ('alias1', head_node, './a/span[2]/text()'),        # first alias
        ('alias2', head_node, './a/span[3]/text()'),        # second alias
        # TODO: parse lead actors, release date, release region and genre
        # out of this detail text.
        ('detail', body_node, './p[1]/text()'),
        ('score', body_node, './div[1]/span[2]/text()'),    # rating
        ('num', body_node, './div[1]/span[4]/text()'),      # number of ratings
        ('hotcomment', body_node, './p[2]/span/text()'),    # featured comment
    )
    return {key: node.xpath(path) for key, node, path in queries}
def save_2_excel(resultlist, filename='豆瓣top250.xls'):
    """Write the scraped movies to an ``.xls`` workbook.

    Row 0 holds the column headers; each following row holds one movie.

    Args:
        resultlist: Iterable of dicts as produced by ``parse_info``. Every
            dict must contain all of the column keys listed below.
        filename: Path of the workbook to write. Defaults to the name the
            script has always used.

    Raises:
        KeyError: If an item is missing one of the column keys.
    """
    wb = xlwt.Workbook()
    ws = wb.add_sheet('豆瓣点评top250')
    # Maps each result key to its header text; insertion order is the
    # column order.
    columns = {
        'link': '电影链接',
        'imgUrl': '图片链接',
        'title': '片名',
        'alias1': '别名1',
        'alias2': '别名2',
        'detail': '详情',
        'score': '评分',
        'num': '评价人数',
        'hotcomment': '热评',
    }

    for col, header in enumerate(columns.values()):
        ws.write(0, col, header)

    # Data rows start at 1 because row 0 is the header row.
    for row, item in enumerate(resultlist, start=1):
        for col, key in enumerate(columns):
            # Values are passed to xlwt exactly as parse_info produced them.
            ws.write(row, col, item[key])

    wb.save(filename)

def main():
    """Scrape the ten listing pages and export every movie to Excel."""
    movies = []
    # Offsets 0, 25, ..., 225: ten pages, stepping by 25.
    for offset in range(0, 250, 25):
        parse_content(get_info(offset), movies)

    save_2_excel(movies)


# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

在这里插入图片描述

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值