6.22

# douban
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time, re, json, requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from multiprocessing import Pool
from PIL import Image
from io import BytesIO


class DouBan(object):
    """Selenium-driven scraper for Douban movie listings.

    Two Firefox sessions are used: ``list_driver`` logs in and browses the
    listing page, while ``detail_driver`` loads each movie's detail page, so
    ``list_driver`` never navigates away while its elements are iterated.

    NOTE: the ``find_element_by_*`` helpers used here were removed in
    Selenium 4.3; run with Selenium 3.x or port the lookups to
    ``find_element(By.<kind>, value)``.
    """

    # Layout expected in the text of the detail page's ``#info`` block.
    # Compiled once here instead of on every get_detail() call.
    INFO_PATTERN = re.compile(r'导演: (.*?)\n编剧: (.*?)\n主演: (.*?)\n类型: (.*?)\n.*?制片国家/地区: (.*?)\n语言: (.*?)\n上映日期: (.*?)\n片长: (.*?)\n', re.S)

    def __init__(self):
        """Start both Firefox sessions and remember the site's entry URL."""
        self.start_url = "https://movie.douban.com/"
        self.list_driver = webdriver.Firefox()
        self.detail_driver = webdriver.Firefox()
        # The implicit wait is a session-wide setting, so it is configured
        # once here rather than again for every detail page.
        self.detail_driver.implicitly_wait(20)

    def start(self, url):
        """Open *url*, switch to the movie picker and scrape its listing."""
        self.list_driver.get(url)
        wait = WebDriverWait(self.list_driver, 15)
        wait.until(lambda driver: driver.find_element_by_link_text('选电影')).click()

        def sixth_label(driver):
            # Return False (keep polling) until at least six <label>s exist.
            # Indexing directly would raise IndexError, which WebDriverWait
            # does not ignore, so the wait would abort instead of retrying.
            labels = driver.find_elements_by_tag_name('label')
            return labels[5] if len(labels) > 5 else False

        wait.until(sixth_label).click()
        self.get_list_url()

    def get_list_url(self):
        """Scrape the detail page of every ``item`` element on the listing."""
        self.list_driver.implicitly_wait(20)
        items = self.list_driver.find_elements_by_class_name('item')
        # Read all links up front so a later change to the listing page
        # cannot invalidate elements halfway through the crawl.
        hrefs = [item.get_attribute('href') for item in items]
        for href in hrefs:
            if not href:
                # Element without a link target: nothing to open.
                continue
            self.get_detail(href)

    @staticmethod
    def _parse_info(info_text):
        """Return the eight captured info fields, or ``None`` if no match.

        The fields are, in order: director, writer, cast, genre,
        country/region, language, release date and running time.
        """
        match = DouBan.INFO_PATTERN.search(info_text)
        return match.groups() if match else None

    def get_detail(self, url):
        """Load a movie detail page and print its title and info fields."""
        self.detail_driver.get(url)
        m_name = self.detail_driver.find_element_by_css_selector('h1 span:first-child').text
        info_text = self.detail_driver.find_element_by_xpath('//div[@id="info"]').text
        m_tuple = self._parse_info(info_text)
        if m_tuple is None:
            # Pages missing one of the expected fields used to crash the
            # whole crawl with AttributeError; report and move on instead.
            print(m_name, '(info block did not match the expected layout)')
            return
        print(m_name, m_tuple)

    def login(self, url, username='13526080969', password='bas429xxx'):
        """Log in through the form at *url* using ``list_driver``.

        NOTE(review): the default credentials are kept only for backward
        compatibility; secrets should not live in source code. Prefer
        passing *username* / *password* explicitly.
        """
        driver = self.list_driver
        driver.get(url)
        driver.find_element_by_id('email').send_keys(username)
        driver.find_element_by_id('password').send_keys(password)
        # find_elements_* returns an empty list instead of raising, so a
        # login form without a captcha is handled gracefully.
        captcha_inputs = driver.find_elements_by_id('captcha_field')
        if captcha_inputs:
            c_url = driver.find_element_by_id('captcha_image').get_attribute('src')
            captcha_inputs[0].send_keys(self.get_captcha(c_url))
        driver.find_element_by_class_name('btn-submit').click()

    def get_captcha(self, captcha_url):
        """Download and display the captcha image, then ask the user for it.

        Returns the text typed by the user. Raises ``requests.HTTPError``
        when the image cannot be downloaded.
        """
        # A timeout prevents an unresponsive server from hanging the script.
        response = requests.get(captcha_url, timeout=15)
        response.raise_for_status()
        Image.open(BytesIO(response.content)).show()
        return input('请输入验证码:')

    def close(self):
        """Quit both browser sessions."""
        try:
            self.list_driver.quit()
        finally:
            self.detail_driver.quit()


if __name__ == '__main__':
    douban = DouBan()
    try:
        douban.login('https://accounts.douban.com/login?source=movie')
        douban.start(douban.start_url)
    finally:
        # Always shut both Firefox sessions down, even when the crawl fails,
        # so no orphaned browser processes are left behind.
        try:
            douban.list_driver.quit()
        finally:
            douban.detail_driver.quit()

# 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=0'
# 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=20'
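# --- Blog-page residue captured together with the snippet (not part of the script) ---
# 评论
# 添加红包
#
# 请填写红包祝福语或标题
#
# 红包个数最小为10个
#
# 红包金额最低5元
#
# 当前余额3.43前往充值 >
# 需支付:10.00
# 成就一亿技术人!
# 领取后你会自动成为博主和红包主的粉丝 规则
# hope_wisdom
# 发出的红包
# 实付
# 使用余额支付
# 点击重新获取
# 扫码支付
# 钱包余额 0
#
# 抵扣说明:
#
# 1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
# 2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。
#
# 余额充值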