【Selenium 爬取猪八戒网(zbj.com)】

直接上代码

from concurrent.futures import ThreadPoolExecutor
import time
import os
import requests
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import  Options
from selenium import webdriver
from selenium.webdriver import ActionChains


# Search-results page to scrape (the `kw` query parameter is URL-encoded).
rootrurl = "https://beijing.zbj.com/search/f/?kw=%E6%95%B0%E6%8D%AE%E5%BA%93"

# Local path of the chromedriver executable handed to Selenium.
chromeExeLoc = 'D:/software/chrome/chromedriver_win32/chromedriver.exe'

# HTTP request headers that make a request look like it comes from a browser.
headers = {
    "Referer": rootrurl,
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36"
    ),
    'Accept-Language': 'en-US,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
}

def _extract_item(item):
    """Read the printed fields out of one 'witkey-item' element.

    Returns a tuple ``(text, city, cons, price, amount, title, tags)`` where
    ``cons`` and ``tags`` are lists of strings and the rest are strings.
    """
    shop = item.find_element_by_class_name('service-shop')
    text = shop.find_element_by_class_name('text-overflow').text
    city = shop.find_element_by_class_name('city-icon') \
        .find_element_by_tag_name('span').text

    # find_element_* raises when nothing matches instead of returning None, so
    # the optional containers are looked up with find_elements_*, which yields
    # an empty list when the container is absent.
    # NOTE(review): with the implicit wait set by the caller, an absent
    # container presumably costs the full wait per item -- confirm.
    icon_boxes = item.find_elements_by_class_name('service-icons')
    cons = []
    if icon_boxes:
        cons = [t.text for t in icon_boxes[0].find_elements_by_class_name('tag-text')]

    price_box = item.find_element_by_class_name('service-price')
    price = price_box.find_element_by_class_name('price').text
    amount = price_box.find_element_by_class_name('amount').text

    title = item.find_element_by_class_name('service-title').find_element_by_tag_name('p').text

    tag_boxes = item.find_elements_by_class_name('service-tags')
    tags = tag_boxes[0].text.split("\n") if tag_boxes else []

    return text, city, cons, price, amount, title, tags


def test(headless=False):
    """Open the search page in Chrome, load more results, and print each item.

    The page at ``rootrurl`` is opened, the bottom dialog is dismissed, the
    "load more" button is clicked once, and then one tab-separated line is
    printed for every element with class 'witkey-item'.

    Args:
        headless: when true, Chrome is started with ``--headless`` so no
            browser window is shown. Defaults to False, i.e. a visible window.

    Raises:
        Whatever Selenium raises when a required element cannot be found or
        clicked; the browser is shut down before the exception propagates.
    """
    options = Options()
    if headless:
        options.add_argument('--headless')
    driver = webdriver.Chrome(chromeExeLoc, options=options)
    try:
        driver.maximize_window()
        driver.get(rootrurl)

        # Implicit wait: element lookups below retry for up to 30 seconds.
        driver.implicitly_wait(30)
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')  # scroll to the bottom
        # This bottom dialog has to be dismissed first, otherwise it blocks
        # the click on the "load more" button.
        driver.find_element_by_class_name('dialog-bottom-close').click()

        # range(1, 2) performs a single "load more" click; widen it to load more.
        for _ in range(1, 2):
            time.sleep(3)  # fixed pause before looking for the button

            driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
            more_button = driver.find_element_by_class_name('more-result') \
                .find_element_by_tag_name('button')
            ActionChains(driver).move_to_element(more_button).perform()
            more_button.click()
            print('loading more...')

        # Print one line per listed item.
        for item in driver.find_elements_by_class_name('witkey-item'):
            print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(*_extract_item(item)))
    finally:
        # quit() ends the whole session (browser and chromedriver process),
        # and runs even when a lookup or click above raises.
        driver.quit()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值