直接上代码
from concurrent.futures import ThreadPoolExecutor
import time
import os
import requests
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver import ActionChains
# Search-results page that the scraper opens (the ``kw`` query value is the
# URL-encoded search keyword); reused as the Referer value in ``headers``.
rootrurl = "https://beijing.zbj.com/search/f/?kw=%E6%95%B0%E6%8D%AE%E5%BA%93"

# Filesystem location of the chromedriver executable.
chromeExeLoc = 'D:/software/chrome/chromedriver_win32/chromedriver.exe'

# Request headers that imitate a desktop Chrome browser.
headers = {
    "Referer": rootrurl,
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.8",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
}
def _find_optional(parent, class_name):
    """Return the first element under *parent* with *class_name*, or None if absent."""
    # Imported here so this block does not require a new file-level import.
    from selenium.common.exceptions import NoSuchElementException

    try:
        return parent.find_element_by_class_name(class_name)
    except NoSuchElementException:
        # NOTE: with an implicit wait configured on the driver, a miss only
        # surfaces after that wait has elapsed.
        return None


def _parse_item(item):
    """Extract (text, city, cons, price, amount, title, tags) from one ``witkey-item`` element."""
    shop = item.find_element_by_class_name('service-shop')
    text = shop.find_element_by_class_name('text-overflow').text
    city = shop.find_element_by_class_name('city-icon').find_element_by_tag_name('span').text

    # The icon strip is optional; an item without it yields an empty list.
    icons = _find_optional(item, 'service-icons')
    cons = []
    if icons is not None:
        cons = [tag.text for tag in icons.find_elements_by_class_name('tag-text')]

    price_box = item.find_element_by_class_name('service-price')
    price = price_box.find_element_by_class_name('price').text
    amount = price_box.find_element_by_class_name('amount').text
    title = item.find_element_by_class_name('service-title').find_element_by_tag_name('p').text

    # The tag section is optional as well.
    tags_box = _find_optional(item, 'service-tags')
    tags = []
    if tags_box is not None:
        tags = tags_box.text.split("\n")

    return text, city, cons, price, amount, title, tags


def test(headless=False, load_more_clicks=1):
    """Open the search page in Chrome, load more results, and print every listing.

    One tab-separated line is printed per ``witkey-item`` element, containing
    the ``text-overflow`` text and city from ``service-shop``, the
    ``service-icons`` tag texts, price, amount, title and the ``service-tags``
    lines.

    Args:
        headless: When True, start Chrome with ``--headless`` so no browser
            window is shown. Defaults to False (a visible window).
        load_more_clicks: How many times to click the "load more" button
            before collecting the items. Defaults to 1.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the "load more"
            button or a mandatory field of an item cannot be located.

    NOTE(review): this relies on the ``find_element_by_*`` helpers and the
    positional driver-path argument; newer Selenium releases are understood to
    have dropped both -- confirm the pinned Selenium version.
    """
    options = Options()
    if headless:
        options.add_argument('--headless')
    driver = webdriver.Chrome(chromeExeLoc, options=options)
    try:
        driver.maximize_window()
        driver.get(rootrurl)
        driver.implicitly_wait(30)  # implicit wait for element lookups
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')  # scroll to the bottom

        # The bottom banner would intercept clicks on the "load more" button,
        # so dismiss it when it is present.
        banner_close = _find_optional(driver, 'dialog-bottom-close')
        if banner_close is not None:
            banner_close.click()

        for _ in range(load_more_clicks):
            time.sleep(3)  # fixed pause before each click
            driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
            button = driver.find_element_by_class_name('more-result').find_element_by_tag_name('button')
            ActionChains(driver).move_to_element(button).perform()
            button.click()
            print('loading more...')
        # NOTE(review): there is no pause after the final click, so items that
        # are still loading may be missed -- confirm against the live page.

        for item in driver.find_elements_by_class_name('witkey-item'):
            text, city, cons, price, amount, title, tags = _parse_item(item)
            print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(text, city, cons, price, amount, title, tags))
    finally:
        # quit() ends the whole WebDriver session even when scraping fails,
        # instead of only closing the current window.
        driver.quit()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    test()