from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FOptions
import time
from scrapy.http import HtmlResponse
class SeleniumMiddlewares(object):
    """Scrapy downloader middleware that loads pages in Firefox via Selenium.

    Requests carrying a ``page`` entry in ``request.meta`` are opened in a
    shared browser instance, scrolled so that content added on scroll has a
    chance to appear, and answered with an ``HtmlResponse`` built from the
    browser's current page source. Requests without a ``page`` entry are
    left to Scrapy's regular downloader.
    """

    # The page is scrolled in SCROLL_STEPS increments of SCROLL_STEP_PX pixels.
    SCROLL_STEPS = 7
    SCROLL_STEP_PX = 1000
    # Seconds to pause after every scroll.
    SCROLL_PAUSE = 2

    def __init__(self):
        """Start the Firefox browser shared by every request."""
        self.options = FOptions()
        # Enable the next line to run Firefox without a visible window.
        #self.options.add_argument("-headless")
        self.browser = webdriver.Firefox(executable_path="/home/hello/Downloads/geckodriver",firefox_options=self.options)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and arrange for the browser to be closed.

        Scrapy calls this hook (when present) instead of the bare
        constructor; it is used here only to subscribe ``spider_closed``.
        """
        # Local import keeps the new dependency inside the class block.
        from scrapy import signals

        middleware = cls()
        crawler.signals.connect(middleware.spider_closed, signal=signals.spider_closed)
        return middleware

    def spider_closed(self, spider):
        """Quit the browser so no Firefox/geckodriver process is left behind."""
        self.browser.quit()

    def _scroll_through_page(self):
        """Scroll down step by step, then jump to the bottom, pausing each time."""
        for step in range(1, self.SCROLL_STEPS + 1):
            self.browser.execute_script("window.scrollTo(0,{})".format(step * self.SCROLL_STEP_PX))
            time.sleep(self.SCROLL_PAUSE)
        self.browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
        time.sleep(self.SCROLL_PAUSE)

    def process_request(self, request, spider):
        """Render ``request`` in the browser and return the resulting HTML.

        Args:
            request: The Scrapy request; ``request.meta['page']`` selects the
                handling (1: open the URL and scroll; 2: open the URL, click
                the next-page link, then scroll).
            spider: The spider issuing the request (unused).

        Returns:
            ``None`` when the request has no ``page`` meta entry, so Scrapy
            continues with its normal download; otherwise an ``HtmlResponse``
            holding the browser's current URL and page source.
        """
        page = request.meta.get('page')
        if page is None:
            # Requests without the marker (e.g. robots.txt) are not ours.
            return None
        page = int(page)

        if page == 1:
            self.browser.get(request.url)
            time.sleep(5)
            self._scroll_through_page()
        elif page == 2:
            self.browser.get(request.url)
            self.browser.implicitly_wait(10)
            # Go to the bottom first, then look up the pagination control.
            self.browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
            time.sleep(2)
            # The XPath matches an <em> whose text contains "next page".
            next_page = self.browser.find_element_by_xpath('//em[contains(text(),"下一页")]')
            next_page.click()
            time.sleep(3)
            self._scroll_through_page()

        # NOTE(review): for page values other than 1 or 2 nothing is loaded,
        # so the response reflects whatever the browser currently shows —
        # confirm this is intended.
        return HtmlResponse(url=self.browser.current_url,body=self.browser.page_source,encoding="utf-8",request=request)