# -*- coding:utf-8 -*-
"""Take a screenshot of a book118 document preview page with Selenium.

The script opens the preview URL in PhantomJS, forces the element with
id ``pdf`` to be displayed, scrolls that element down, and saves a
screenshot of the browser window to ``E://4.png``.

NOTE(review): ``webdriver.PhantomJS`` and ``find_element_by_id`` are only
available in older Selenium releases -- confirm the installed version
before upgrading.
"""
import re
import time

from bs4 import BeautifulSoup
import pdfcrowd
from selenium import webdriver


def main():
    """Drive the browser session and write the screenshot to disk.

    The driver is always shut down in a ``finally`` clause so that the
    PhantomJS process does not stay alive when a step raises.
    """
    # Alternative drivers kept for reference (Chrome with a mobile
    # user agent, or Firefox):
    # options = webdriver.ChromeOptions()
    # options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"')
    # driver = webdriver.Chrome(chrome_options=options)
    # driver = webdriver.Firefox()
    driver = webdriver.PhantomJS('E:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')
    try:
        # driver.set_page_load_timeout(20)
        driver.get('http://view45.book118.com/pdf/dXAyNS5ib29rMTE4LmNvbS44MFwzNzAxNjgtNTkyODhkYjZlY2YzZi5kb2N4?readpage=g2xLH%404OWN2hJtxm49ja3Q%3D%3D&furl=YOQStEpojXDkR5q1h7jAA6aP2h6TJ9QI%409WXyuBb6wi1A9xo624sBXDTyLmGc8KG6I6E4wyI8nSzs61yjeSGy0WXsg70I77KTh9GrnOK3R0%3D&token=3%40bcmzZZiuExhHvwl8Ucz8c%40U5kSkPSg')
        driver.maximize_window()
        # html = driver.page_source
        # bf1 = BeautifulSoup(html, 'html5lib')
        time.sleep(1)

        # Make the element with id "pdf" visible before scrolling it.
        pdf = driver.find_element_by_id('pdf')
        driver.execute_script("arguments[0].style.display = 'block';", pdf)
        # Fixed pause; presumably gives the page time to render -- TODO confirm.
        time.sleep(10)
        # print 'pdf===',pdf

        # Scroll the element's own scrollbar. The original note said
        # "drag the scrollbar to the bottom", but the script actually
        # sets a fixed offset of 1000.
        js = "var q=arguments[0].scrollTop=1000"
        driver.execute_script(js, pdf)
        time.sleep(10)

        driver.save_screenshot('E://4.png')
    finally:
        # Always terminate the browser process, even on failure.
        driver.quit()


if __name__ == '__main__':
    main()
Selenium 屏幕快照
最新推荐文章于 2022-10-11 10:42:34 发布