Xvfb 和 PyVirtualDisplay
使用这个方案可以兼容多种浏览器的 WebDriver,例如 Chrome、Firefox 等。
apt-get install xvfb    # CentOS/RHEL: yum install xorg-x11-server-Xvfb
pip install PyVirtualDisplay
检查DISPLAY设置,默认应该是 :1
env | grep DISPLAY
部分项目代码
import re
import time

import requests
from lxml import etree
from pyvirtualdisplay import Display
from selenium import webdriver
# Fetch one page with Chrome running inside a virtual X display, then parse
# the part of the HTML that follows the marker text.
url = 'http://***'

opt = webdriver.ChromeOptions()
opt.add_argument('--disable-extensions')
opt.add_argument('--disable-gpu')
opt.add_argument("--no-sandbox")

# Start the virtual display before Chrome is launched; it is stopped in the
# ``finally`` below so the display process is not left running.
display = Display(visible=0, size=(800, 800))
display.start()
try:
    driver = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver", chrome_options=opt)
    try:
        driver.get(url)
        # Fixed pause before reading the page source
        # (presumably to let the page finish rendering -- TODO confirm).
        time.sleep(3)
        response = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()
finally:
    display.stop()

# print(response)
# Keep only the text after the first occurrence of the marker (and before
# the next one, if any). Raises IndexError when the marker is absent.
response = response.split('中间右边的部分')[1]
selector = etree.HTML(response)
def _fetch_page_source(page_url):
    """Load ``page_url`` in a fresh Chrome session and return its page source.

    The browser is always quit, even when loading the page raises, so no
    Chrome/chromedriver process is left behind.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('--disable-extensions')
    opt.add_argument('--disable-gpu')
    opt.add_argument("--no-sandbox")
    driver = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver", chrome_options=opt)
    try:
        driver.get(page_url)
        # Fixed pause before reading the page source
        # (presumably to let the page finish rendering -- TODO confirm).
        time.sleep(3)
        return driver.page_source
    finally:
        driver.quit()


# One virtual display is shared by every browser session of the crawl and is
# stopped when the crawl ends, instead of starting a new, never-stopped
# display for each page.
display = Display(visible=0, size=(800, 800))
display.start()
try:
    # range(1, 2) visits only list page 1; widen the range to crawl more pages.
    for i in range(1, 2):
        url = 'http://www.cbrc.gov.cn/zhuanti/xzcf/get2and3LevelXZCFDocListDividePage//2.html?current=' + str(i)
        response = _fetch_page_source(url)
        # print(response)
        # Keep only the text after the first occurrence of the marker.
        # Raises IndexError when the marker is absent.
        response = response.split('中间右边的部分')[1]
        selector = etree.HTML(response)
        node_list = selector.xpath('//tr')
        for node in node_list:
            # Rows without a link in the first cell yield an empty title.
            title = ''.join(node.xpath('./td[1]/a/@title'))
            print(title)
            # Skip rows with short/empty titles and the excluded report title.
            if len(title) > 4 and '商业银行主要监管指标' not in title:
                url_href = ''.join(node.xpath('./td[1]/a/@href'))
                url_href = 'http://www.cbrc.gov.cn' + url_href
                url = url_href
                # Fetch the detail page linked from this row and parse it.
                node_lists = _fetch_page_source(url)
                # print(node_lists)
                selectors = etree.HTML(node_lists)
finally:
    display.stop()