Headless Chrome
chrome --headless --remote-debugging-port=9222 --disable-gpu
chrome --headless --disable-gpu --remote-debugging-port=9222 --window-size=1280x1696
const CDP = require("chrome-remote-interface");
/**
 * Collect the unique link targets of the title anchors on the current page.
 *
 * @param {object} client - Connected chrome-remote-interface client; only its
 *   `DOM` domain is used here.
 * @returns {Promise<Set<string>>} Unique `href` values of the elements that
 *   match the `.content a.title` selector.
 */
async function scrape(client) {
  const { DOM } = client;

  // Get document root element nodeId
  const rootElement = await DOM.getDocument();
  const { root: { nodeId } } = rootElement;

  // Use selector to get the links
  const { nodeIds: linkIDs } = await DOM.querySelectorAll({
    selector: ".content a.title",
    nodeId,
  });

  // Get each element's attributes
  const attributes = await Promise.all(
    linkIDs.map((ID) => DOM.getAttributes({ nodeId: ID })),
  );

  // Attributes are returned in a single flat array of name/value pairs:
  // [..., "href", "www.example.com"]
  // Only even positions hold names, so scan those; otherwise an attribute
  // whose *value* happens to be "href" would be mistaken for the name.
  const links = [];
  for (const { attributes: attrs } of attributes) {
    for (let i = 0; i + 1 < attrs.length; i += 2) {
      if (attrs[i] === "href") {
        links.push(attrs[i + 1]);
        break;
      }
    }
  }

  // Use a Set to keep unique items only
  return new Set(links);
}
/**
 * Drive a connected DevTools client: open the target page and, once it has
 * loaded, scrape its links and log them.
 *
 * The client is closed after scraping finishes (successfully or not) and also
 * when enabling the domains or navigating fails.
 *
 * @param {object} client - Connected chrome-remote-interface client.
 * @returns {Promise<void>} Settles once navigation has been requested (or has
 *   failed); scraping itself runs later from the load-event callback.
 */
async function onClientHandler(client) {
  // Extract domains
  const { Network, Page } = client;

  // Register the handler before navigating so the load event cannot be missed.
  Page.loadEventFired(() => {
    console.log("Load event fired");
    scrape(client)
      .then((links) => {
        console.log(links);
      })
      .catch((err) => {
        // Without this a scraping failure would be an unhandled rejection
        // and the connection would stay open.
        console.error(err);
      })
      .finally(() => {
        client.close();
      });
  });

  try {
    await Promise.all([Network.enable(), Page.enable()]);
    await Page.navigate({ url: "https://reddit.com/r/programming" });
  } catch (err) {
    console.error(err);
    client.close();
  }
}
// Connect to the browser's debugging endpoint and hand the client to
// onClientHandler; anything emitted on "error" is logged.
CDP(onClientHandler).on("error", (connectionError) => {
  console.error(connectionError);
});
Puppeteer
Headless PhantomJS
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Launch Firefox in headless mode, load a page and print its title.
options = Options()
options.add_argument('-headless')
# Create exactly one driver: a second webdriver.Firefox(...) call would start
# another browser and orphan the first one, which would never be quit.
driver = webdriver.Firefox(firefox_options=options)
try:
    driver.get('http://www.baidu.com')
    # Wait up to 10 seconds for the <title> element to be present in the DOM.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, 'title')))
    # <title> is not a rendered element, so WebElement.text is empty for it;
    # driver.title returns the actual page title.
    print(driver.title)
finally:
    # Always shut the browser down, even if loading or waiting fails.
    driver.quit()
Headless Firefox
/path/to/firefox -headless -screenshot https://developer.mozilla.com
var webdriver = require('selenium-webdriver'),
By = webdriver.By,
until = webdriver.until;
var firefox = require('selenium-webdriver/firefox');
# chrome headless
# Keys is needed for Keys.RETURN below and is not imported anywhere else in view.
from selenium.webdriver.common.keys import Keys

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('disable-gpu')
# options.add_argument('remote-debugging-port=9222')
driver = webdriver.Chrome(chrome_options=options)
try:
    driver.get("http://www.python.org")
    # First check: the landing page title mentions Python.
    if "Python" in driver.title:
        print("first aseert success")
    else:
        print("ERROR: first assert failed")
    # Submit a search through the field named "q".
    elem = driver.find_element_by_name("q")
    elem.send_keys("pycon")
    elem.send_keys(Keys.RETURN)
    # Second check: the search produced at least one result.
    if "No results found." not in driver.page_source:
        print("second aseert success")
    else:
        print("ERROR: second assert failed")
    driver.back()
finally:
    # quit() ends the whole session (browser and driver process); close()
    # would only close the current window. Running it in finally guarantees
    # cleanup even when a step above raises.
    driver.quit()