核心代码
index.js
const puppeteer = require('puppeteer');
/**
 * Entry-point script: launches a headless browser, opens the kuaidaili
 * "ops" page, extracts one record per row of the `#freelist` table and
 * prints the resulting array to stdout.
 *
 * Each record has the shape `{ host, port, address }`, taken from the 1st,
 * 2nd and 6th `<td>` of the row. A field is `null` when the corresponding
 * cell is missing from the row.
 *
 * The browser is always closed, even when navigation or extraction fails,
 * and any failure is logged and reported through a non-zero exit code.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.kuaidaili.com/ops/');

    // This callback runs inside the page context, so it may only use DOM
    // APIs and must return serializable data.
    const res = await page.evaluate(() => {
      // Returns the cell's visible text, or null when the cell is absent.
      const text = (row, selector) => {
        const cell = row.querySelector(selector);
        return cell && cell.innerText;
      };

      return Array.from(
        document.querySelectorAll('#freelist tbody tr'),
        (row) => ({
          host: text(row, 'td:nth-child(1)'),
          port: text(row, 'td:nth-child(2)'),
          address: text(row, 'td:nth-child(6)'),
        }),
      );
    });

    console.log(res);

    // Keep the original 2-second pause before shutting down. `page.waitFor`
    // is deprecated/removed in current Puppeteer, so use a plain timer.
    await new Promise((resolve) => {
      setTimeout(resolve, 2 * 1000);
    });
  } finally {
    // Release the browser process regardless of success or failure.
    await browser.close();
  }
})().catch((err) => {
  // Avoid an unhandled rejection: report the failure and signal it to the shell.
  console.error(err);
  process.exitCode = 1;
});
npm

本文介绍了如何利用快代理服务获取代理 IP，并结合核心代码 `index.js` 和 `package.json` 文件，展示了通过 `npm run v1` 命令执行爬虫的运行流程及结果。
最低0.47元/天 解锁文章
1839

被折叠的 条评论
为什么被折叠?



