// Install the dependency: npm i cheerio
// cheerio is used the same way as jQuery (使用方式和 jQuery 一样)
const http = require("http")
const url = require("url")
const https = require("https")
const request = require("request")
const cheerio = require("cheerio") // 解析html
// HTTP entry point. `/index` fetches the remote page through postReturn(),
// runs it through spider() and sends the result back; every other path is
// answered with 404 so that no request is left without a response.
http.createServer((req, res) => {
  const { pathname } = url.parse(req.url, true);
  // Headers shared by every response. The CORS header uses the `*` wildcard
  // so that pages served from any origin may call this server.
  const responseHeaders = {
    "Content-Type":"text/html;charset=utf-8",
    "access-control-allow-origin":"*",
  };
  switch (pathname) {
    case "/index":
      postReturn((data) => {
        let body;
        try {
          body = spider(data);
        } catch (err) {
          // A parsing failure must not escape this callback: an uncaught
          // exception here would terminate the whole server process.
          console.error(err);
          res.writeHead(500, responseHeaders);
          res.end("Internal Server Error");
          return;
        }
        res.writeHead(200, responseHeaders);
        res.end(body);
      });
      break;
    default:
      // Unknown route: finish the response explicitly instead of leaving the
      // client waiting on a connection that is never ended.
      res.writeHead(404, responseHeaders);
      res.end("Not Found");
      break;
  }
}).listen(3000, () => {
  console.log("启动成功,端口号3000");
});
/**
 * Fetch the page at https://www.biqg.cc/book/6909/ and hand its body to `cb`.
 *
 * On a 200 response `cb(body)` is called, exactly as before. On a transport
 * error or any other status code the problem is logged and `cb("", error)` is
 * called, so the caller is always notified and can finish its own work
 * instead of waiting forever.
 *
 * @param {(data: string, error?: Error) => void} cb - Receives the response
 *   body on success, or an empty string plus the error on failure.
 * @returns {void}
 */
function postReturn(cb) {
  const headers = {
    "Referer":"https://www.beqege.com",
    // Only advertise encodings that `request` can decode when `gzip: true`
    // is set (gzip and deflate); advertising `br` could yield a body that
    // arrives still compressed.
    "Accept-Encoding":"gzip, deflate",
    "Accept-Language":"zh-CN,zh;q=0.9",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    // NOTE(review): hard-coded session cookie, presumably copied from a
    // browser; it will likely expire and arguably should not live in source.
    "Cookie":"cf_clearance=AdWM_mZ22imbbpqsfDYKxNAz70aV3KTc0COgOmntqc8-1705570091-1-AQni7kyL0tfY+4/zDGV2msmH30MV2QpE7WP4TGI/adYo0jMFLHqCVamokkIr5Vhuef/TU2mI18fsRmJpxCv72dw=; Hm_lvt_aee414c80f8428eae3eaab28dda30d0a=1705570093,1705644117; Hm_lpvt_aee414c80f8428eae3eaab28dda30d0a=1705644957",
  };
  // The second argument of request.get() is an options object, so the header
  // map has to be nested under `headers`; passed directly it is ignored and
  // no custom header is sent. `gzip: true` makes request decompress the body.
  const options = { headers, gzip: true };
  request.get("https://www.biqg.cc/book/6909/", options, (error, response, data) => {
    if (error || !response) {
      // On a transport failure `response` is undefined, so it must not be
      // dereferenced before this check.
      const failure = error || new Error("No response received");
      console.error(failure);
      cb("", failure);
      return;
    }
    console.log(response.statusCode);
    if (response.statusCode !== 200) {
      cb("", new Error(`Unexpected status code ${response.statusCode}`));
      return;
    }
    cb(data);
  });
}
/**
 * Parse the HTML in `data`, collect the link text of every
 * `.listmain dl dd` entry (presumably the chapter titles) and log the
 * resulting array.
 *
 * @param {string} data - HTML source of the page.
 * @returns {string} The `data` argument itself, unchanged.
 */
function spider(data) {
  // Load the document so it can be queried with jQuery-style selectors.
  const $ = cheerio.load(data);
  // For each matched <dd>, wrap the raw node again to look up its <a> and
  // read that anchor's text.
  const directory = $(".listmain dl dd")
    .toArray()
    .map((entry) => $(entry).find("a").text());
  console.log(directory);
  return data;
}