node爬虫cheerio模块

npm i cheerio request
cheerio 的使用方式和 jQuery 一样

const http = require("http")
const url = require("url")
const https = require("https")
const request = require("request")
const cheerio = require("cheerio") // 解析html

// Minimal HTTP server on port 3000.
//   /index     -> fetches the remote page via postReturn(), runs it through
//                 spider() and sends the result back as HTML.
//   any other  -> 404 (previously the response was never ended, so the
//                 client hung until it timed out).
http.createServer((req, res) => {
  const { pathname } = url.parse(req.url, true)
  const responseHeaders = {
    "Content-Type":"text/html;charset=utf-8",
    // CORS header so pages from any origin may call this endpoint
    "access-control-allow-origin":"*"  // "*" wildcard: allow every origin
  }
  switch (pathname) {
    case "/index":
      postReturn((data) => {
        let body
        try {
          body = spider(data)
        } catch (err) {
          // A parsing failure must not take the whole server down.
          console.error(err)
          res.writeHead(500, responseHeaders)
          res.end("Internal Server Error")
          return
        }
        // Status is written only once the outcome is known.
        res.writeHead(200, responseHeaders)
        res.end(body)
      })
      break

    default:
      res.writeHead(404, responseHeaders)
      res.end("Not Found")
      break
  }
}).listen(3000, () => {
  console.log("启动成功,端口号3000")
})

/**
 * Fetch the hard-coded book index page and hand its HTML to `cb`.
 *
 * @param {(data: string) => void} cb - Invoked once per call: with the
 *   response body when the upstream answers HTTP 200, or with an empty
 *   string when the request errors or the status is not 200, so a caller
 *   waiting on the callback can always finish its own response.
 */
function postReturn(cb) {
  const headers = {
    "Referer":"https://www.beqege.com",
    // Advertise only gzip/deflate, which the `gzip` option below decodes;
    // NOTE(review): `request` is not expected to decode brotli ("br") — confirm.
    "Accept-Encoding":"gzip, deflate",
    "Accept-Language":"zh-CN,zh;q=0.9",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Cookie":"cf_clearance=AdWM_mZ22imbbpqsfDYKxNAz70aV3KTc0COgOmntqc8-1705570091-1-AQni7kyL0tfY+4/zDGV2msmH30MV2QpE7WP4TGI/adYo0jMFLHqCVamokkIr5Vhuef/TU2mI18fsRmJpxCv72dw=; Hm_lvt_aee414c80f8428eae3eaab28dda30d0a=1705570093,1705644117; Hm_lpvt_aee414c80f8428eae3eaab28dda30d0a=1705644957",
  }
  // The second argument of request.get is the options object, so the headers
  // must be nested under `headers`; passed bare, none of them are sent.
  // `gzip: true` makes `request` decompress the compressed response body.
  const options = { headers, gzip: true }

  request.get('https://www.biqg.cc/book/6909/', options, (error, response, data) => {
    // Check the error first: on a network failure `response` is undefined.
    if (error) {
      console.error(error)
      cb("")
      return
    }
    console.log(response.statusCode)
    if (response.statusCode !== 200) {
      cb("")
      return
    }
    cb(data)
  })
}
/**
 * Parse the fetched HTML, log the chapter titles found in it, and return
 * the HTML unchanged.
 *
 * @param {string} data - Raw HTML of the page.
 * @returns {string} The same `data` that was passed in.
 */
function spider(data) {
  // Load the markup into cheerio to get a jQuery-like query function.
  const $ = cheerio.load(data)
  // One entry per `dd` under `.listmain dl`: the text of the link(s) inside it.
  const titles = $(".listmain dl dd")
    .map((_, node) => $(node).find('a').text())
    .get()
  console.log(titles)
  return data
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值