用 Node.js 写了一个爬取表情包的小程序。
用到的包:axios cheerio
首先安装node,再安装包:
npm i axios
npm i cheerio
下面是代码:
//表情包爬取
const axios = require('axios')
const cheerio = require('cheerio')
const { fstat } = require('fs')
const path = require('path')
const fs = require('fs')
const { time, assert, error } = require('console')
const { func } = require('assert-plus')
const { dirname } = require('path')
// Root URL of the meme site
const baseUrl = 'https://fabiaoqing.com/';
// Crawl five list pages of memes
main(5);
/**
 * Entry point: requests `downloadPage` list pages of the "hot" ranking and
 * hands each one to parsePageOne.
 *
 * A failure on one list page is logged and does not affect the other pages.
 *
 * @param {number} downloadPage - Number of list pages to request
 *   (page indices 0 .. downloadPage - 1).
 * @returns {Promise<void>} Settles after every parsePageOne call has settled;
 *   never rejects because of a parsePageOne failure.
 */
async function main(downloadPage) {
  const pageUrls = [];
  for (let i = 0; i < downloadPage; i++) {
    // NOTE(review): indices start at 0, as in the original loop — confirm
    // whether the site numbers its pages from 1.
    pageUrls.push(`https://fabiaoqing.com/bqb/lists/type/hot/page/${i}.html`);
  }

  // Each page gets its own rejection handler so no promise is left floating
  // and one broken page cannot surface as an unhandled rejection.
  await Promise.all(
    pageUrls.map((pageUrl) =>
      parsePageOne(pageUrl).catch((err) => {
        console.error(`列表页解析失败:${pageUrl}`, err);
      })
    )
  );
}
/**
 * Fetches one list page, collects the detail-page link of every meme pack on
 * it and runs parsePageTwo on each link.
 *
 * A detail page whose first attempt fails is handed to dealFaileUrl for
 * delayed retries; that retry work is not awaited here.
 *
 * @param {string} httpUrl - URL of the list page.
 * @returns {Promise<void>} Resolves after the first attempt on every detail
 *   page has settled. Rejects if the list page itself cannot be fetched.
 */
async function parsePageOne(httpUrl) {
  const res = await axios.get(httpUrl);
  const $ = cheerio.load(res.data);

  const detailUrls = $('#bqblist a.bqba')
    .toArray()
    .map((element) => $(element).attr('href'))
    // Anchors without an href would resolve to ".../undefined" (or to the
    // site root for an empty string) and could never succeed.
    .filter((href) => Boolean(href))
    .map((href) => new URL(href, baseUrl).href);

  await Promise.all(
    detailUrls.map((imgsUrl) =>
      parsePageTwo(imgsUrl).catch(() => {
        // First attempt failed: start the retry loop with a fresh counter.
        dealFaileUrl(imgsUrl, 0);
      })
    )
  );
}
/**
 * Retries parsePageTwo for a URL that failed, waiting 0.8 s before each
 * attempt, until it succeeds or the retry budget is used up.
 *
 * The budget matters: without it a URL that can never succeed keeps a timer
 * pending forever, and a pending timer prevents the Node process from exiting.
 *
 * @param {string} failUrl - Detail-page URL whose previous attempt failed.
 * @param {number} reqNum - Number of retries already made for this URL.
 * @param {number} [maxRetries=10] - Maximum number of retries before giving up.
 * @returns {void}
 */
function dealFaileUrl(failUrl, reqNum, maxRetries = 10) {
  if (reqNum >= maxRetries) {
    console.error(`${failUrl}重试${reqNum}次后仍然失败,已放弃`);
    return;
  }
  // Retry 0.8 seconds after the failure
  setTimeout(() => {
    parsePageTwo(failUrl).then(
      () => {
        // Success: stop retrying
        console.log(failUrl+"请求完成!");
      },
      () => {
        // Failure: count this attempt and schedule the next one
        const attempts = reqNum + 1;
        console.log(failUrl+"请求了"+attempts+"次");
        dealFaileUrl(failUrl, attempts, maxRetries);
      }
    );
  }, 800);
}
//parsePageTwo('https://fabiaoqing.com/bqb/detail/id/54322.html')
/**
 * Fetches one meme-pack detail page, creates `./image/<pack name>` and
 * downloads every image listed on the page into it.
 *
 * A failed image download is logged and does not fail the whole pack.
 *
 * @param {string} imgsUrl - URL of the meme-pack detail page.
 * @returns {Promise<void>} Resolves after the directory exists and every
 *   download call has settled.
 * @throws {Error} If the page cannot be fetched, the pack name cannot be
 *   read from the page, or the directory cannot be created.
 */
async function parsePageTwo(imgsUrl) {
  const res = await axios.get(imgsUrl);
  const $ = cheerio.load(res.data);

  // Use the text content, not the inner HTML: the title becomes a directory
  // name, so tags and HTML entities must not leak into it.
  const rawName = $('#bqb h1').text();
  // Strip characters that are not allowed in file names (\ / : * ? " < > |),
  // otherwise creating the directory fails.
  const dirName = rawName.replace(/[\\/:*?"<>|]/g, '').trim();
  if (!dirName) {
    throw new Error('无法获取表情包名!');
  }

  // Promise-based mkdir: a failure rejects this function's promise, where the
  // caller can handle it, instead of being thrown from inside a callback.
  await fs.promises.mkdir(`./image/${dirName}`, { recursive: true });
  console.log("目录创建完成:"+dirName);

  const imgUrls = $('.swiper-wrapper img')
    .toArray()
    .map((element) => $(element).attr('data-original'));

  await Promise.all(
    imgUrls.map((imgUrl) =>
      download(imgUrl, dirName).catch((error) => {
        console.log("下载失败:"+error);
      })
    )
  );
}
//download("https://fabiaoqing.com/bqb/detail/id/54329.html")
/**
 * Downloads one image into `./image/<dirName>/`, named after the last path
 * segment of the image URL.
 *
 * @param {string} imgUrl - URL of the image.
 * @param {string} dirName - Name of the target directory under `./image`;
 *   the directory is expected to exist already.
 * @returns {Promise<void>} Resolves once the file has been fully written and
 *   closed.
 * @throws {Error} If the request fails or either stream reports an error.
 */
async function download(imgUrl, dirName) {
  // File name
  const fileName = path.parse(imgUrl).base;

  // Request first and open the file only after a response arrives, so a
  // failed request does not leave an empty file and an open handle behind.
  const res = await axios.get(imgUrl, { responseType: 'stream' });

  await new Promise((resolve, reject) => {
    const ws = fs.createWriteStream(`./image/${dirName}/${fileName}`);
    let failed = false;

    const fail = (err) => {
      if (failed) {
        return;
      }
      failed = true;
      // Release both ends so neither the socket nor the file handle lingers.
      res.data.destroy();
      ws.destroy();
      reject(err);
    };

    res.data.on('error', fail);
    ws.on('error', fail);
    ws.on('close', () => {
      // 'close' also fires after destroy(), so report success only when no
      // error was seen.
      if (!failed) {
        console.log(fileName+"下载完成!");
        resolve();
      }
    });

    res.data.pipe(ws);
  });
}
Q: 下载的文件较多时,程序运行结束后控制台不会自动退出,而是一直处于阻塞状态。这个问题一直没有解决,已经解决的朋友欢迎一起交流。