利用nodejs的cheerio抓取网站数据

最新推荐文章于 2025-02-19 17:45:58 发布

weixin_30256505

最新推荐文章于 2025-02-19 17:45:58 发布

阅读量120

点赞数

CC 4.0 BY-SA版权

原文链接：http://www.cnblogs.com/txxt/p/6079908.html

本文介绍了一种使用Node.js及Cheerio库抓取特定网站数据的方法。通过HTTP模块发起请求并利用Cheerio解析HTML，提取所需信息。示例展示了如何从指定博客页面抓取文章标题。

利用nodejs的cheerio抓取网站数据

/*引入模块*/
var http = require('http')
var url = 'http://www.cnblogs.com/txxt'
var cheerio = require('cheerio')

/**
 * Extract the post titles from a blog page and print them.
 *
 * Every element matching `.postTitle` is read on its own, its text is
 * trimmed, and non-empty results are collected in document order. The
 * titles are printed one per line and also returned so callers can reuse
 * them.
 *
 * @param {string} html - Raw HTML of the page to scan.
 * @returns {string[]} The trimmed, non-empty title texts (empty array when
 *   nothing matches).
 */
function filter(html) {
   var $ = cheerio.load(html);
   var titleData = [];

   // Calling .text() on the whole selection returns the combined text of
   // every match as one string, so each element is visited individually.
   $('.postTitle').each(function (index, element) {
      var title = $(element).text().trim();
      if (title) {
         titleData.push(title);
      }
   });

   console.log(titleData.join('\n'));
   return titleData;
}

/* Fetch the page at `url` and pass the complete HTML to filter(). */
http.get(url, function (res) {
	// Only a 200 response carries the page itself; a redirect or error body
	// would otherwise be parsed as if it were the blog page.
	if (res.statusCode !== 200) {
		console.log('获取数据出错', 'HTTP ' + res.statusCode);
		// Drain the unread body so the underlying socket can be released.
		res.resume();
		return;
	}

	// Decode at the stream level: appending raw Buffer chunks to a string
	// decodes each chunk separately and corrupts any multi-byte character
	// that happens to be split across two chunks.
	res.setEncoding('utf8');

	var html = '';
	res.on('data', function (chunk) {
		html += chunk;
	});
	res.on('end', function () {
		filter(html);
	});
}).on('error', function (err) {
	// Include the reason for the failure instead of discarding the error.
	console.log('获取数据出错', err.message);
});