// create links should be hamstersed
function swallow(opts) {
var fs = require("fs");
var dict = JSON.parse(fs.read("./makers/dotaMaker.json"));
var arr = [];
dict.forEach(function(mi, i) {
var url = "http://i.youku.com/u/" + mi.code + "/videos";
arr.push(url);
});
opts.urlArr = arr;
}
/**
 * Fetch step: scrapes one record per video entry from the current page.
 *
 * Uses the global jQuery-style `$` to select every `.items ul.v` node and
 * reads its title, link, thumbnail, HD flag, duration, publish text and the
 * two stat counters.
 *
 * NOTE(review): the original comment tags this step as "sandbox", so it is
 * presumably evaluated inside the page context; every helper is kept nested
 * so the function stays self-contained — confirm against the caller.
 *
 * @param {*} db - Not used by this function.
 * @returns {Array<Object>} Records with the keys `title`, `link`, `thumb`,
 *     `ishd`, `time`, `pub`, `viewCount` and `commentCount`, in page order.
 */
function hamsters(db) {
    // Reads all fields of a single wrapped video node into a plain record.
    function readVideo(entry) {
        return {
            title: entry.find("li.v_title a").attr("title"),
            link: entry.find("li.v_link a").attr("href"),
            thumb: entry.find(".v_thumb img").attr("src"),
            ishd: entry.find(".v_ishd span").attr("title"),
            time: entry.find(".v_time .num").text(),
            pub: entry.find(".v_pub span").text(),
            // The first `.num` inside `.v_stat` is stored as the view count,
            // the second as the comment count.
            viewCount: entry.find(".v_stat .num:eq(0)").text(),
            commentCount: entry.find(".v_stat .num:eq(1)").text()
        };
    }

    var videos = [];
    $(".items ul.v").each(function(index, element) {
        videos.push(readVideo($(element)));
    });
    return videos;
}
// save data
function bear(opts) {
// 把所有的数组联合成一个数组
var data = Array.prototype.concat.apply([], opts.data);
var db = opts.db;
var transData = require("../lib/transData");
data.forEach(function(ii, i) {
var item=transData(ii);
if (!db({
title: item.title
}).first()) {
db.insert(item);
}
});
}
// Public interface of this module: one handler per scraping step.
var steps = {};
// Generates the links that are going to be fetched.
steps.swallow = swallow;
// Scrapes the data out of the fetched links.
steps.hamsters = hamsters;
// Writes the data to the database in the expected format.
steps.bear = bear;
module.exports = steps;
// Continuing from the previous post: the code above implements scraping the video information of Youku Dota video authors.