利用 nodejs 爬取各个博客网站最新或最热的博客文章,目前爬取的网站有开发者头条、SegmentFault、极客头条、HTML5 梦工厂、Tutorialzine 等。在线预览地址
nodejs SegmentFaultHTML5 Tutorialzine,
SuperAgent : superagent
, ajax api
,,, nodejs
api
, nodejs
cheerio JQuery
HTML
Heroku: nodejs
// Fetch the page at `toutiaoUrl`, collect every link matched by
// '.post .content a', and reply with an array of { title, href } objects.
superagent.get(toutiaoUrl)
  .end(function(error, sres) {
    // When the request fails, `sres` may be missing, so reading `sres.text`
    // would throw inside this callback. Answer with an error status instead.
    // NOTE(review): assumes `res` is an Express-style response exposing
    // `status()` — confirm against the enclosing route handler.
    if (error || !sres) {
      res.status(502).send({
        error: 'Failed to fetch ' + toutiaoUrl
      });
      return;
    }

    var $ = cheerio.load(sres.text);
    var articles = [];
    $('.post .content a').each(function(index, element) {
      var $element = $(element);
      var rawHref = $element.attr('href');
      // Anchors without an href cannot be resolved (url.resolve rejects
      // non-string input), so skip them rather than aborting the whole scrape.
      if (!rawHref) {
        return;
      }
      articles.push({
        title: $element.text(),
        // Turn relative links into absolute URLs based on the scraped page.
        href: url.resolve(toutiaoUrl, rawHref)
      });
    });
    res.send(articles);
  });
superagent
html
cheerio
JQuery
HTML
SegmentFault
sessionStorage
// Render the post list for the current URL from the sessionStorage cache
// when an entry exists; otherwise (no storage support, or nothing cached
// yet) load it through getPostByAJAX.
var cachedPosts = window.sessionStorage
  ? sessionStorage.getItem(urls[count])
  : null;

if (cachedPosts) {
  // The cached entry is a JSON-encoded value that is assigned to innerHTML.
  postsUL.innerHTML = JSON.parse(cachedPosts);
} else {
  getPostByAJAX(urls[count]);
}