nodejs+jquery爬虫的简单封装

默认分类 | 2015-03-08 04:19:46 | 阅读 8284 次 | 评论(0)

jq.js

/**
 * Builds a jQuery object bound to a fresh jsdom window and extends it with a
 * crawler-oriented `get` helper.
 *
 * @returns {Function} the jQuery object, with `$.get` replaced by the helper
 *   documented below.
 */
module.exports.jq = function(){
	var request = require('request');
	var jsdom = require("jsdom");
	var jquery = require('jquery');
	var $ = jquery(jsdom.jsdom().parentWindow);
	var Iconv = require('iconv').Iconv;

	// Charset labels that should be decoded with a different converter.
	// The original code decoded every labelled page as GBK; only this alias is
	// kept so callers passing "gb2312" still get GBK decoding.
	var CHARSET_ALIASES = {gb2312: 'gbk'};

	/**
	 * Converts a raw response body to a UTF-8 JavaScript string.
	 *
	 * @param {Buffer} body - raw bytes of the response.
	 * @param {?string} charset - source encoding, or a falsy value to treat
	 *   the bytes as UTF-8 already.
	 * @returns {string} decoded text.
	 * @throws whatever Iconv throws for an unknown charset or invalid input.
	 */
	function decodeBody(body, charset) {
		if (charset) {
			var label = String(charset).toLowerCase();
			var source = Object.prototype.hasOwnProperty.call(CHARSET_ALIASES, label)
				? CHARSET_ALIASES[label]
				: charset;
			body = new Iconv(source, 'utf8').convert(body);
		}
		return body.toString();
	}

	$.extend({

		/**
		 * Downloads a page, decodes it, parses it with jsdom and hands the
		 * jQuery-wrapped `html` element to the callback.
		 *
		 * Call as `$.get(url, callback)` or `$.get(url, charset, callback)`.
		 *
		 * @param {string} url - address of the page to fetch.
		 * @param {string} [charset] - encoding of the page (e.g. "gbk"); when
		 *   omitted the body is read as UTF-8.
		 * @param {function(?Object, *=)} callback - invoked with the jQuery
		 *   selection of `html` on success, or with `(null, error)` when the
		 *   request, the charset conversion or the DOM construction fails.
		 * @throws {TypeError} if no callback function is supplied.
		 */
		get: function() {
			var url = arguments[0];
			var charset = null;
			var callback = arguments[1];
			if (arguments.length >= 3) {
				charset = arguments[1];
				callback = arguments[2];
			}
			if (typeof callback !== 'function') {
				throw new TypeError('$.get expects (url, callback) or (url, charset, callback)');
			}

			// encoding: null makes request deliver the body as a Buffer, which
			// avoids a lossy round trip through a binary string.
			request({uri: url, encoding: null}, function(error, response, body) {
				if (error || body == null) {
					callback(null, error || new Error('empty response from ' + url));
					return;
				}

				var html;
				try {
					html = decodeBody(body, charset);
				} catch (conversionError) {
					callback(null, conversionError);
					return;
				}

				jsdom.env({
					html: html,
					done: function (errors, window) {
						// A window may still be delivered alongside errors, so
						// only a missing window is treated as a failure.
						if (!window) {
							callback(null, errors || new Error('jsdom did not create a window'));
							return;
						}
						callback(jquery(window)("html"));
					}
				});
			});
		}

	});

	return $;
};


test.js

// Build the crawler's jQuery object from the local jq module.
var crawler = require("./jq");
var $ = crawler.jq();

/**
 * Prints the text of the page's <title> element.
 *
 * @param {Object} page - jQuery selection passed to the $.get callback.
 */
function printTitle(page) {
	var titleNode = page.find("title");
	console.log(titleNode.text());
}

// Fetch the forum home page, passing "gbk" as its charset, and print the title.
$.get("http://bbs.bccn.net", "gbk", printTitle);


node test.js


文章评论,共0条
游客请输入验证码
浏览1907737次
文章归档
最新评论
  • Miss_Zou:目测是大佬
  • 神犇dengyuhy:语文很好
  • 猫云:学习 学习