nodejs+jquery爬虫的简单封装

作者在 2015-03-08 04:19:46 发布以下内容

jq.js

/**
 * Builds a jQuery instance bound to a fresh jsdom window and replaces its
 * `get` with a crawler-oriented version: it downloads a page, optionally
 * converts it to UTF-8 with iconv, parses it with jsdom and hands a
 * jQuery-wrapped `<html>` element to the caller.
 *
 * @returns {Function} the extended jQuery object.
 */
module.exports.jq = function(){
	var request = require('request');
	var jsdom = require("jsdom");
	var jquery = require('jquery');
	var $ = jquery(jsdom.jsdom().parentWindow);
	var Iconv = require('iconv').Iconv;

	// Charset names that are decoded with a different iconv encoding.
	// gb2312 stays mapped to gbk so pages labelled gb2312 are decoded as before.
	var charsetAliases = {gbk: 'gbk', gb2312: 'gbk'};

	// Maps a caller-supplied charset to the iconv encoding name to decode with.
	function resolveCharset(name) {
		var key = String(name).toLowerCase();
		// Own-property check so names like "constructor" never hit Object.prototype.
		if (Object.prototype.hasOwnProperty.call(charsetAliases, key)) {
			return charsetAliases[key];
		}
		return String(name);
	}

	$.extend({

		/**
		 * Fetches `url` and passes the parsed document to `callback`.
		 *
		 * Signatures: get(url, callback) or get(url, charset, callback).
		 *
		 * @param {string} url - address of the page to download.
		 * @param {string} [charset] - source encoding of the page; when given,
		 *     the body is converted from it to UTF-8 before parsing.
		 * @param {Function} callback - called as callback(html) on success,
		 *     where html is the jQuery-wrapped "html" element, or as
		 *     callback(null, error) when the request, the charset conversion
		 *     or the parsing fails.
		 * @throws {TypeError} when no callback function is supplied.
		 */
		get: function() {
			var url = arguments[0];
			var hasCharset = arguments.length >= 3;
			var charset = hasCharset ? arguments[1] : null;
			var callback = hasCharset ? arguments[2] : arguments[1];

			if (typeof callback !== 'function') {
				throw new TypeError('$.get expects (url, callback) or (url, charset, callback)');
			}

			// encoding: null makes request deliver the raw bytes as a Buffer,
			// so no lossy string round-trip is needed before decoding.
			request({uri: url, encoding: null}, function(error, response, body) {
				if (error) {
					callback(null, error);
					return;
				}

				var html;
				try {
					if (charset) {
						body = new Iconv(resolveCharset(charset), 'utf8').convert(body);
					}
					html = body.toString();
				} catch (conversionError) {
					// Unknown charset names and invalid byte sequences end up here.
					callback(null, conversionError);
					return;
				}

				jsdom.env({
					html: html,
					done: function (errors, window) {
						if (!window) {
							callback(null, errors);
							return;
						}
						callback(jquery(window)("html"));
					}
				});
			});
		}

	});

	return $;
};


test.js

// Demo script: download a page, decode it as GBK and print its <title> text.
var crawler = require("./jq");
var $ = crawler.jq();

// Receives the jQuery-wrapped document produced by $.get.
function printTitle(html) {
	var titleText = html.find("title").text();
	console.log(titleText);
}

$.get("http://bbs.bccn.net", "gbk", printTitle);


node test.js


默认分类 | 阅读 9583 次
文章评论,共0条
游客请输入验证码
浏览2893749次
文章归档
最新评论
  • 静夜思:-1是多核
  • 时光拾荒者:CtrlCV还是强的😝
  • 硬识岩丝:解决了,太感谢了~