Concurrent HTTP requests in node.js

When fetching a list of 1 million URLs, you want to process them in parallel, but not all at the same time.

The problem: 1 million concurrent requests is too much

var http = require("http");

// Naive version: start a request for every id up front, with no limit on
// how many are in flight at once.
var totalUrl = 10000000;
var i = 0;
while( i < totalUrl ) {
    getUrl(i);
    i++;
}


/**
 * Request one page by id and log when its body has been fully received.
 * Request failures are logged; nothing is returned to the caller.
 *
 * @param idToFetch id appended to the `id` query-string parameter.
 */
function getUrl(idToFetch){

	var requestOptions = {
		host: 'www.mywebsite.com',
		port: 80,
		path: '/page.php?id='+idToFetch,
		agent: false,
		// Not read by http.get here; carried along so the handlers can report the id.
		pageId: idToFetch
	};

	function handleResponse(response){
		var body = "";

		// Only accumulate the body of successful responses.
		function collectChunk(chunk){
			if(response.statusCode == 200){
				body += chunk;
			}
		}

		function reportDone(){
			console.log("finish to fetch id: "+requestOptions.pageId);
			// do something with the HTML page
		}

		response.resume();
		response.on('data', collectChunk);
		response.on('end', reportDone);
	}

	function handleRequestError(err){
		console.log("Error: " + requestOptions.host + "\n" + err.message);
	}

	var request = http.get(requestOptions, handleResponse);
	request.on('error', handleRequestError);
}

The solution: limit to 10 concurrent connections

By working with only 10 simultaneous connections, our application stays usable.
The idea is to start 10 workers on the event loop; each one fetches the next URL as soon as its current request finishes.


// Shared state: ids 0 .. totalUrl-1 are handed out one at a time via currentUrl.
var totalUrl = 10000000;
var currentUrl = 0;

// Number of workers to start; each getNextUrl() call begins one chain of requests.
var totalLoop = 10;
var i = 0;
while( i < totalLoop ) {
	getNextUrl();
	i += 1;
}

/**
 * Worker step: claim the next unfetched page id, request it, and move on to
 * the following id once this request is over (successfully or not).
 *
 * Reads and advances the shared `currentUrl` counter and stops once it
 * reaches `totalUrl`. Each initial call starts one worker that keeps at most
 * one request in flight.
 */
function getNextUrl(){

	// Check before claiming an id: the last id (totalUrl - 1) is still fetched,
	// and idle workers do not keep incrementing the shared counter.
	if(currentUrl >= totalUrl){
		console.log("nothing else to do for this worker");
		return;
	}

	var idToFetch = currentUrl++;
	var advanced = false;

	// Move this worker on exactly once per request, however many terminal
	// events fire (end + close, error + close, ...). Advancing twice would
	// add a worker; never advancing would lose one.
	function advance(){
		if(advanced){
			return;
		}
		advanced = true;
		getNextUrl(); //call the next url to fetch
	}

	var options = {
	  host: 'www.mywebsite.com',
	  port: 80,
	  path: '/page.php?id='+idToFetch,
	  agent: false,
	  pageId: idToFetch
	};

	http.get(options, function(res) {
		var pageData = "";

		// Decode at the stream level so a multi-byte character split across
		// two chunks is not corrupted by per-chunk string conversion.
		res.setEncoding('utf8');
		res.resume();
		res.on('data', function (chunk) {
			if(res.statusCode === 200){
				pageData +=chunk;
			}
		});

		res.on('end', function() {
			console.log("finish to fetch id: "+options.pageId);
			// do something with the HTML page

			advance();
		});

		// The response can still fail after the headers have arrived.
		res.on('error', function(e) {
			console.log("Error: " + options.host + "\n" + e.message);
			advance();
		});

		// Fallback for a response that is cut short without emitting 'end'.
		res.on('close', advance);

	}).on('error', function(e) {
	   console.log("Error: " + options.host + "\n" + e.message);
	   advance();
	});
}

  • http://twitter.com/sergeyzavg Sergey

    Actually you can run even 100 and 1000 simultaneous workers by adding http.globalAgent.maxSockets = 1000; at the top of the script.

  • http://jakescreative.com/ Jake

    Hey, thank you for posting this. This was very helpful as I was running into issues as well trying to push a whole bunch of requests at once. There was just one typo, if(current >= totalUrl) should say if(currentUrl >= totalUrl) otherwise, the second example works great!