In the script provided below, there are URLs stored in the "links" array. The function gatherLinks()
is designed to collect additional URLs from sitemap.xml based on the URLs present in the "links" array. Once the "links" array reaches a certain number of URLs (determined by the variable "limit"), the function request()
is executed for each URL within the "links" array to send a request to the server and retrieve the response time. The total time taken by the program is reported upon completion.
I developed a PhantomJS program (source included) to send requests and measure the time taken, aiming to compare the performance between versions 2.0.0 and 1.9.8. I extract links using the sitemap.xml file from sites manually added to the "links" array.
When running with PhantomJS 2.0.0, after around 65 requests, the program begins outputting the following message while executing the method page.open()
:
var system = require('system');
var fs = require('fs');
var links = [];
links = [
"http://somesite.com",
"http://someothersite.com",
.
.
.
];
var index = 0, fail = 0, limit = 300;
finalTime = Date.now();
var gatherLinks = function(link){
var page = require('webpage').create();
link = link + "/sitemap.xml";
console.log("Fetching links from " + link);
page.open(link, function(status){
if(status != "success"){
console.log("Sitemap Request FAILED, status: " + status);
fail++;
return;
}
var content = page.content;
parser = new DOMParser();
xmlDoc = parser.parseFromString(content, 'text/xml');
var loc = xmlDoc.getElementsByTagName('loc');
for(var i = 0; i < loc.length; i++){
if(links.length < limit){
links[links.length] = loc[i].textContent;
} else{
console.log(links.length + " Links prepared. Starting requests.\n");
index = 0;
request();
return;
}
}
if(index >= links.length){
index = 0;
console.log(links.length + " Links prepared\n\n");
request();
}
gatherLinks(links[index++]);
});
};
var request = function(){
t = Date.now();
var page = require('webpage').create();
page.open(links[index], function(status) {
console.log('Loading link #' + (index + 1) + ': ' + links[index]);
console.log("Time taken: " + (Date.now() - t) + " msecs");
if(status != "success"){
console.log("Request FAILED, status: " + status);
fail++;
}
if(index >= links.length-1){
console.log("\n\nAll links done, final time taken: " + (Date.now() - finalTime) + " msecs");
console.log("Requests sent: " + links.length + ", Failures: " + fail);
console.log("Success ratio: " + ((links.length - fail)/links.length)*100 + "%");
phantom.exit();
}
index++;
request();
});
}
gatherLinks(links[0]);
Having experimented extensively with the code, no clear pattern emerges regarding the issues described below. With version 2.0.0, successfully sending 300 requests without errors only occurred once. Various combinations of URLs were tested, but failures typically happened between the 50th and 80th request. I keep record of failed URLs which all function normally when tested individually using another PhantomJS application. Version 1.9.8 displays greater stability, though intermittent crashes still occur without discernible patterns.