My current challenge involves scraping this page with PhantomJS: http://www.avrilgau.com/fr/5-chaussures. The goal is to extract the links of all products listed on the page. However, the products are loaded dynamically: each time I scroll down, 12 more items are added.
I discovered a hidden form in the HTML code which, when submitted, allowed me to retrieve 61 elements out of a total of 110.
The question now is, how can I obtain the links of all products?
Below is the code snippet I have been working on:
// PhantomJS built-in modules. `system` is not referenced in the visible part
// of the script; it is kept because other code may rely on it.
var system = require('system');
var fs = require('fs');

// Page object used to load and inspect the product listing.
var page = require('webpage').create();

// Listing to scrape, and the file the extracted links are appended to.
var url = 'http://www.avrilgau.com/fr/5-chaussures';
var path = "productLinks.txt";
// Relay anything the loaded page prints to its own console to this script's
// console, so in-page messages are visible when the script runs.
page.onConsoleMessage = function forwardPageConsole(message) {
    console.log(message);
};
// Load the listing, scroll to the bottom repeatedly until no further products
// appear, then print every product link and append the list to `path`.
//
// The original collected the links right after the load finished, so only the
// first batch of products was captured. This version assumes the site appends
// more products when the page is scrolled to the bottom (as described for this
// listing) and keeps scrolling until the number of matched links stops growing.
page.open(url, function (status) {
    // Selector matching the link of each product tile in the listing.
    var linkSelector = "#content > ul > li > div > div a.img";
    // Delay between two scroll attempts, in milliseconds.
    var pollIntervalMs = 500;
    // Stop once this many consecutive polls brought no new product.
    var maxIdlePolls = 10;

    var knownCount = 0;
    var idlePolls = 0;
    var timer;

    // Scrolls the page to its bottom and returns how many product links the
    // DOM currently holds. The inner function runs in the page context, so the
    // selector has to be passed in as an argument.
    function scrollAndCount() {
        return page.evaluate(function (selector) {
            window.document.body.scrollTop = document.body.scrollHeight;
            return document.querySelectorAll(selector).length;
        }, linkSelector);
    }

    // Returns all product links currently in the DOM, one per line, with a
    // trailing newline (same output format as before).
    function collectLinks() {
        return page.evaluate(function (selector) {
            var allUrl = [];
            var nodes = document.querySelectorAll(selector);
            for (var i = 0; i < nodes.length; i++) {
                allUrl.push(nodes[i].href);
            }
            return allUrl.join("\n") + "\n";
        }, linkSelector);
    }

    // Writes the final list and terminates PhantomJS.
    function finish() {
        var content = collectLinks();
        console.log(content);
        fs.write(path, content, 'a');
        phantom.exit();
    }

    // Do not write anything if the page could not be loaded.
    if (status !== "success") {
        console.log("Unable to load " + url + " (status: " + status + ")");
        phantom.exit(1);
        return;
    }

    timer = setInterval(function () {
        var count = scrollAndCount();

        if (count > knownCount) {
            // New products arrived: remember the total and keep scrolling.
            knownCount = count;
            idlePolls = 0;
            return;
        }

        idlePolls += 1;
        if (idlePolls >= maxIdlePolls) {
            // Nothing new for a while: assume the whole listing is loaded.
            clearInterval(timer);
            finish();
        }
    }, pollIntervalMs);
});