I have been using the JavaScript code from this answer. However, I do not want to save the resulting HTML page to an HTML file. Instead, I want to extract the JSON object from the <div class="rg_meta">
element and pass it to my Java code.
While searching, I came across the use of "document", but when I tried it I got an "undefined" error. I am relatively new to PhantomJS and to handling JSON in Java.
// PhantomJS modules: `system` supplies the command-line arguments, `fs` is the
// file-system module, and `page` is the web page object that loads the URL.
var system = require('system');
var fs = require('fs');
var page = require('webpage').create();

// Command-line inputs. They keep these defaults unless system.args holds
// exactly four entries, in which case entries 1..3 are copied over as strings.
var url = "";
var searchParameter = "";
var count = 0;
if (system.args.length === 4) {
    url = system.args[1];
    searchParameter = system.args[2];
    count = system.args[3];
}

// Both the URL and the search parameter are required; otherwise request exit.
if (!url || !searchParameter) {
    phantom.exit();
}

// Present the page as a desktop Chrome browser, zoomed far out, in a
// 1920x1080 viewport.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
page.zoomFactor = 0.1;
page.viewportSize = { width: 1920, height: 1080 };

// Polling state shared with the page.open callback:
//   divCount         - "data-ri" value seen on the previous poll ("-1" = none yet)
//   topPosition      - current vertical scroll offset in pixels
//   unchangedCounter - number of consecutive polls in which divCount did not change
var divCount = "-1";
var topPosition = 0;
var unchangedCounter = 0;
/**
 * Opens `url` and, on success, polls the page every 500 ms:
 *   1. reads the "data-ri" attribute of the last result tile,
 *   2. scrolls down by 1080 px,
 *   3. if that value equals the one from the previous poll, clicks the "#smb"
 *      button when it exists and counts the poll as unchanged; once the
 *      unchanged count equals `count`, PhantomJS exits.
 * Any change in the value resets the unchanged count. If the page fails to
 * load, PhantomJS exits immediately.
 *
 * Reads and updates the script-level variables `divCount`, `topPosition` and
 * `unchangedCounter`; reads `url` and `count`.
 */
page.open(url, function(status) {
    console.log("Status: " + status);
    if (status !== "success") {
        phantom.exit();
        return;
    }

    // `count` arrives as a command-line string; parse it once instead of on
    // every tick. A non-numeric value yields NaN, which never matches.
    var maxUnchangedPolls = parseInt(count, 10);

    window.setInterval(function() {
        // Runs inside the page: "data-ri" of the last result tile, or null
        // when no tile is present (indexing an empty NodeList would throw).
        var newDivCount = page.evaluate(function() {
            var divs = document.querySelectorAll(".rg_di.rg_bx.rg_el.ivg-i");
            if (divs.length === 0) {
                return null;
            }
            return divs[divs.length - 1].getAttribute("data-ri");
        });

        topPosition = topPosition + 1080;
        page.scrollPosition = {
            top: topPosition,
            left: 0
        };

        if (newDivCount === divCount) {
            // Nothing new since the last poll: try the "#smb" button.
            // NOTE: console.log calls in here run in the page context; they
            // reach PhantomJS output only if a page.onConsoleMessage handler
            // is installed (none is set in the code visible here).
            page.evaluate(function() {
                // Fixed: the DOM method is getElementsByClassName (plural).
                var elems = document.getElementsByClassName("rg_meta");
                console.log(elems.length);
                var button = document.querySelector("#smb");
                // Fixed: querySelector returns null, not undefined, on a miss.
                if (button) {
                    button.click();
                    console.log('Clicked');
                    return true;
                }
                return false;
            });

            if (unchangedCounter === maxUnchangedPolls) {
                /* var path = searchParameter+'.html';
                fs.write('seedHtml/'+path, page.content, 'w');
                console.log('printing html');*/
                phantom.exit();
            } else {
                unchangedCounter = unchangedCounter + 1;
            }
        } else {
            unchangedCounter = 0;
        }
        divCount = newDivCount;
    }, 500);
});