I am currently utilizing CasperJS to extract inner texts from webpages and store them in a JSON file.
Below you can find the code I am using along with an issue that I am encountering!
var words = [];
var casper = require('casper').create();
var x = require('casper').selectXPath;
var fs = require('fs');
function getWords() {
var words = document.querySelectorAll('span.inner_tit');
return Array.prototype.map.call(words, function(e) {
return e.innerHTML;
});
}
function createFinal(wordArray) {
var out = [];
wordArray.forEach(function(word) {
out.push({"type": "river", "name": word, "spell": word.length});
});
return out;
}
casper.start('http://dic.daum.net/index.do?dic=kor');
casper.thenClick(x('//*[@id="searchSubmit"]'), function(){
console.log('searching');
});
casper.wait(2000, function() {
casper.then(function() {
words = this.evaluate(getWords);
});
});
casper.wait(3000, function() {
casper.thenClick(x('//*[@id="mArticle"]/div[2]/a[2]'), function (){
words = words.concat(this.evaluate(getWords));
});
});
casper.run(function() {
var my_object = { "my_initial_words": createFinal(words)};
this.echo(JSON.stringify(my_object, null, '\t'))
var result = JSON.stringify(my_object, null, '\t')
fs.write('myresults.json', result, 'a');
this.exit();
});
The issue with this code is when the JSON code looks like this:
{
"my_initial_words": [
{
"type": "river",
"name": "apple",
"spell": "5"
},
{
"type": "river",
"name": "banana",
"spell": "6"
}
]
}
The program appends everything including the name of the JSON arrays like this:
{
"my_initial_words": [
{
"type": "river",
"name": "apple",
"spell": "5"
},
{
"type": "river",
"name": "banana",
"spell": "6"
}
]
} {
"my_initial_words": [
{
"type": "river",
"name": "apple",
"spell": "5"
},
{
"type": "river",
"name": "banana",
"spell": "6"
}
]
}
To solve this issue, only these elements should be added (without "my _initial_words: []"):
{"type": "river", "name": "apple","spell": "5"},
{"type": "river", "name": "banana","spell": "6"}