For days, I've been grappling with translating this specific query into MapReduce. My task is to determine the number of different cars that have covered a distance of "N" kilometers.
The Query:
// Reference aggregation: groups the documents of `adsb` whose KM is strictly
// between 10000 and 45000 by their exact KM value and keeps a running
// `countCar` tally per group.
db.adsb.group({
    "key": { "KM": true },
    "initial": { "countCar": 0 },
    // Called once per matching document; `prev` is the accumulator of the
    // document's group and is updated in place.
    "reduce": function(obj, prev) {
        var plate = obj.Matricula;
        // Documents without a plate (null or missing) do not contribute.
        if (plate == null) {
            return;
        }
        // An array of plates adds its length, a single plate adds one.
        prev.countCar += (plate instanceof Array) ? plate.length : 1;
    },
    "cond": {
        "KM": { "$gt": 10000, "$lt": 45000 }
    }
});
Each document in Mongo follows this structure:
{
"_id" : ObjectId("5a8843e7d79a740f272ccc0a"),
"KM" : 45782,
"Matricula" : "3687KTS",
}
My objective is to achieve something like this:
/* 0 */
{
"KM" : 45000,
"total" : 634
}
/* 1 */
{
"KM" : 46000,
"total" : 784
}
Although my code compiles, it fails to produce the desired output. It appears that each time 'reduce' is entered, all values reset to zero, hindering the accumulation of registrations. One challenge arises when dealing with large datasets as the function must iterate multiple times within 'reduce'. I'm unsure if this method is suitable, or if I should return a list of car plates and their count within 'reduce', then sum them up during 'finalize'.
// Map function
// Invoked with `this` bound to one document. Documents whose KM is strictly
// between 10000 and 45000 are assigned to the nearest multiple of 1000 and
// emit one value per plate. Each value has the shape { plates: [plate] },
// which is the same shape the reduce function returns, so reduce output can
// safely be fed back into reduce.
var m = function() {
    if (!(this.KM > 10000 && this.KM < 45000)) {
        return;
    }
    // A document without a plate (null or missing) has no car to count.
    if (this.Matricula == null) {
        return;
    }
    var bucket = Math.round(this.KM / 1000) * 1000;
    // Matricula may hold a single plate or an array of plates.
    var plates = Array.isArray(this.Matricula) ? this.Matricula : [this.Matricula];
    for (var i = 0; i < plates.length; i++) {
        emit(bucket, { plates: [plates[i]] });
    }
};
// Reduce function
// Merges the plate lists of all `values` for one key into a single list
// without duplicates. It returns a list rather than a count on purpose:
// a count cannot be merged with further plates if the result is reduced
// again, whereas a duplicate-free list can.
//   key    - the KM bucket (not used by the merge itself)
//   values - array of { plates: [...] } objects, coming from map or from a
//            previous reduce call
//   returns { plates: [...] } holding each distinct plate once, in order of
//   first appearance
var r = function(key, values) {
    // Prototype-less lookup table so that any plate text is a safe key.
    var seen = Object.create(null);
    var unique = [];
    for (var i = 0; i < values.length; i++) {
        var plates = values[i].plates;
        for (var j = 0; j < plates.length; j++) {
            var plate = plates[j];
            if (!seen[plate]) {
                seen[plate] = true;
                unique.push(plate);
            }
        }
    }
    return { plates: unique };
};
// Finalize function
// Converts the merged plate list of one key into the number of distinct
// plates. It also handles a key that was emitted only once and therefore
// never went through reduce: that value is the single-plate list from map.
//   key    - the KM bucket
//   value  - { plates: [...] } as produced by map or reduce
//   returns the number of plates in the list
var f = function(key, value) {
    return value.plates.length;
};
// Runs the mapReduce command on the "dealer" collection with the map (m),
// reduce (r) and finalize (f) functions defined above, writing the output to
// the "result" collection via the `replace` output mode.
// NOTE(review): the reference group query targets `adsb` while this command
// targets "dealer" - confirm which collection is intended.
db.runCommand( {
mapReduce: "dealer",
map: m,
reduce: r,
finalize: f,
out: {replace : "result"}
} );
An insightful answer can be found here: