I understand the concept of convergence, but I am puzzled that the resulting centroids differ every time the page is refreshed and the algorithm runs again, even though the dataset is the same. Can someone point out where my methodology is incorrect? I have been unable to pinpoint the error in the process.
/**
 * Creates a k-means clusterer and starts clustering right away.
 *
 * @param {number} kValue - Number of clusters to form.
 * @param {Array} dataArray - Points to cluster; the other methods read
 *   index 0 and index 1 of each point.
 * @param {*} canvasElement - Passed to jsHS.GetDimensions; the result is
 *   stored on `this.canvas`.
 * @param {Function} convergeFunction - Stored as `this.converge` and invoked
 *   by clusterCentroids once the centroids stop changing.
 */
function customKMeans(kValue, dataArray, canvasElement, convergeFunction) {
  Object.assign(this, {
    canvas: jsHS.GetDimensions(canvasElement),
    k: kValue,
    centroids: [],
    centroidsToCompare: [],
    data: dataArray,
    converge: convergeFunction,
  });

  // All state is in place, so seeding and clustering can begin immediately.
  this.initialize();
}
/**
 * Euclidean distance between 2-D points given as consecutive pairs.
 *
 * With two arguments this is the ordinary distance between them. With more
 * arguments the points are consumed pairwise (1st with 2nd, 3rd with 4th, ...)
 * and the squared differences of all pairs are summed before the square root
 * is taken.
 *
 * @param {...Array<number>} points - Points whose index 0 and index 1 hold
 *   the x and y coordinates.
 * @returns {number} The distance; 0 when fewer than two points are supplied.
 *   A trailing point without a partner is ignored.
 */
customKMeans.prototype.calculateDistance = function (...points) {
  let squaredSum = 0;

  // Step by two so each iteration handles one complete pair. The bound
  // `i + 1 < length` guarantees the partner exists before it is read.
  for (let i = 0; i + 1 < points.length; i += 2) {
    const first = points[i];
    const second = points[i + 1];
    squaredSum += Math.pow(first[0] - second[0], 2);
    squaredSum += Math.pow(first[1] - second[1], 2);
  }

  return Math.sqrt(squaredSum);
};
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {ArrayLike<number>} array - Values to average; array-likes are
 *   accepted because the reduction is borrowed from Array.prototype.
 * @returns {number} Sum of the values divided by `array.length`
 *   (NaN for an empty list, since that is 0 / 0).
 */
customKMeans.prototype.calculateMean = function (array) {
  const sum = Array.prototype.reduce.call(
    array,
    (runningTotal, value) => runningTotal + value,
    0
  );
  return sum / array.length;
};
/**
 * Seeds `this.centroids` with `this.k` data points chosen at random, then
 * starts the clustering loop via `this.clusterCentroids()`.
 *
 * The seeds are drawn from distinct positions in `this.data`, so the same
 * position is never used twice while unused points remain. Drawing with
 * replacement could hand two clusters the same starting centroid, leaving
 * one of them without any points.
 *
 * Because the seeds are random, different runs over the same data can settle
 * on different final centroids; pass a seeded generator as `random` to make
 * a run repeatable.
 *
 * @param {function(): number} [random=Math.random] - Returns a number in
 *   [0, 1), like Math.random.
 */
customKMeans.prototype.initialize = function (random = Math.random) {
  const total = this.data.length;
  const distinctCount = Math.min(this.k, total);
  const indices = Array.from({ length: total }, (_, index) => index);

  // Partial Fisher-Yates shuffle: each step swaps a random not-yet-used
  // index into position i, so the first `distinctCount` picks are unique.
  for (let i = 0; i < distinctCount; i++) {
    const j = i + Math.floor(random() * (total - i));
    [indices[i], indices[j]] = [indices[j], indices[i]];
    this.centroids.push(this.data[indices[i]]);
  }

  // More clusters requested than points available: the remaining seeds
  // have to repeat existing points.
  for (let extra = distinctCount; extra < this.k; extra++) {
    this.centroids.push(this.data[Math.floor(random() * total)]);
  }

  this.clusterCentroids();
};
/**
 * Runs the assign/update cycle until the centroids stop changing, then
 * reports the result through `this.converge(groups, centroids)`.
 *
 * Each pass:
 *   1. assigns every point in `this.data` to its nearest centroid
 *      (ties go to the lower centroid index);
 *   2. replaces each centroid with the mean x / mean y of its points.
 *
 * A cluster that receives no points keeps its previous centroid. Averaging
 * an empty list would give NaN coordinates, and a NaN centroid can never win
 * a distance comparison again, so the cluster would be lost for good.
 *
 * The cycle is a loop rather than self-recursion so the number of passes is
 * not limited by the call stack.
 *
 * `groups` is an array of `this.k` arrays whose entries have the shape
 * `{ id, datapoint, centroid }`; `centroids` is the final `this.centroids`.
 */
customKMeans.prototype.clusterCentroids = function () {
  let centroidGroups = [];
  let converged = false;

  while (!converged) {
    const previousCentroids = this.centroids;
    this.centroidsToCompare = previousCentroids;

    // Assignment step: bucket every point under its nearest centroid.
    centroidGroups = [];
    for (let c = 0; c < this.k; c++) {
      centroidGroups.push([]);
    }
    for (let d = 0; d < this.data.length; d++) {
      const point = this.data[d];
      let nearest = -1;
      let nearestDistance;
      for (let c = 0; c < this.k; c++) {
        const distance = this.calculateDistance(previousCentroids[c], point);
        // Strict "<" keeps the first centroid on ties.
        if (nearest === -1 || distance < nearestDistance) {
          nearest = c;
          nearestDistance = distance;
        }
      }
      if (nearest !== -1) {
        centroidGroups[nearest].push({ 'id': d, 'datapoint': point, 'centroid': nearest });
      }
    }

    // Update step: move each centroid to the mean of its members.
    const nextCentroids = centroidGroups.map((group, c) => {
      if (group.length === 0) {
        return previousCentroids[c];
      }
      const xAxisValues = group.map((member) => member.datapoint[0]);
      const yAxisValues = group.map((member) => member.datapoint[1]);
      return [this.calculateMean(xAxisValues), this.calculateMean(yAxisValues)];
    });
    this.centroids = nextCentroids;

    // Serialised comparison: converged once a pass leaves every
    // coordinate unchanged.
    converged = JSON.stringify(previousCentroids) === JSON.stringify(nextCentroids);
  }

  this.converge(centroidGroups, this.centroids);
};
// Expose the constructor on the shared jsHS namespace object.
window.jsHS.customKMeans = customKMeans;
Example usage:
// Cluster the sample points into five groups and, once the centroids have
// settled, paint each final centroid as a filled green circle.
// The constructor is registered on the jsHS namespace, so it is read from there.
// NOTE(review): `ctx`, `canvas` and `Array50` are expected to be defined by
// the surrounding page; `ctx` is presumably the canvas 2D context - confirm.
var exampleKMeans = new jsHS.customKMeans(5, Array50, canvas, function (convergenceResults, finalCentroids) {
  const CENTROID_RADIUS = 25;
  const CENTROID_COLOR = '#0B6623';

  // Draws one filled circle centred on (x, y).
  const drawCircle = (x, y) => {
    ctx.beginPath();
    ctx.arc(x, y, CENTROID_RADIUS, 0, Math.PI * 2, true);
    ctx.fillStyle = CENTROID_COLOR;
    ctx.fill();
  };

  finalCentroids.forEach((center) => {
    drawCircle(center[0], center[1]);
  });
});
This example plots the centroids on the canvas correctly, but the centroid positions change every time the browser is refreshed.