Currently, I am developing a script in JavaScript to read uploaded .csv/.xlsx files and convert the data into an array
containing each row. Using FileReader
along with SheetJs, I have successfully managed to achieve this by implementing the following code:
//Code for the new excel reader
$scope.do_file = function(files)
{
$scope.fileContent = [];
var X = XLSX;
var global_wb;
var f = files[0];
var reader = new FileReader();
reader.onload = function(e)
{
var data = e.target.result;console.log(data);
global_wb = X.read(data, {type: 'array'});
var output = "";
var result = {};
global_wb.SheetNames.forEach(function(sheetName) {
var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
if(roa.length) result[sheetName] = roa;
});
$scope.fileContent = result["Sheet1"];
if(!result["Sheet1"])
{
$scope.fileContent = result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
}
};
reader.readAsArrayBuffer(f);
};
The above code works well for most files, but encounters difficulty when processing a file with Hebrew text encoded in Windows-1255, resulting in corrupted data.
https://i.sstatic.net/5zTu9.png
To explore alternative solutions, I attempted to read the file as text using reader.readAsText
and adjust the encoding accordingly. Here is the revised code snippet:
function is_Hebrew(data)
{
var position = data.search(/[\u0590-\u05FF]/);
return position >= 0;
}
$scope.do_file = function(files)
{
var fullResult = [];
var file = files[0];
var reader = new FileReader();
reader.onload = function(e){
var data = e.target.result;
if(!is_Hebrew(data.toString()))
{
reader.readAsText(file,'ISO-8859-8');
}
};
reader.readAsText(file);
reader.onloadend = function(){
var lines = reader.result.split('\r\n');
console.log(lines);
lines.forEach(element => {
var cell = element.split(',');
fullResult.push(cell);
});
console.log(reader);
};
};
However, the modified code fails to accurately interpret the file as it does not distinguish between rows and cells. In instances where a cell contains a string with comma-separated values (e.g. "25,28,29"), the array output becomes inaccurate, treating each value as a separate cell.
Therefore, I have opted to continue using the initial method, but encounter difficulties in changing the encoding. Is there a way to modify the encoding in the original code that utilizes readAsArrayBuffer
to extract the file data?