How can the correct encoding be used when FileReader reads a file with readAsArrayBuffer?

Currently, I am developing a script in JavaScript to read uploaded .csv/.xlsx files and convert the data into an array containing each row. Using FileReader along with SheetJs, I have successfully managed to achieve this by implementing the following code:

//Code for the new excel reader
/**
 * Reads the first uploaded file as an ArrayBuffer, parses it with SheetJS and
 * stores the resulting rows in $scope.fileContent.
 *
 * Rows are taken from the sheet named "Sheet1" when it has any; otherwise the
 * sheet named "contacts" is used.
 *
 * @param {FileList|File[]} files - uploaded files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Clear rows left over from a previous call.
    $scope.fileContent = [];

    const xlsx = XLSX;
    const upload = files[0];
    const bufferReader = new FileReader();

    // Same predicate as before: everything that is not an empty plain object passes.
    const keepRow = function(row) {
        return typeof row != "object" || Array.isArray(row) || Object.keys(row).length > 0;
    };

    bufferReader.onload = function(loadEvent)
    {
        const bytes = loadEvent.target.result;
        console.log(bytes);
        const workbook = xlsx.read(bytes, {type: 'array'});

        // Collect the rows of every sheet that produced at least one row.
        const rowsBySheet = {};
        for (const sheetName of workbook.SheetNames) {
            const rows = xlsx.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
            if (rows.length) {
                rowsBySheet[sheetName] = rows;
            }
        }

        $scope.fileContent = rowsBySheet["Sheet1"];
        if (rowsBySheet["Sheet1"]) {
            return;
        }

        // "Sheet1" is missing or empty: use the "contacts" sheet instead.
        $scope.fileContent = rowsBySheet["contacts"].filter(keepRow);
    };

    bufferReader.readAsArrayBuffer(upload);
};

The above code works well for most files, but encounters difficulty when processing a file with Hebrew text encoded in Windows-1255, resulting in corrupted data.

https://i.sstatic.net/5zTu9.png

To explore alternative solutions, I attempted to read the file as text using reader.readAsText and adjust the encoding accordingly. Here is the revised code snippet:

/**
 * Tells whether a string contains at least one character from the Unicode
 * Hebrew block (U+0590 through U+05FF).
 *
 * @param {string} data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
function is_Hebrew(data)
{
    var hebrewBlock = /[\u0590-\u05FF]/;
    // search() yields -1 when nothing matches, otherwise the match index.
    return data.search(hebrewBlock) !== -1;
}

/**
 * Reads the first uploaded file as text and splits it into rows and cells.
 *
 * When the decoded text contains no Hebrew characters, the file is read again
 * using the 'ISO-8859-8' encoding. The split is a plain split on '\r\n' and
 * ',', so commas inside a quoted cell are treated as cell separators too.
 *
 * @param {FileList|File[]} files - uploaded files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    const parsedRows = [];
    const upload = files[0];
    const textReader = new FileReader();

    textReader.onload = function(loadEvent) {
        const decoded = loadEvent.target.result.toString();
        if (is_Hebrew(decoded)) {
            return;
        }
        // No Hebrew found: read the same file again with the Hebrew encoding.
        textReader.readAsText(upload, 'ISO-8859-8');
    };

    textReader.onloadend = function() {
        const rows = textReader.result.split('\r\n');
        console.log(rows);
        for (const row of rows) {
            parsedRows.push(row.split(','));
        }

        console.log(textReader);
    };

    textReader.readAsText(upload);
};

However, the modified code does not split the file into rows and cells correctly. When a cell contains a string of comma-separated values (e.g. "25,28,29"), each value is treated as a separate cell, so the resulting array is inaccurate.

Therefore, I have opted to continue using the initial method, but I am having difficulty changing the encoding. Is there a way to specify the encoding in the original code, which uses readAsArrayBuffer to extract the file data?

Answer №1

After exploring several potential solutions, I found that the most effective approach was to combine the two methods shown above: the first method reads .xlsx files, while the second reads .csv files. In addition, the second method uses a supplementary JavaScript library, PapaParse, to split each row into cells correctly (for example, when a quoted cell itself contains commas).

/**
 * Tells whether a string contains at least one character from the Unicode
 * Hebrew block (U+0590 through U+05FF).
 *
 * @param {string} $data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
$scope.is_Hebrew = function($data) {
    var hebrewBlock = /[\u0590-\u05FF]/;
    // search() yields -1 when nothing matches, otherwise the match index.
    return $data.search(hebrewBlock) !== -1;
};

// Implementation for the new excel reader
/**
 * Reads the first uploaded file and stores its rows in $scope.fileContent.
 *
 * - .xlsx files are read as an ArrayBuffer and parsed with SheetJS; rows come
 *   from the "Sheet1" sheet, or from "contacts" when "Sheet1" has no rows.
 * - .csv files are read as text and parsed with PapaParse. When the default
 *   decoding contains no Hebrew characters, the file is read once more as
 *   'ISO-8859-8'; that second decoding is used only if it does contain Hebrew.
 * - Any other extension triggers an alert.
 *
 * The extension comparison is case-insensitive. Reading is asynchronous, so
 * $scope.fileContent is filled in after this function has returned.
 *
 * NOTE(review): FileReader callbacks run outside an AngularJS digest; if the
 * view does not refresh after loading, the assignment may need to be wrapped
 * in $scope.$apply() — confirm against the calling template.
 *
 * @param {FileList|File[]} files - uploaded files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // PapaParse options. Empty delimiter/newline leave detection to PapaParse.
    const config = {
        delimiter: "",
        newline: "",
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };

    // Encoding tried when the default decoding yields no Hebrew characters.
    const HEBREW_ENCODING = 'ISO-8859-8';

    // Keeps arrays and primitives; drops only plain objects without keys.
    const isNonEmptyRow = function(el) {
        return typeof el !== "object" || Array.isArray(el) || Object.keys(el).length > 0;
    };

    $scope.fileContent = [];

    const f = files && files[0];
    if (!f) {
        // Nothing was selected (e.g. the file dialog was cancelled).
        return;
    }

    const fileExtension = f.name.replace(/^.*\./, '').toLowerCase();

    if (fileExtension === 'xlsx')
    {
        const reader = new FileReader();
        reader.onload = function(e)
        {
            const workbook = XLSX.read(e.target.result, {type: 'array'});

            // Collect the rows of every sheet that produced at least one row.
            const result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                const roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
                if (roa.length) {
                    result[sheetName] = roa;
                }
            });

            if (result["Sheet1"]) {
                $scope.fileContent = result["Sheet1"];
            } else {
                // Fall back to "contacts"; an absent sheet yields no rows
                // instead of throwing on undefined.
                $scope.fileContent = (result["contacts"] || []).filter(isNonEmptyRow);
            }
        };
        reader.readAsArrayBuffer(f);
    }
    else if (fileExtension === 'csv')
    {
        const reader = new FileReader();
        // Text from the first (default-encoding) read, kept once a second read
        // has been started. null means no retry has happened yet.
        let defaultDecoded = null;

        reader.onload = function(e)
        {
            const text = e.target.result;
            const hasHebrew = $scope.is_Hebrew(text);

            if (defaultDecoded === null && !hasHebrew) {
                // Retry exactly once with the Hebrew encoding. Without this
                // guard a file containing no Hebrew would be re-read forever.
                defaultDecoded = text;
                reader.readAsText(f, HEBREW_ENCODING);
                return;
            }

            // If the retry found no Hebrew either, prefer the default decoding
            // so that other non-ASCII text is not mangled.
            const chosen = (defaultDecoded !== null && !hasHebrew) ? defaultDecoded : text;

            // The config object is passed directly, not wrapped in an array.
            const parsed = Papa.parse(chosen, config);
            $scope.fileContent = parsed.data.filter(isNonEmptyRow);
        };
        reader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }

    // Kept from the original: this runs before the asynchronous read finishes,
    // so it only affects the placeholder array that is visible until then (and
    // the value left behind for unsupported files).
    $scope.fileContent.push([]);
};

Similar questions

If you have not found the answer to your question or you are interested in this topic, then look at other similar questions below or use the search

Error: JSON parsing failed due to an unexpected token "u" at the beginning of the JSON string. This occurred in an anonymous function when

Implementing reCaptcha in my firebase project has been successful. I am now sending form data and the captcha response using grecaptcha.getResponse() to my server upon clicking the send button. Below is the code snippet from client.js: $('.sendUrl ...

extract various information from a csv file

Having trouble reading items from a CSV file and adding them to an input form? When using a specific option value that allows you to add items by pressing comma, it doesn't work when reading from the .csv file. What could be causing this issue? with ...

The problem of "undefined function appendTo" is causing issues

My jQuery code looks like this: $(function() { var productTextTemplate = $('#product-text-template').html(); var productTemplate = $('product-template').html(); var product = productTextTemplate.appendTo(productTemplate); a ...

Using .htaccess file to optimize SEO crawling for single page applications that do not use hashbangs

When using a page with pushState enabled, the typical method of redirecting SEO bots involves utilizing the escaped_fragment convention. More information on this can be found here. This convention operates under the assumption that a hashbang prefix (#!) ...

Issue with Braintree drop-in form: Nonce string generation problem

I've encountered a peculiar issue while trying to utilize the dropin form from Braintree. Successfully integrating the form into a view, here's how it looks: <form id="createTransactionForm" method="post" action="#&qu ...

What is the most efficient method for uploading and parsing extensive CSV files in node.js and express?

I am working on a Node.js/Express application where I need to receive CSV files, parse them, and store the parsed values into a MongoDB database. What is the best approach to create an upload route in Express for these files? Is using streams recommended? ...

Interactive sidebar component with navigation and animated section markers

For weeks, I've been attempting to create a navigation sidebar similar to the ones shown in these images: Even though getbootstrap.com/components offers appealing navigation sidebars, I have not found a built-in component in their library. This has m ...

Utilize Vue.js to easily upload images alongside form input fields

I have recently started a small project with Vue Js. I am trying to incorporate an upload file option in my contact form. Due to the numerous input text fields, I am using serialize for the form. However, I am facing issues with the append function. How ca ...

Disabling a tooltip using the tooltip-is-open attribute is ineffective

I am looking to implement a clickable element with a font awesome icon that can copy data to the clipboard. Additionally, I want to display a tooltip that disappears when the cursor leaves the element. Since I need this functionality in multiple instances ...

View a specific selected newsAPI article on its own dedicated page

I have been working on a news website and successfully displayed all the articles on a single page using the news API in nodeJs. Everything is functioning well, but now I want to show the clicked article on a separate page. Although I managed to route it t ...

What is the best way to make img-fluid function properly within Bootstrap Carousel?

I've been struggling to make my carousel images responsive by using the img-fluid tag, but I haven't had any success. I've attempted using !important and display: block, but nothing seems to work. I'm not sure what's causing the is ...

Troubleshooting: Images not displaying on webpage due to Ajax, JQuery, and JavaScript integration

I'm currently working on building a dynamic photo gallery using Ajax and JQuery in Javascript. I have set up a directory named "images" in Visual Studio Code and it contains my selection of 5 images. However, when I click the "next" and "previous" but ...

When setting the Content-Type of an S3 object to 'image/jpeg' in NodeJS, it may appear as 'application/octet' in the S3 console

I am facing an issue with the Content-Type of an image stored in my JPEG buffer. While it uploads and downloads successfully from S3, I encounter errors when trying to send it via the Messenger API programmatically. The S3 console indicates that the actual ...

Retrieving the response value in AngularJS with $resource

I'm trying to retrieve the response of a request using $resource in AngularJS. Here is an example implementation: angular.module('app').factory('AuthResource', ['$resource', function($resource) { return { isA ...

Having trouble getting HTML to render properly in React JS on localhost using Apache server

For the past week, I've been working on resolving an issue. I started by creating a React Application using: npm create react-app After that, I attempted to build it with: npm run build Everything seemed to go smoothly. I generated a build folder ...

Toggle the visibility of images with input radio buttons

Explanation I am attempting to display an image and hide the others based on a radio input selection. It works without using label, but when I add label, it does not work properly. The issue may be with eq($(this).index()) as it ends up selecting a differ ...

show tab focus outline only

In search of a straightforward and effective method for focusable elements to display an outline only when the tab key is pressed, without showing it when using a mouse in React applications. (looking for something similar to :focus-visible that function ...

Failure of app script to retrieve data from an external spreadsheet

In an attempt to consolidate data, this program aims to transfer information from one spreadsheet to another. The process involves retrieving all files within a designated folder (all of which are spreadsheets), extracting values from a specific range, and ...

Tips for transferring HTML code to a controller

Currently facing an issue while working with MVC and attempting to store HTML code from a view in a database field. In the JS section of my MVC solution, I have the following code snippet: var data = { id_perizia: $("#id_perizia").val(), pinSessione: $("# ...

How to avoid the need to wrap all setState calls with #act in React 18?

One issue I encountered was when upgrading from React 17 to 18, ReactDom render became asynchronous. To handle this, I needed to use #act to wrap the ReactDom render. However, React also required that all setState calls be wrapped with #act. If not done, ...