Organize and Group Matching Strings in an Array using Node.js

Question

Organize and Group Matching Strings in an Array using Node.js

I'm currently developing an FAQ system that includes a large number of question-answer pairs. My goal is to group similar questions together and I've been utilizing the npm set-clustering package for this purpose.

While the package offers a good match based on token matching, it requires me to specify the number of groups to create.

My ideal scenario would be for the grouping to be automatic, with the algorithm determining the appropriate number of groups to be created (Unsupervised learning).

If you know of any other package or platform that could assist me, please let me know.

Sample Questions:

What is the pricing of your product?

Can I speak to your representative?

Hi

Hi Friend

Hi, Good Morning

How much does it cost?

Current Result: (When specifying '3' as the number of groups)

(Hi, Hi Friend)

(What is the pricing of your product?, How much does it cost?)

(Can I speak to your representative?, Hi, Good Morning)

Desired Grouping: (Without providing '3' as input)

(Hi, Hi Friend, Hi, Good Morning)

(What is the pricing of your product?, How much does it cost?)

(Can I speak to your representative?)

Current Code:

var cluster = require('set-clustering');

// Append one { title, tags } record to `articles` for every row of `resp`,
// taking the title from the row's `que` field.
for (const { que, tags } of resp) {
    articles.push({ title: que, tags });
}

/**
 * Scores how alike two articles are by counting matching tag pairs.
 *
 * Every tag of `x` is compared with every tag of `y`; each pair that is
 * loosely equal (`==`) adds one to the score, so duplicated tags are counted
 * once per matching pair.
 *
 * @param {{tags: Array}} x - First article.
 * @param {{tags: Array}} y - Second article.
 * @returns {number} Number of matching (x tag, y tag) pairs.
 */
function similarity(x, y) {
    let shared = 0;
    x.tags.forEach((left) => {
        y.tags.forEach((right) => {
            // Loose equality is kept on purpose so the comparison rule is unchanged.
            shared += left == right ? 1 : 0;
        });
    });
    return shared;
}

// Build the cluster object from the collected articles and the pairwise
// similarity function; `c` was used below without ever being created.
var c = cluster(articles, similarity);

// I want the grouping to be done autonomously without specifying the number of groups
// NOTE(review): the set-clustering README also appears to offer a
// threshold-based `similarGroups(similarity)` call that needs no group
// count - confirm against the package documentation before switching.
var groups = c.evenGroups(3);

// Reduce every group of article records to just the list of their titles.
var titles = groups.map(function(group) {
    return group.map(function(article) {
        return article.title;
    });
});

console.log(titles);

Reference: https://www.npmjs.com/package/set-clustering

javascript npm cluster-analysis

Answer 1

Answer №1

const ss = require('sentence-similarity')
const thesaurus = require("thesaurus");

// Short local aliases for the two sentence-similarity exports used below.
const { sentenceSimilarity: sentenceSim, similarityScore: simScore } = ss;
// Returns `a` when `a < b` holds and `b` otherwise, so `b` is the result for
// ties and whenever the comparison is false (for example when either is NaN).
const minFunc = (a, b) => (a < b ? a : b);

/**
 * Decides whether two sentences are similar enough to be grouped together.
 *
 * Both sentences are split on single spaces. The first sentence is scored
 * against the second as written, and then once more for each synonym variant:
 * a copy of the first sentence in which one word is replaced by one of its
 * first five thesaurus synonyms. Each score is the `score` field returned by
 * `sentenceSim`, divided by the word count of the shorter sentence.
 *
 * Fixes over the previous version: the loop variable of the `for...of` was
 * reassigned although it was `const` (a runtime TypeError), the synonym was
 * never actually placed into the scored sentence, and the loop ran five times
 * even when fewer synonyms were available.
 *
 * @param {string} sentence1 - Sentence whose words are expanded with synonyms.
 * @param {string} sentence2 - Sentence to compare against.
 * @returns {boolean} True when the best normalised score is at least 0.375.
 */
const checkSimilarity = (sentence1, sentence2) => {
    const s1 = sentence1.split(' ');
    const s2 = sentence2.split(' ');
    const shorterLength = minFunc(s1.length, s2.length);
    const scoreAgainstS2 = (words) =>
        sentenceSim(words, s2, winkOpts).score / shorterLength;

    // Start with the unmodified sentence so there is always at least one score.
    const numbers = [scoreAgainstS2(s1)];

    s1.forEach((word, index) => {
        // Assumes thesaurus.find returns an array of synonyms (empty when the
        // word is unknown) - TODO confirm against the thesaurus package docs.
        const synonyms = thesaurus.find(word).slice(0, 5);
        for (const synonym of synonyms) {
            const variant = s1.slice();
            variant[index] = synonym;
            numbers.push(scoreAgainstS2(variant));
        }
    });

    return Math.max(...numbers) >= 0.375;
};

// Options object passed as the third argument to sentenceSim: it selects the
// winklerMetaphone scorer and sets its threshold option to 0.
const winkOpts = {
  f: simScore.winklerMetaphone,
  options: { threshold: 0 },
};
/**
 * Partitions a list of strings into groups of mutually matching entries.
 *
 * Works on a copy of `source`, so the caller's array is left untouched. The
 * last remaining entry is repeatedly taken as the seed of a new group; every
 * other remaining entry (scanned from the end towards the start) that
 * `checkSimilarity` matches against the seed is moved into that group.
 *
 * @param {string[]} source - Strings to group.
 * @param {number} [maximum=5] - Accepted but not used by this function.
 * @returns {string[][]} Groups, each starting with its seed entry.
 */
const filterWords = (source, maximum = 5) => {
  const remaining = source.slice();
  const matches = [];

  while (remaining.length > 0) {
    const seed = remaining.pop();
    const group = [seed];

    // Walk backwards so that removing an entry never shifts an unvisited one.
    for (let i = remaining.length - 1; i >= 0; i--) {
      const candidate = remaining[i];
      if (checkSimilarity(seed, candidate)) {
        group.push(candidate);
        remaining.splice(i, 1);
      }
    }

    matches.push(group);
  }

  return matches;
};
// Example run: the placeholder entries stand in for the strings to be grouped.
const inputWords = [
  'Your',
  'word',
  'list',
];
const result = filterWords(inputWords);

This uses the thesaurus library to look up synonyms: https://www.npmjs.com/package/thesaurus

It also uses the sentence-similarity library to score how closely two sentences match: https://www.npmjs.com/package/sentence-similarity

This approach gives satisfactory results on the sample questions provided.

Answer 2

const ss = require('sentence-similarity')
const thesaurus = require("thesaurus");

// Short local aliases for the two sentence-similarity exports used below.
const { sentenceSimilarity: sentenceSim, similarityScore: simScore } = ss;
// Returns `a` when `a < b` holds and `b` otherwise, so `b` is the result for
// ties and whenever the comparison is false (for example when either is NaN).
const minFunc = (a, b) => (a < b ? a : b);

/**
 * Decides whether two sentences are similar enough to be grouped together.
 *
 * Both sentences are split on single spaces. The first sentence is scored
 * against the second as written, and then once more for each synonym variant:
 * a copy of the first sentence in which one word is replaced by one of its
 * first five thesaurus synonyms. Each score is the `score` field returned by
 * `sentenceSim`, divided by the word count of the shorter sentence.
 *
 * Fixes over the previous version: the loop variable of the `for...of` was
 * reassigned although it was `const` (a runtime TypeError), the synonym was
 * never actually placed into the scored sentence, and the loop ran five times
 * even when fewer synonyms were available.
 *
 * @param {string} sentence1 - Sentence whose words are expanded with synonyms.
 * @param {string} sentence2 - Sentence to compare against.
 * @returns {boolean} True when the best normalised score is at least 0.375.
 */
const checkSimilarity = (sentence1, sentence2) => {
    const s1 = sentence1.split(' ');
    const s2 = sentence2.split(' ');
    const shorterLength = minFunc(s1.length, s2.length);
    const scoreAgainstS2 = (words) =>
        sentenceSim(words, s2, winkOpts).score / shorterLength;

    // Start with the unmodified sentence so there is always at least one score.
    const numbers = [scoreAgainstS2(s1)];

    s1.forEach((word, index) => {
        // Assumes thesaurus.find returns an array of synonyms (empty when the
        // word is unknown) - TODO confirm against the thesaurus package docs.
        const synonyms = thesaurus.find(word).slice(0, 5);
        for (const synonym of synonyms) {
            const variant = s1.slice();
            variant[index] = synonym;
            numbers.push(scoreAgainstS2(variant));
        }
    });

    return Math.max(...numbers) >= 0.375;
};

// Options object passed as the third argument to sentenceSim: it selects the
// winklerMetaphone scorer and sets its threshold option to 0.
const winkOpts = {
  f: simScore.winklerMetaphone,
  options: { threshold: 0 },
};
/**
 * Partitions a list of strings into groups of mutually matching entries.
 *
 * Works on a copy of `source`, so the caller's array is left untouched. The
 * last remaining entry is repeatedly taken as the seed of a new group; every
 * other remaining entry (scanned from the end towards the start) that
 * `checkSimilarity` matches against the seed is moved into that group.
 *
 * @param {string[]} source - Strings to group.
 * @param {number} [maximum=5] - Accepted but not used by this function.
 * @returns {string[][]} Groups, each starting with its seed entry.
 */
const filterWords = (source, maximum = 5) => {
  const remaining = source.slice();
  const matches = [];

  while (remaining.length > 0) {
    const seed = remaining.pop();
    const group = [seed];

    // Walk backwards so that removing an entry never shifts an unvisited one.
    for (let i = remaining.length - 1; i >= 0; i--) {
      const candidate = remaining[i];
      if (checkSimilarity(seed, candidate)) {
        group.push(candidate);
        remaining.splice(i, 1);
      }
    }

    matches.push(group);
  }

  return matches;
};
// Example run: the placeholder entries stand in for the strings to be grouped.
const inputWords = [
  'Your',
  'word',
  'list',
];
const result = filterWords(inputWords);

This uses the thesaurus library to look up synonyms: https://www.npmjs.com/package/thesaurus

It also uses the sentence-similarity library to score how closely two sentences match: https://www.npmjs.com/package/sentence-similarity

This approach gives satisfactory results on the sample questions provided.

Organize and Group Matching Strings in an Array using Node.js

Answer №1

Similar questions

Implementing a function trigger upon selection in JavaScript

Submitting an HTML form with no input data

Issue with file upload: The view is not refreshing in response to non-angular events

Utilizing an array in a PHP URL and incorporating it into JavaScript operations

Issue: Dynamic server is experiencing abnormal increase in usage due to headers on Next version 13.4

How to delete the final character from a file stream using node.js and the fs module

selenium tutorial: Testing tooltip functionality with Python

Enhance Your Website with Drag-and-Drop Cart Functionality using HTML

Trouble with npm installation on Windows following node update

Use the Arrow Keys to guide your way through the Page

Angular 2 encountering an error with the HTTP GET request

Module not defined error

Guide to successfully navigating to a webpage using page.link when the link does not have an id, but is designated by a

Issue: Unable to locate element with the specified selector: #email

Exploring Node.js Express: Understanding the Difference Between Modules and Middleware

Other options besides re-flowing and repainting

Determining the angular difference between the direction of particle motion and a straight line

After a period of time, NodeJS abruptly crashes while processing a CSV file

The PHP script is not being activated by AJAX

Utilizing jQuery/Javascript to replicate the data from a table while excluding the header and then pasting it to the