Utilize regular expressions in TamperMonkey to extract specific groups of text

Question

Utilize regular expressions in TamperMonkey to extract specific groups of text

I'm currently working on a TamperMonkey userscript that aims to identify URLs matching a specific pattern, visit these pages, extract relevant information, and then update the link for each URL with the extracted text. I'm facing some challenges with the regex matching/extraction process.

Although I've confirmed that my regex is functioning correctly, it seems to fail when I run the script, and I'm struggling to extract the desired group. Any assistance would be greatly appreciated.

// ==UserScript==
// @name         Pull through titles
// @namespace    http://tampermonkey.net/
// @version      0.2
// @description  ---
// @author       You
// @match        https://xxx/curriculum-overview/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=tampermonkey.net
// @grant        none
// ==/UserScript==

// Userscript body as posted in the question: reveals the descriptor elements,
// then requests every evidence link and tries to pull the evidence title out
// of the response with a regular expression.
// NOTE(review): this is the version that does NOT work; it is kept unchanged
// because it is the code the question is asking about.
(function() {
    'use strict';

    // Capture group 1 is the text inside value="..." that follows the
    // portfolio-item__text-input-wide class on the same line of the response.
    const regexPattern = /portfolio-item__text-input-wide\" .*value=\"(.+)\"/;
    // Both variables are shared by every loop iteration and every onload callback.
    var evidence = "";
    var title = "";

    //Show descriptors by default
    var x = document.getElementsByClassName("js-descriptors");
    for (var i = 0, max = x.length; i < max; i++) {
        x[i].style.display = "block";
    }

    //Find all URLs that link to evidence
    var urls = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //For each URL, visit the page and extract the title of the evidence and update the innerText with that title
    // The loop reuses the function-scoped `i` declared by the `var` in the loop above.
    for (i = 0; i < urls.length; i++){

        const xhr = new XMLHttpRequest();
        // Third argument `true` makes the request asynchronous.
        xhr.open("GET", urls[i], true);
        xhr.responseType = "text";
        // Stores the response body in `evidence` once a successful response has arrived.
        xhr.onload = () => {
            if (xhr.readyState === xhr.DONE) {
                if (xhr.status === 200) {
                    evidence = xhr.responseText;
                }
            }
        };

        xhr.send(null);
        // NOTE: send() returns immediately for an asynchronous request, so onload
        // has not run yet and `evidence` is still "" on the first iteration; match()
        // then returns null and the toString() call below throws a TypeError.
        title = evidence.match(regexPattern); //extract matching regex pattern
        alert(title.toString()); //once I know the string is extracted will append to
        //urls[i].innerText = urls[i].toString(); //this line tests that the innerText can be changed to the URL; will change to title variable once working
    }
})
();

javascript tampermonkey

Answer 1

Answer №1

Perform your regex operations inside the xhr.onload handler, after the data has been fetched. Because the request is asynchronous (the third argument to xhr.open is true), xhr.send() returns before the response arrives, so calling evidence.match(regexPattern) immediately afterwards runs while evidence is still an empty string.

For example:

// Runs after the response has arrived, so responseText is populated here.
// Relies on `xhr`, `evidence`, `title` and `regexPattern` from the enclosing script.
xhr.onload = () => {
  // Only a completed, successful request carries a page worth searching.
  if (xhr.readyState !== xhr.DONE || xhr.status !== 200) {
    return;
  }

  evidence = xhr.responseText;
  title = evidence.match(regexPattern); // null when the pattern is not found

  // Guard the null case: calling a method on null would throw a TypeError.
  if (title === null) {
    console.warn(`No title found in response from ${xhr.responseURL}`);
    return;
  }

  // Index 0 is the whole match; index 1 is the captured value="..." text.
  alert(title[1]);
};

Alternatively, you can create a separate function and call it from within the onload method:

/**
 * Runs the title pattern against a fetched page and logs the match result.
 * Relies on `regexPattern` from the enclosing script.
 *
 * @param {string} text - Response body to search.
 */
function getTitle(text) {
  const matchResult = text.match(regexPattern);
  console.log(matchResult); // or any other action...
}

// then...

// Hands the response body to getTitle, but only for a finished request
// that came back with HTTP 200.
xhr.onload = () => {
  const isFinished = xhr.readyState === xhr.DONE;
  if (isFinished && xhr.status === 200) {
    getTitle(xhr.responseText);
  }
};

Answer 2

Perform your regex operations inside the xhr.onload handler, after the data has been fetched. Because the request is asynchronous (the third argument to xhr.open is true), xhr.send() returns before the response arrives, so calling evidence.match(regexPattern) immediately afterwards runs while evidence is still an empty string.

For example:

// Runs after the response has arrived, so responseText is populated here.
// Relies on `xhr`, `evidence`, `title` and `regexPattern` from the enclosing script.
xhr.onload = () => {
  // Only a completed, successful request carries a page worth searching.
  if (xhr.readyState !== xhr.DONE || xhr.status !== 200) {
    return;
  }

  evidence = xhr.responseText;
  title = evidence.match(regexPattern); // null when the pattern is not found

  // Guard the null case: calling a method on null would throw a TypeError.
  if (title === null) {
    console.warn(`No title found in response from ${xhr.responseURL}`);
    return;
  }

  // Index 0 is the whole match; index 1 is the captured value="..." text.
  alert(title[1]);
};

Alternatively, you can create a separate function and call it from within the onload method:

/**
 * Runs the title pattern against a fetched page and logs the match result.
 * Relies on `regexPattern` from the enclosing script.
 *
 * @param {string} text - Response body to search.
 */
function getTitle(text) {
  const matchResult = text.match(regexPattern);
  console.log(matchResult); // or any other action...
}

// then...

// Hands the response body to getTitle, but only for a finished request
// that came back with HTTP 200.
xhr.onload = () => {
  const isFinished = xhr.readyState === xhr.DONE;
  if (isFinished && xhr.status === 200) {
    getTitle(xhr.responseText);
  }
};

Answer 3

Answer №2

During a collaborative debugging session, my friend and I discovered that urls[i] no longer referred to the intended link inside the onload function: i is declared with var, so by the time the asynchronous callback runs the loop has already finished and i equals urls.length, which makes urls[i] undefined. This seems to be what @Fraser was hinting at, and it explains why we were having trouble updating the innerText. To address this issue, we devised a slightly different approach which I am sharing here in case it can benefit others.

// Userscript body: reveals the descriptor elements, then fetches every
// evidence link and replaces the link text with the evidence title found
// in the fetched page.
(function() {
    'use strict';

    // Capture group 1 is the text inside value="..." that follows the
    // portfolio-item__text-input-wide class on the same line of the response.
    const regexPattern = /portfolio-item__text-input-wide\" .*value=\"(.+)\"/;

    //Display descriptors by default
    const descriptors = document.getElementsByClassName("js-descriptors");
    for (const descriptor of descriptors) {
        descriptor.style.display = "block";
    }

    //Identify all links pointing to evidence
    const links = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //Visit each link's page, extract the evidence title, and use it as the link text
    for (const link of links) {
        // `link` is a fresh binding on every iteration, so the asynchronous
        // callbacks below still see the anchor this request was made for.
        // An index declared with `var` would already equal links.length by the
        // time a callback runs.
        const xhr = new XMLHttpRequest();
        xhr.open("GET", link.href, true);
        xhr.responseType = "text";

        xhr.onload = () => {
            if (xhr.readyState !== xhr.DONE || xhr.status !== 200) {
                console.warn(`Request for ${link.href} returned status ${xhr.status}`);
                return;
            }

            // Kept local so overlapping responses cannot overwrite each other's data.
            const match = xhr.responseText.match(regexPattern);
            if (match === null) {
                // Leave the link text unchanged instead of throwing on match[1].
                console.warn(`No evidence title found at ${link.href}`);
                return;
            }

            link.innerText = match[1]; // Update the visible text of the hyperlink
        };

        xhr.onerror = () => {
            console.warn(`Request for ${link.href} failed`);
        };

        xhr.send(null);
    }
})();

Answer 4

During a collaborative debugging session, my friend and I discovered that urls[i] no longer referred to the intended link inside the onload function: i is declared with var, so by the time the asynchronous callback runs the loop has already finished and i equals urls.length, which makes urls[i] undefined. This seems to be what @Fraser was hinting at, and it explains why we were having trouble updating the innerText. To address this issue, we devised a slightly different approach which I am sharing here in case it can benefit others.

// Userscript body: reveals the descriptor elements, then fetches every
// evidence link and replaces the link text with the evidence title found
// in the fetched page.
(function() {
    'use strict';

    // Capture group 1 is the text inside value="..." that follows the
    // portfolio-item__text-input-wide class on the same line of the response.
    const regexPattern = /portfolio-item__text-input-wide\" .*value=\"(.+)\"/;

    //Display descriptors by default
    const descriptors = document.getElementsByClassName("js-descriptors");
    for (const descriptor of descriptors) {
        descriptor.style.display = "block";
    }

    //Identify all links pointing to evidence
    const links = document.querySelectorAll('a[href*="portfolio-item/review"]');

    //Visit each link's page, extract the evidence title, and use it as the link text
    for (const link of links) {
        // `link` is a fresh binding on every iteration, so the asynchronous
        // callbacks below still see the anchor this request was made for.
        // An index declared with `var` would already equal links.length by the
        // time a callback runs.
        const xhr = new XMLHttpRequest();
        xhr.open("GET", link.href, true);
        xhr.responseType = "text";

        xhr.onload = () => {
            if (xhr.readyState !== xhr.DONE || xhr.status !== 200) {
                console.warn(`Request for ${link.href} returned status ${xhr.status}`);
                return;
            }

            // Kept local so overlapping responses cannot overwrite each other's data.
            const match = xhr.responseText.match(regexPattern);
            if (match === null) {
                // Leave the link text unchanged instead of throwing on match[1].
                console.warn(`No evidence title found at ${link.href}`);
                return;
            }

            link.innerText = match[1]; // Update the visible text of the hyperlink
        };

        xhr.onerror = () => {
            console.warn(`Request for ${link.href} failed`);
        };

        xhr.send(null);
    }
})();

Utilize regular expressions in TamperMonkey to extract specific groups of text

Answer №1

Answer №2

Similar questions

What is the best way to implement this component into my vue.js project?

Generate JSON with a distinct structure

How to use Express Validator to validate both email and username within a single field?

Is there a way to execute a function after EACH user interaction?

The Node.js controller is in disarray

Revamp the current webpage to display attribute values in textual format

What is causing the ERR_HTTP_HEADERS_SENT('set') error when running on Windows Server, but not on my development machine?

What is the best way to toggle the visibility of multiple column groups in Ag-Grid community using dynamic

Encountering a GraphQL error during the compilation process

Adjust the height of a DIV element using Jquery Resizable to a minimum height of 1px, smaller than its default value

Is there a way to extract the values from a range slider individually and then display them as the minimum and maximum values on the screen?

Discovering the method to extract a Specific Form Field value with JQuery

The server remains unreachable despite multiple attempts to send data using Angular's $http

Ways to unveil a concealed div element using JavaScript

Button click not triggering JQuery click event

Do factory and service represent examples of Declarative Programming within AngularJS?

Is it possible to establish the page state upon loading (in the context of a GET request

Resolve the conflict with the upstream dependency when installing NPM packages

Executing API calls utilizing Axios in a NodeJS environment with the Authorization Basic feature

Acquiring a fresh scope in Angular via a different component