I am unsure whether this is possible using regex in JavaScript, but I found the concept interesting and decided to give it a try. I wanted to clean up some HTML code by removing most tags completely, i.e. simply dropping tags such as <H1>, <img> and <a href ....>. This seemed relatively straightforward (taking inspiration from another post by karim79, "Remove HTML Tags in Javascript with Regex").
function(inString, maxlength, callback){
console.log("Sting is " + inString)
console.log("Its " + inString.length)
var regex = /(<([^>]+)>)/ig
var outString = inString.replace(regex, "");
console.log("No HTML sting " + outString);
if ( outString.length < maxlength){
callback(outString)
} else {
console.log("Lets cut first bit")
}
}
Then I began considering whether there was a way to control what the regex does with each match. For example, I might want to preserve certain tags such as b, br and i, and perhaps change H1-H6 to b. In pseudo code, something like:
// Pseudo-code only, not runnable JavaScript: `inString.regex.hits` stands in
// for "the list of tags the regex matched"; strings have no such property.
for ( var i in inString.regex.hits ) {
// Example action: a hit that is an H1 tag gets replaced by b.
if ( hits[i] == H1 ) {
hits[i] = b;
}
}
The challenge here is that I want the non-HTML text to remain unchanged, while I only want to remove the HTML tags by default. One approach could be to modify the tags I want to keep: for instance, changing <b> to [[b]], then reverting it back to <b> once all unwanted tags are removed. Here's an attempt at implementing this logic (specifically for 'b'; I am unsure whether the following code would function as intended):
/**
 * Strips every HTML tag from a string except bold tags, which are kept as
 * plain lowercase `<b>` / `</b>` (any attributes on the tag are dropped).
 *
 * Works in three passes: bold tags are swapped for bracket placeholders
 * (`[b]`, `[/b]`), all remaining tags are removed, and finally the
 * placeholders are turned back into tags.
 *
 * NOTE: text that already contains a literal "[b]" or "[/b]" is
 * indistinguishable from a placeholder and will also come out as a tag.
 *
 * @param {string} inString - Text that may contain HTML tags.
 * @param {number} maxlength - The cleaned text must be strictly shorter than
 *   this for the callback to be invoked.
 * @param {function(string): void} callback - Receives the cleaned text.
 *   It is NOT called when the cleaned text is too long; that case only
 *   writes a log message.
 */
function stripHtmlKeepBold(inString, maxlength, callback){
  console.log("Sting is " + inString);
  console.log("Its " + inString.length);

  // Identifiers cannot contain "-", so the patterns use camelCase names.
  var regexRemHTML = /(<([^>]+)>)/ig;
  // Opening or closing b tag, optionally with attributes. The character
  // after "b" must be whitespace or ">", so <br>, <body> etc. do not match.
  var regexHideB = /<(\/?)b(?:\s[^>]*)?>/ig;
  // The brackets are escaped: an unescaped [b] is a character class that
  // would match every single letter "b" in the text.
  var regexShowB = /\[(\/?)b\]/g;

  var outString = inString.replace(regexHideB, "[$1b]");
  outString = outString.replace(regexRemHTML, "");
  outString = outString.replace(regexShowB, "<$1b>");
  console.log("No HTML sting " + outString);

  if (outString.length < maxlength) {
    callback(outString);
  } else {
    console.log("Lets cut first bit");
  }
}
Is there a more intelligent way to approach this? In other words, can I write code that identifies each HTML tag and then executes a specific action for that match?