If the code ran in a browser, where the full DOM API is available, a DOM-based solution could be used:
// Browser-only example: let the DOM parse an HTML fragment, then collect the
// trimmed text of every <font> element whose color attribute is exactly "#FF0101".
var html = "<font color=\"#FF0202\">NOT THIS ONE</font><font color=\"#FF0101\">\n Data which is want to fetch\n</font>";

// A detached container element is used so the fragment is parsed without
// being inserted into the page.
var faketag = document.createElement('faketag');
faketag.innerHTML = html;

var arr = [];
[].forEach.call(faketag.getElementsByTagName("font"), function (fontEl) {
  // getAttribute returns null when the attribute is absent, so no separate
  // hasAttributes() check or manual walk over the attribute list is needed.
  if (fontEl.getAttribute("color") === "#FF0101") {
    arr.push(fontEl.innerText.trim());
  }
});

// Display the result as plain text. Assigning it through innerHTML would
// re-parse any markup characters in the extracted text as HTML.
document.body.textContent = JSON.stringify(arr);
However, according to the Google Apps Script (GAS) reference:
Apps Script code runs on Google's servers and does not support browser-based features like DOM manipulation or the Window API.
To extract the inner text of <font color="#FF0101"> tags, a regex can be used instead:
/**
 * Logs the inner text of every <font ... color="#FF0101"> tag found in the
 * active document, searching one paragraph at a time.
 *
 * Each paragraph's text is searched separately, so a tag whose opening and
 * closing parts sit in different paragraphs is not matched here.
 * Results are written with Logger.log; nothing is returned.
 */
function myFunction() {
  var doc = DocumentApp.getActiveDocument();
  var paras = doc.getParagraphs();
  // Flags: "i" makes the match case-insensitive; "g" lets exec() continue from
  // the previous match so every tag in a paragraph is found.
  var fontRegex = new RegExp('<font\\b[^<]*\\s+color="#FF0101"[^<]*>([\\s\\S]*?)</font>','ig');
  // Declared locally: assigning to undeclared names creates implicit globals
  // and throws a ReferenceError in strict mode.
  var match;
  for (var i = 0; i < paras.length; ++i) {
    // Read the paragraph text once instead of on every exec() call.
    var text = paras[i].getText();
    // exec() returns null (and resets lastIndex to 0) once no further match
    // exists, so the same regex object is safe to reuse for the next paragraph.
    while ((match = fontRegex.exec(text)) !== null) {
      // Capture group 1 is the text between the opening and closing tags.
      Logger.log(match[1]);
    }
  }
}
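The regex matches any font tag whose color attribute is set to #FF0101. Regex is not a robust way to parse HTML, so consider a more reliable technique where one is available. A more efficient variant of the same pattern, with the lazy match unrolled, is: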
<font\\b[^<]*\\s+color="#FF0101"[^<]*>([^<]*(?:<(?!/font>)[^<]*)*)</font>
To handle HTML data spread across multiple paragraphs:
/**
 * Logs the inner text of every <font ... color="#FF0101"> tag found in the
 * active document, searching the whole body text at once.
 *
 * Because the body text is searched as a single string, a tag whose content
 * continues across several paragraphs is still matched.
 * Results are written with Logger.log; nothing is returned.
 */
function myFunction() {
  var doc = DocumentApp.getActiveDocument();
  var text = doc.getBody().getText();
  // Flags: "i" makes the match case-insensitive; "g" lets exec() continue from
  // the previous match so every tag in the text is found.
  var fontRegex = new RegExp('<font\\b[^<]*\\s+color="#FF0101"[^<]*>([\\s\\S]*?)</font>','ig');
  // Declared locally: assigning to an undeclared name creates an implicit
  // global and throws a ReferenceError in strict mode.
  var match;
  while ((match = fontRegex.exec(text)) !== null) {
    // Capture group 1 is the text between the opening and closing tags.
    Logger.log(match[1]);
  }
}
Given this input:
<font color="#FF0202">NOT THIS ONE</font>
<font color="#FF0101">
Data which is want to fetch
</font>
The result would be:
https://i.sstatic.net/ebDcZ.png