For the past few weeks, I have been running a script to gather task data for my company. The script has been running smoothly since 09/03/2023, but recently it started duplicating entries and causing confusion in identifying which tasks are necessary and which ones are duplicates.
Here is the script that I've been using:
function getEmails() {
// Retrieving emails from the inbox with the label "Emprevo"
var threads = GmailApp.search('label:"Emprevo"', 0, 100);
// Extracting relevant data from emails
for (var i = 0; i < threads.length; i++) {
var messages = threads[i].getMessages();
for (var j = 0; j < messages.length; j++) {
var message = messages[j];
var subject = message.getSubject();
var body = message.getPlainBody();
var emailDate = message.getDate();
// Parsing email content to extract job data
var jobData = parseEmailBody(emailDate, subject, body);
// Writing job data to Google Sheet
var sheet = SpreadsheetApp.getActive().getActiveSheet();
sheet.appendRow(jobData);
// Marking email as read and removing the "Emprevo" label
message.markRead();
threads[i].removeLabel(GmailApp.getUserLabelByName("Emprevo"));
}
}
}
function parseEmailBody(emailDate, subject, body) {
// Extracting relevant data from email body
var senderRegex = /From: (.*?<(.*?)>)/;
var matches = body.match(senderRegex);
var sender = matches ? matches[1] : '';
return [emailDate, subject, sender, body];
}
I initially suspected that thread grouping in Gmail settings might be causing the duplication issue, but even after disabling this setting, the problem persists.
The emails are being categorized and archived correctly, so there shouldn't be a risk of pulling in duplicate emails due to triggers.