Struggling to eliminate duplicate articles from my web scraper results using the following code:
app.get("/scrape", function (req, res) {
request("https://www.nytimes.com/", function (error, response, html) {
// Parsing the HTML using cheerio
var $ = cheerio.load(html);
var uniqueResults = [];
$("div.collection").each(function (i, element) {
var results = [];
// Scrape relevant data
results.link = $(element).find("a").attr("href");
results.title = $(element).find("a").text();
results.summary = $(element).find("p.summary").text().trim();
db.Article.create(results)
.then(function (dbArticle) {
res.json(dbArticle);
}).catch(function (err) {
return res.json(err);
});
});
res.send("Data successfully scraped.");
});
});
// Route for fetching Articles from the database
app.get("/articles", function (req, res) {
db.Article.find()
.then(function (dbArticle) {
res.json(dbArticle);
})
.catch(function (err) {
res.json(err);
});
});
Currently experiencing issues with receiving multiple copies of each article. Attempted solutions like db.Article.distinct and similar methods without success. Any suggestions?