After receiving some valuable input in the comments, I made a few tweaks to the code. A brief overview of the goal: scrape product information from over 800 pages of HTML, convert that data to JSON, and write it to a JSON file. The code runs fine when I process around 20 pages at a time, but trying to run it over all of the pages triggers the following error:
Error: Max redirects exceeded.
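From what I understand, this error comes from axios's redirect handling (the follow-redirects package it uses under Node), which gives up after 5 redirects by default. As a minimal sketch, this is roughly what raising that limit for a single request would look like; fetchListingPage is just an illustrative helper and the value 10 is an arbitrary example, not something I have verified against this site:

// Sketch only: axios accepts a maxRedirects option per request (default is 5)
const axios = require('axios');

async function fetchListingPage(pageUrl) {
  // Allow up to 10 redirects instead of the default 5 before axios gives up
  const response = await axios.get(pageUrl, { maxRedirects: 10 });
  return response.data;
}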
Here is the complete code snippet:
// Necessary module imports
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const url = "http://johndevisser.marktplaza.nl/?p=";
async function getProductsHtml(data) {
  // cheerio.load() is synchronous, so no await is needed here
  const $ = cheerio.load(data);
  let productsHTML = [];
  $("div.item").each((i, prod) => {
    productsHTML.push($(prod).html());
  });
  return productsHTML;
}
async function parseProducts(html) {
  let products = [];
  for (const item of html) {
    // Load each product snippet into its own cheerio instance
    const $ = cheerio.load(item);
    let product = {};
    let mpUrl = $("a").attr("href");
    product["title"] = $("a").attr("title");
    product["mpUrl"] = mpUrl;
    product["imgUrl"] = $("img").attr("src");
    // The price follows a non-breaking space; convert "12,50"-style decimals to "12.50"
    let priceText = $("span.subtext").text().split("\xa0")[1].replace(",", ".");
    product["price"] = parseFloat(priceText);
    products.push(product);
  }
  return products;
}
async function addDescriptionToProducts(prods) {
  for (const prod of prods) {
    // One extra GET request per product, to fetch its detail page
    const response = await axios.get(prod["mpUrl"]);
    const $ = cheerio.load(response.data);
    prod["descr"] = $("div.description p").text();
  }
  return prods;
}
async function getProductsFromPage(i) {
  try {
    const page = await axios.get(`${url}${i}`);
    console.log("GET request succeeded!");
    // Extract an HTML snippet for each product on the page
    const productsHTML = await getProductsHtml(page.data);
    console.log("Obtained HTML array!");
    // Parse the meta info into an array of objects
    const productsParsed = await parseProducts(productsHTML);
    console.log("Products parsed!");
    // Add descriptions to the products
    const productsMeta = await addDescriptionToProducts(productsParsed);
    console.log("Descriptions added!");
    // Return the complete product information array
    return productsMeta;
  } catch (e) {
    console.log(e);
  }
}
async function saveAllProducts() {
  try {
    const allProducts = await getAllProducts();
    // JSON.stringify is synchronous, so no await is needed here
    const jsonProducts = JSON.stringify(allProducts);
    fs.writeFile("products.json", jsonProducts, "utf8", (e) => {
      if (e) {
        console.log(e);
      }
    });
  } catch (e) {
    console.log(e);
  }
}
async function getAllProducts() {
  try {
    let allProducts = [];
    for (let i = 1; i < 855; i++) {
      const productsFromPage = await getProductsFromPage(i);
      allProducts = [...allProducts, ...productsFromPage];
      console.log("Saved products from page " + i);
    }
    return allProducts;
  } catch (e) {
    console.log(e);
  }
}
saveAllProducts();
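For reference, here is a rough sketch of a more defensive version of the page loop. Because getProductsFromPage returns undefined after its catch block logs an error, a single failing page would also break the spread in getAllProducts; this variant skips failed pages and remembers which ones they were. The name getAllProductsSafe is just a placeholder, and everything else assumes the functions above are unchanged:

// Sketch: keep going when one page fails and record the skipped page numbers
async function getAllProductsSafe() {
  let allProducts = [];
  let failedPages = [];
  for (let i = 1; i < 855; i++) {
    const productsFromPage = await getProductsFromPage(i);
    if (!productsFromPage) {
      // The request (or parsing) failed; remember the page and move on
      failedPages.push(i);
      continue;
    }
    allProducts = [...allProducts, ...productsFromPage];
    console.log("Saved products from page " + i);
  }
  console.log("Skipped pages: " + failedPages.join(", "));
  return allProducts;
}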