The Puppeteer software does not automatically shut down the browser once the task is complete

Currently, I have set up puppeteer on my Ubuntu server with Express and Node.js like so:

var puppeteer = require('puppeteer');
var express = require('express');
var router = express.Router();

/*
 * GET home page.
 * Launches a new headless browser for every request, loads the page given in
 * the `url` query parameter and responds with that page's body HTML.
 */
router.get('/', function(req, res, next) {
    // The promise returned by this async IIFE is neither awaited nor given a
    // rejection handler.
    (async () => {
        // Assigned without a declaration and not read again in this handler.
        headless = true;
        const browser = await puppeteer.launch({headless: true, args:['--no-sandbox']});
        const page = await browser.newPage();
        // Assigned without var/let/const.
        url = req.query.url;
        await page.goto(url);
        let bodyHTML = await page.evaluate(() => document.body.innerHTML);
        res.send(bodyHTML)
        // Only reached when every awaited call above has resolved.
        await browser.close();
    })();
});

After running this script multiple times, I noticed that there are numerous Zombie processes:

$ pgrep chrome | wc -l
133

This issue is causing congestion on the server,

How can I resolve this problem?

Would using kill from an Express JS script be a viable solution?

Are there alternative methods to achieve the same outcome without relying on puppeteer and headless chrome?

Answer №1

Oops! It seems like a simple oversight here. If an error occurs before await browser.close() is reached, that line is never executed, and you are left with zombie processes.

Relying on shell.js could be seen as a workaround to address this issue.

A better practice would be to use try..catch..finally. This ensures that the browser is always closed, whether execution succeeds or an error is thrown. With this approach there is no need to call browser.close() separately in both the try block and the catch block: the finally block guarantees closure regardless of any errors.

Therefore, your code should be structured like this:

const puppeteer = require('puppeteer');
const express = require('express');

const router = express.Router();

/*
 * GET home page.
 * Launches a headless browser, loads the page named by the `url` query
 * parameter and responds with its body HTML. The browser is closed in
 * `finally`, so it is shut down whether or not the page could be loaded.
 */
router.get('/', function(req, res, next) {
  (async () => {
    // Launched outside the try block: if launch fails there is nothing to close.
    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox'],
    });

    try {
      const page = await browser.newPage();
      const url = req.query.url;
      await page.goto(url);
      const bodyHTML = await page.evaluate(() => document.body.innerHTML);
      res.send(bodyHTML);
    } catch (e) {
      console.log(e);
      // Always answer the client; otherwise the request hangs until it times out.
      if (!res.headersSent) {
        res.sendStatus(500);
      }
    } finally {
      await browser.close();
    }
  })().catch(next); // launch/close failures go to Express instead of being unhandled
});

I hope this explanation clarifies things for you!

Answer №2

Enclose your code within a try-catch block like this to see if it makes a difference

// Launch outside the try block: if launch itself fails there is no browser to close.
const browser = await puppeteer.launch({headless: true, args:['--no-sandbox']});
try {
  const page = await browser.newPage();
  const url = req.query.url;
  await page.goto(url);
  const bodyHTML = await page.evaluate(() => document.body.innerHTML);
  res.send(bodyHTML);
} catch (error) {
  console.log(error);
  // Answer the client on failure so the request does not hang.
  if (!res.headersSent) {
    res.sendStatus(500);
  }
} finally {
  // The only close() call: finally runs after both the success and the error
  // path, so closing inside the try block as well would close the browser twice.
  await browser.close();
}

Answer №3

Based on my observations, the closing process of a browser might not happen immediately after the close command is executed. In such cases, it's advisable to verify the status of the browser process and terminate it forcefully if needed.

// Send SIGINT to the browser's child process, if there is a browser and it has one.
browser?.process()?.kill('SIGINT');

Furthermore, I've included the complete code snippet for managing resources in Puppeteer below. See the bw.on('disconnected', async () => { ... }) handler in particular:

const puppeteer = require('puppeteer-extra')
const randomUseragent = require('random-useragent');
const StealthPlugin = require('puppeteer-extra-plugin-stealth')

const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36';
puppeteer.use(StealthPlugin())

/**
 * Owns a single Puppeteer browser and creates pre-configured pages on it.
 *
 * Meant to be called with `new`; the instance exposes `init()`, `release()`
 * and `createPage(url)`. When the browser disconnects without `release()`
 * having been called, it is relaunched, up to a fixed number of times.
 *
 * @param {boolean} loadImages - When falsy, requests for stylesheets, fonts
 *     and images are aborted on every page created by this manager.
 */
function ResourceManager(loadImages) {
    // Relaunches allowed after unexpected disconnects before giving up.
    const MAX_RESTARTS = 3;
    // Resource types aborted when `loadImages` is falsy.
    const BLOCKED_RESOURCE_TYPES = new Set(['stylesheet', 'font', 'image']);

    let browser = null;
    let retries = 0;
    let isReleased = false;

    /** Launches the browser and resets the crash/restart bookkeeping. */
    this.init = async () => {
        isReleased = false;
        retries = 0;
        browser = await runBrowser();
    };

    /** Closes the browser; the resulting disconnect is not treated as a crash. */
    this.release = async () => {
        isReleased = true;
        if (browser) await browser.close();
    };

    /**
     * Opens a configured page and navigates it to `url`, launching the
     * browser first if `init()` has not been called.
     *
     * @param {string} url - Address to navigate to.
     * @returns {Promise<object>} The page, after navigation has finished.
     */
    this.createPage = async (url) => {
        if (!browser) browser = await runBrowser();
        return await createPage(browser,url);
    };

    // Launches a browser and attaches the crash-recovery handler to it.
    async function runBrowser () {
        const bw = await puppeteer.launch({
            headless: true,
            devtools: false,
            ignoreHTTPSErrors: true,
            slowMo: 0,
            args: ['--disable-gpu','--no-sandbox','--no-zygote','--disable-setuid-sandbox','--disable-accelerated-2d-canvas','--disable-dev-shm-usage', "--proxy-server='direct://'", "--proxy-bypass-list=*"]
        });

        bw.on('disconnected', async () => {
            // release() closes the browser on purpose; that is not a crash.
            if (isReleased) return;
            console.log("BROWSER CRASH");
            if (retries < MAX_RESTARTS) {
                retries += 1;
                // Make sure the old process is really gone before relaunching.
                if (browser && browser.process() != null) browser.process().kill('SIGINT');
                // Relaunch directly instead of going through init(): init()
                // resets `retries`, which would make the limit unreachable.
                browser = await runBrowser();
            } else {
                // NOTE(review): thrown inside an async event listener, so it
                // surfaces as an unhandled promise rejection.
                throw new Error("===================== BROWSER crashed more than 3 times");
            }
        });

        return bw;
    }

    // Opens a page on `browser`, applies viewport, user-agent and detection
    // overrides, then navigates to `url`. The page is closed again if any
    // step fails.
    async function createPage (browser,url) {
        const userAgent = randomUseragent.getRandom();
        const UA = userAgent || USER_AGENT;
        const page = await browser.newPage();
        try {
            await page.setViewport({
                width: 1920 + Math.floor(Math.random() * 100),
                height: 3000 + Math.floor(Math.random() * 100),
                deviceScaleFactor: 1,
                hasTouch: false,
                isLandscape: false,
                isMobile: false,
            });
            await page.setUserAgent(UA);
            await page.setJavaScriptEnabled(true);
            // A timeout of 0 disables the navigation timeout entirely.
            await page.setDefaultNavigationTimeout(0);
            if (!loadImages) {
                await page.setRequestInterception(true);
                page.on('request', (req) => {
                    if (BLOCKED_RESOURCE_TYPES.has(req.resourceType())) {
                        req.abort();
                    } else {
                        req.continue();
                    }
                });
            }

            await page.evaluateOnNewDocument(() => {
                //pass webdriver check
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => false,
                });
            });

            await page.evaluateOnNewDocument(() => {
                //pass chrome check
                window.chrome = {
                    runtime: {},
                    // etc.
                };
            });

            await page.evaluateOnNewDocument(() => {
                //pass permissions check
                const originalQuery = window.navigator.permissions.query;
                return window.navigator.permissions.query = (parameters) => (
                    parameters.name === 'notifications' ?
                        Promise.resolve({ state: Notification.permission }) :
                        originalQuery(parameters)
                );
            });

            await page.evaluateOnNewDocument(() => {
                // Overwrite the `plugins` property to use a custom getter.
                Object.defineProperty(navigator, 'plugins', {
                    // This just needs to have `length > 0` for the current test,
                    // but we could mock the plugins too if necessary.
                    get: () => [1, 2, 3, 4, 5],
                });
            });

            await page.evaluateOnNewDocument(() => {
                // Overwrite the `languages` property to use a custom getter.
                Object.defineProperty(navigator, 'languages', {
                    get: () => ['en-US', 'en'],
                });
            });

            await page.goto(url, { waitUntil: 'networkidle2',timeout: 0 } );
            return page;
        } catch (err) {
            // Do not leave a half-configured tab open. A failure to close is
            // ignored on purpose (the browser may already be gone); the
            // original error is the one the caller needs.
            await page.close().catch(() => {});
            throw err;
        }
    }
}

module.exports = {ResourceManager}

Answer №4

Today, I faced a similar issue and managed to find a workaround. The problem of Chromium not closing is often caused by unresolved pages. Make sure to close all the pages before executing browser.close(), like so:

// Close every open tab one after another, then the browser itself.
try {
    const tabs = await browser.pages();
    for (const tab of tabs) {
        await tab.close();
    }
} finally {
    // Runs even when listing or closing a tab fails, so the browser process
    // is not left behind.
    await browser.close();
}

I hope this solution proves helpful for someone out there!

Answer №6

For my Puppeteer setup, I follow this basic structure:

const puppeteer = require("puppeteer");

// Declared outside the async function so the .finally() callback below can reach it.
let browser;
(async () => {
  browser = await puppeteer.launch();
  // Use the first entry of browser.pages() rather than calling newPage().
  const [page] = await browser.pages();

  /* utilize the page */
  
})()
  // Log any rejection coming out of the async function above.
  .catch(err => console.error(err))
  // Runs after success or failure; `?.` skips close() when `browser` was never assigned.
  .finally(() => browser?.close());

The finally block ensures that the browser closes properly even if an error occurs. Errors are logged as needed. Chaining .catch and .finally calls keeps the mainline Puppeteer code neat and achieves the same outcome as below:

const puppeteer = require("puppeteer");

// Same flow as a .catch()/.finally() chain, written with try/catch/finally.
(async () => {
  // Declared before the try block so the finally block can see it.
  let browser;

  try {
    browser = await puppeteer.launch();
    // Use the first entry of browser.pages() rather than calling newPage().
    const [page] = await browser.pages();

    /* utilize the page */
  }
  catch (err) {
    console.error(err);
  }
  finally {
    // `?.` skips close() when launch() failed and `browser` is still undefined.
    await browser?.close();
  }
})();

No need to call newPage since Puppeteer opens with a page already.


Regarding Express, simply include the entire code snippet above in your route, including let browser; and excluding require("puppeteer"). You may want to consider using an async middleware error handler.

You might wonder:

Is there a more efficient method than puppeteer and headless chrome for achieving similar results?

This depends on your specific requirements and definition of "better." If you only need to extract document.body.innerHTML from static HTML, ditching Puppeteer in favor of making a request and utilizing Cheerio could be an alternative.

Additionally, you can optimize resource usage by avoiding opening and closing a new browser per request. Consider following this approach:

const express = require("express");
const puppeteer = require("puppeteer");

/**
 * Wraps a (possibly async) route handler so that a rejected promise is passed
 * to `next` instead of going unhandled.
 *
 * @param {Function} fn - Route handler called as `fn(req, res, next)`.
 * @returns {Function} Handler with the same `(req, res, next)` signature; it
 *     returns the promise that settles once `fn` has finished.
 */
const asyncHandler = (fn) => {
  return (req, res, next) => {
    const outcome = fn(req, res, next);
    return Promise.resolve(outcome).catch(next);
  };
};

// One browser is launched at startup; every request awaits this same promise
// and only opens and closes its own page.
const browserReady = puppeteer.launch({
  args: ["--no-sandbox", "--disable-setuid-sandbox"]
});

const DEFAULT_URL = "http://www.example.com";
// Only web pages may be fetched. Without this check a client could pass a
// file:// URL and read files from the server.
const ALLOWED_PROTOCOLS = new Set(["http:", "https:"]);

/**
 * Parses a user-supplied address.
 *
 * @param {string} rawUrl - Value taken from the query string.
 * @returns {URL|null} The parsed URL, or null when it is not an absolute
 *     http(s) URL.
 */
const toHttpUrl = (rawUrl) => {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  }
  catch (err) {
    // `new URL` throws on anything that is not an absolute URL.
    return null;
  }
  return ALLOWED_PROTOCOLS.has(parsed.protocol) ? parsed : null;
};

const app = express();
app
  .set("port", process.env.PORT || 5000)
  .get("/", asyncHandler(async (req, res) => {
    const target = toHttpUrl(req.query.url || DEFAULT_URL);
    if (target === null) {
      return res.status(400).send("url must be an absolute http(s) URL");
    }

    const browser = await browserReady;
    const page = await browser.newPage();

    try {
      await page.goto(target.href);
      return res.send(await page.content());
    }
    catch (err) {
      return res.status(400).send(err.message);
    }
    finally {
      // Close only this request's page; the shared browser keeps running.
      await page.close();
    }
  }))
  .use((err, req, res, next) => {
    // Log before answering so failures (e.g. the browser failing to launch)
    // are not silently reduced to a bare 500.
    console.error(err);
    res.sendStatus(500);
  })
  .listen(app.get("port"), () =>
    console.log("listening on port", app.get("port"))
  );

Lastly, avoid setting timeouts to 0, e.g.

page.setDefaultNavigationTimeout(0);

because 0 disables the timeout and lets a stuck navigation hang the script indefinitely. If a long timeout is necessary, set it to a reasonable duration, such as a few minutes at most.

Check out these resources too:

  • Parallelism of Puppeteer with Express Router Node JS. How to pass page between routes while maintaining concurrency
  • Puppeteer unable to run on heroku

Answer №7

While using the chromium browser (@sparticuz/chromium), I ran into a problem that was resolved by following the discussion on the issue forum. It seems that there may have been an extra page or tab open in chromium, and closing all pages made a significant difference.

// Close all open pages in parallel, then the browser itself.
try {
  const pages = await browser.pages();
  await Promise.all(pages.map((page) => page.close()));
} finally {
  // Runs even when listing or closing a page fails, so the browser process
  // is not left behind.
  await browser.close();
}

Answer №8

It's a good practice to close your browser before sending a response.

const puppeteer = require('puppeteer');
const express = require('express');
const router = express.Router();

/*
 * GET home page.
 * Loads the page named by the `url` query parameter and closes the browser
 * before the response is sent.
 */
router.get('/', function(req, res, next) {
    (async () => {
        const browser = await puppeteer.launch({headless: true});
        let bodyHTML;
        try {
            const page = await browser.newPage();
            const url = req.query.url;
            await page.goto(url);
            bodyHTML = await page.evaluate(() => document.body.innerHTML);
        } finally {
            // Close first, respond afterwards; finally also covers the error
            // path so a failed navigation does not leave Chrome running.
            await browser.close();
        }
        res.send(bodyHTML);
    })().catch(next); // report failures to Express instead of leaving them unhandled
});

Answer №9

Encountering the same issue led me to explore alternative solutions. While the shelljs solution proved effective, it posed a potential risk of terminating all chrome processes, potentially disrupting an ongoing request processing. Here is an improved approach that addresses this concern.

const puppeteer = require('puppeteer');
const express = require('express');
const router = express.Router();

/*
 * GET home page.
 * Each request launches, uses and closes its own browser instance, so no
 * other request's Chrome process is affected.
 */
router.get('/', function (req, res, next) {
    (async () => {
        const browser = await puppeteer.launch({ headless: true });
        let bodyHTML;
        try {
            const page = await browser.newPage();
            const url = req.query.url;
            await page.goto(url);
            bodyHTML = await page.evaluate(() => document.body.innerHTML);
        } finally {
            // Runs on success and on error, so this request's browser is
            // always shut down.
            await browser.close();
        }
        res.send(bodyHTML);
    })().catch(next); // report failures to Express instead of leaving them unhandled
});

Answer №10

utilize

 (await browser).close()

This issue occurs due to the fact that the browser object is a promise that needs to be resolved. I faced this challenge myself and hope this explanation proves helpful.

Answer №11

When I attempted the try-catch-finally approach, it unfortunately did not resolve my issue. Resorting to shelljs' shell.exec('pkill chrome') seemed like a desperate last resort.

Upon further investigation, I discovered that the root of my problem lay in having used redis' await cache.set('key', 'value') function within my code without properly closing it afterwards. It was necessary for me to add await cache.quit() before calling await browser.close(). This simple tweak ultimately fixed the problem I was facing.

I recommend thoroughly examining the libraries or modules you are utilizing in your code, particularly those that require explicit closing or quitting procedures. Look out for any processes that may be running continuously without throwing errors, as using try-catch blocks will not assist in these scenarios and might prevent the browser from closing properly.

Answer №12

To effectively manage zombie processes while running puppeteer inside a docker container using docker-compose, I found success by including init: true in the docker-compose.yml file within the specific service where puppeteer was being executed.

services:
  # The service in which puppeteer is executed.
  web:
    image: alpine:latest
    # Enables Compose's `init` option for this service.
    # NOTE(review): presumably this runs an init process as PID 1 that reaps
    # zombie children; confirm against the Compose file reference.
    init: true

For more information and troubleshooting tips, refer to the following resources:

  1. https://docs.docker.com/compose/compose-file/compose-file-v2/#init

Similar questions

If you have not found the answer to your question or you are interested in this topic, then look at other similar questions below or use the search

What is the best way to integrate ngx-translate's pipe for global translation?

I'm currently utilizing the ngx-translate package in order to internationalize my Angular application. When translating text using the translate pipe like so: {{ 'title' | translate }} An issue arises when attempting to use this pipe in ot ...

Can you explain the significance of "javascript:void(0)"?

<a href="javascript:void(0)" id="loginlink">login</a> The usage of the href attribute with a value of "javascript:void(0)" is quite common, however, its exact meaning still eludes me. ...

How can Array.map() be combined with D3 selection?

Is there a way to calculate the maximum length of text elements in an SVG selection similar to using Array.map()? Currently, I am retrieving the maximum length of a selection of SVG <text/> elements by utilizing .selectAll(...)[0].map(...), but it fe ...

Is it possible to expand the Angular Material Data Table Header Row to align with the width of the row content?

Issue with Angular Material Data Table Layout Link to relevant feature request on GitHub On this StackBlitz demo, the issue of rows bleeding through the header when scrolling to the right and the row lines not expanding past viewport width is evident. Ho ...

Is there a way to automatically change the display of an element once the user has closed the menu?

How can I ensure that the display of an element remains unchanged when a user opens and closes my website menu using JavaScript? ...

Prepared SQL Statement in NodeJS for MSSQL using WHERE IN clause

I'm using the sql npm package in my Node.js project. Currently, I have an array of product SKUs like this: var skus = ['product1', 'product2', 'product3']; The SQL query stored in a file looks like this: SELECT * FROM ...

Animate the leftward disappearance of a div to make it vanish

My goal is to create a user interface where users can navigate through different divs. Here is the HTML code: <article id="realize" class="realizeBox"> <div class="shown"> <div class="heading"> <h2>Realisati ...

Oops! The module "rxjs/Subject" seems to be missing the exported member "Subject"

Here is the code I'm working with: import { Subject } from 'rxjs/Subject'; Upon importing this, an error occurs: rxjs/Subject" has no exported member 'Subject'. I am unable to fix this issue. Does anyone have a solution? ...

Utilizing null values within the map function in React JS

I am currently developing an application using React JS. The app displays a list of users along with the status of books (available, taken, or requested) for each user. However, I'm encountering an issue where even after filtering out the books based ...

What is causing the jQuery functions to not be executed in sequence?

Whenever I click the Start button, my function is supposed to run a cycle for 10 iterations. Each iteration generates a number between 1 and 7, which then triggers a series of functions on different objects. The problem is that these functions are not runn ...

Troubleshooting Issue with Query Functionality in MEAN App's Find Request

I'm facing some challenges while working with queries in my MEAN App. Specifically, I am attempting to retrieve data that matches the input entered into a search field: $scope.searchInput = function(search){ $http({ method: 'GET', url: ...

Using Slick.JS to Sync Navigation with Main Slider, Automatically Resetting to First Slide

I have a question about the functionality of the Slick.JS plugin that I'm using. In my project, I have two carousels with five slides each. The top carousel displays one slide at a time, while the bottom carousel shows all five slides concurrently. My ...

Trigger the script upon clicking the Save button within the AdminBro Edit page

AdminBro's documentation mentions that they provide predefined actions Edit (record action) - update records in a resource They also offer a hook called after. I created a function and assigned it to MyResource.edit.after. However, the issue is tha ...

Adding images in ascending order according to the parent div's ID

If I have three different divs with unique IDs on a webpage <div id="product-id-001"></div> <div id="product-id-002"></div> <div id="product-id-003"></div> Is there a way to add image elements based on the ID of each d ...

What are some ways to make autorun compatible with runInAction in mobx?

Currently delving into the world of mobx and runInAction, facing a challenge in comprehending why autorun fails to trigger my callback in this particular scenario: class ExampleClass { // constructor() { // this.exampleMethod(); // } ...

Leveraging the power of JavaScript and possibly regular expressions to extract numerical values from a

I am attempting to extract a variable number from the text provided in the following example: Buy 5 for € 16.00 each and save 11% Buy 50 for € 15.00 each and save 17% Buy 120 for € 13.00 each and save 28% Buy 1000 for € 10.00 each and save 45% Th ...

Can you help me figure out why my Fetch request from the Nodejs backend isn't working?

I keep encountering an error every time I try to make a fetch request: server running on 8000 port { type: 'https://httpstatus.es/401', status: 401, title: 'Unauthorized', detail: 'Access token invalid or expired' } Ev ...

Troubleshooting: Why are my images not displaying in webpack and node.js setup?

My problem: I'm facing an issue with background images in my project. I have included two images and used file-loader to bundle them through webpack. While the images display correctly in the app during development using webpack-dev-server, they disap ...

Is there an issue with this npm version number?

I am trying to include the following dependency in the package.json file of my npm package: "redux-saga": "^1.0.0-beta.0 || ^0.16.0"`. When I install this package in a project that already has "redux-saga": "^1.0.0-beta.1 I am expecting npm/yarn to on ...

Tips on effectively rendering child components conditionally in React

My components currently consist of an AddBookPanel containing the AddBookForm. I am looking to implement a feature where the form is displayed upon clicking the 'AddBookButton', and hidden when the 'x' button (image within AddBookForm c ...