The Puppeteer software does not automatically shut down the browser once the task is complete

Currently, I have set up puppeteer on my Ubuntu server with Express and Node.js like so:

var puppeteer = require('puppeteer');
var express = require('express');
var router = express.Router();

/* GET home page. */
// Route handler: launches a fresh headless browser for every request, loads the
// page named by the `url` query parameter and responds with its body HTML.
router.get('/', function(req, res, next) {
    // The async IIFE's promise is not awaited or given a rejection handler here,
    // so a rejection from any `await` below is not handled in this block.
    (async () => {
        // Assignment to an undeclared name (implicit global); not read in this block.
        headless = true;
        const browser = await puppeteer.launch({headless: true, args:['--no-sandbox']});
        const page = await browser.newPage();
        // Also an undeclared name, so this is an implicit global as well.
        url = req.query.url;
        await page.goto(url);
        let bodyHTML = await page.evaluate(() => document.body.innerHTML);
        res.send(bodyHTML)
        // Only reached when every step above succeeded; if newPage/goto/evaluate
        // rejects, this line is skipped and the launched browser is never closed.
        await browser.close();
    })();
});

After running this script multiple times, I noticed that there are numerous Zombie processes:

$ pgrep chrome | wc -l
133

This issue is causing congestion on the server,

How can I resolve this problem?

Would using kill from an Express JS script be a viable solution?

Are there alternative methods to achieve the same outcome without relying on puppeteer and headless chrome?

Answer №1

This looks like a simple oversight: if an error occurs anywhere before it, await browser.close() is never executed, and you are left with zombie processes.

Relying on shell.js could be seen as a workaround to address this issue.

A better practice would be to use try..catch..finally. This ensures that the browser will always be closed whether there is a successful execution or an error being thrown. With this approach, there is no need to manually handle closing the browser in both the catch block and the finally block. The finally block guarantees closure regardless of any errors.

Therefore, your code should be structured like this:

const puppeteer = require('puppeteer');
const express = require('express');

const router = express.Router();

/* GET home page. */
/**
 * GET / — renders the page given by the `url` query parameter in a fresh
 * headless browser and responds with the HTML of its <body>.
 *
 * The browser is always closed in `finally`, whether the request succeeded or
 * failed. Failures are logged and forwarded to Express via `next` so the
 * client gets an error response instead of a request that never completes.
 */
router.get('/', function(req, res, next) {
  (async () => {
    // Declared outside `try` so `finally` can see it; launch happens inside
    // `try` so that a failed launch is handled like any other error.
    let browser;

    try {
      browser = await puppeteer.launch({
        headless: true,
        args: ['--no-sandbox'],
      });
      const page = await browser.newPage();
      const url = req.query.url;
      await page.goto(url);
      const bodyHTML = await page.evaluate(() => document.body.innerHTML);
      res.send(bodyHTML);
    } catch (e) {
      console.log(e);
      // Only hand the error to Express if nothing has been sent yet.
      if (!res.headersSent) {
        next(e);
      }
    } finally {
      // `browser` is still undefined when the launch itself failed.
      await browser?.close();
    }
  })().catch((e) => console.log(e)); // a failing close() must not go unhandled
});

I hope this explanation clarifies things for you!

Answer №2

Enclose your code within a try-catch block like this to see if it makes a difference

// Fragment meant to run inside the async route handler (uses `req` and `res`).
// The browser is closed exactly once, in `finally`, on both the success and
// the error path.
const browser = await puppeteer.launch({headless: true, args:['--no-sandbox']});
try {
  const page = await browser.newPage();
  const url = req.query.url;
  await page.goto(url);
  const bodyHTML = await page.evaluate(() => document.body.innerHTML);
  res.send(bodyHTML);
} catch (error) {
  console.log(error);
  // Answer the client so the request does not hang after a failure.
  if (!res.headersSent) {
    res.sendStatus(500);
  }
} finally {
  await browser.close();
}

Answer №3

Based on my observations, the closing process of a browser might not happen immediately after the close command is executed. In such cases, it's advisable to verify the status of the browser process and terminate it forcefully if needed.

// If a browser object exists and process() returns a non-null child process,
// send that process SIGINT.
if (browser && browser.process() != null) browser.process().kill('SIGINT');

Furthermore, I've included the complete code snippet for managing resources in Puppeteer below. See in particular the bw.on('disconnected', async () => { ... }) handler.

const puppeteer = require('puppeteer-extra')
const randomUseragent = require('random-useragent');
const StealthPlugin = require('puppeteer-extra-plugin-stealth')

const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36';
puppeteer.use(StealthPlugin())

/**
 * Owns one Puppeteer browser and hands out pre-configured pages from it.
 *
 * Methods attached to the instance:
 *   - init():          (re)launches the browser and resets the crash counter.
 *   - release():       marks the manager as released and closes the browser.
 *   - createPage(url): opens a configured page, navigates it to `url` and
 *                      resolves with the page; launches a browser first if
 *                      none is held.
 *
 * @param {boolean} loadImages When falsy, stylesheet, font and image requests
 *                             are aborted on every page created.
 */
function ResourceManager(loadImages) {
    // Relaunches are attempted while `retries <= MAX_RETRIES`.
    const MAX_RETRIES = 3;
    const BLOCKED_RESOURCE_TYPES = new Set(['stylesheet', 'font', 'image']);

    let browser = null;
    let retries = 0;
    let isReleased = false;

    this.init = async () => {
        isReleased = false;
        retries = 0;
        browser = await runBrowser();
    };

    this.release = async () => {
        // Set first so the 'disconnected' handler treats the shutdown as intended.
        isReleased = true;
        // Drop the reference before closing so a later createPage() launches a
        // fresh browser instead of calling into a closed one.
        const closing = browser;
        browser = null;
        if (closing) await closing.close();
    };

    this.createPage = async (url) => {
        // Going through init() also clears a previous release(), so crash
        // recovery is active again for the lazily launched browser.
        if (!browser) await this.init();
        return createPage(browser, url);
    };

    /**
     * Launches a browser and wires up crash recovery for it.
     * @returns {Promise<object>} the launched browser
     */
    async function runBrowser () {
        const bw = await puppeteer.launch({
            headless: true,
            devtools: false,
            ignoreHTTPSErrors: true,
            slowMo: 0,
            // NOTE(review): the quotes inside the --proxy-server value are passed
            // to the browser literally (no shell strips them) — confirm intended.
            args: ['--disable-gpu','--no-sandbox','--no-zygote','--disable-setuid-sandbox','--disable-accelerated-2d-canvas','--disable-dev-shm-usage', "--proxy-server='direct://'", "--proxy-bypass-list=*"]
        });

        bw.on('disconnected', async () => {
            if (isReleased) return;
            console.log("BROWSER CRASH");
            if (retries > MAX_RETRIES) {
                // Thrown from an event handler: surfaces as an unhandled
                // rejection, i.e. the failure is deliberately loud.
                throw new Error("===================== BROWSER crashed more than 3 times");
            }
            retries += 1;
            // Make sure the process behind the disconnected browser is gone.
            const crashedProcess = bw.process();
            if (crashedProcess != null) crashedProcess.kill('SIGINT');
            // Relaunch directly rather than via init(): init() resets `retries`,
            // which would make the limit above unreachable.
            browser = await runBrowser();
        });

        return bw;
    }

    /**
     * Opens a page on `owner`, applies viewport, user agent, request filtering
     * and init scripts, then navigates to `url`.
     * If any step fails the page is closed before the error is rethrown, so a
     * failed navigation does not leave an orphaned tab behind.
     * @param {object} owner browser to open the page on
     * @param {string} url   address to navigate to
     * @returns {Promise<object>} the navigated page
     */
    async function createPage (owner, url) {
        // Fall back to the fixed agent when no random one is returned.
        const UA = randomUseragent.getRandom() || USER_AGENT;
        const page = await owner.newPage();
        try {
            await page.setViewport({
                width: 1920 + Math.floor(Math.random() * 100),
                height: 3000 + Math.floor(Math.random() * 100),
                deviceScaleFactor: 1,
                hasTouch: false,
                isLandscape: false,
                isMobile: false,
            });
            await page.setUserAgent(UA);
            await page.setJavaScriptEnabled(true);
            // NOTE(review): 0 is kept from the original; a finite timeout would
            // stop a stuck navigation from waiting forever.
            await page.setDefaultNavigationTimeout(0);
            if (!loadImages) {
                await page.setRequestInterception(true);
                page.on('request', (req) => {
                    if (BLOCKED_RESOURCE_TYPES.has(req.resourceType())) {
                        req.abort();
                    } else {
                        req.continue();
                    }
                });
            }

            await page.evaluateOnNewDocument(() => {
                //pass webdriver check
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => false,
                });
            });

            await page.evaluateOnNewDocument(() => {
                //pass chrome check
                window.chrome = {
                    runtime: {},
                    // etc.
                };
            });

            await page.evaluateOnNewDocument(() => {
                //pass permissions check
                const originalQuery = window.navigator.permissions.query;
                return window.navigator.permissions.query = (parameters) => (
                    parameters.name === 'notifications' ?
                        Promise.resolve({ state: Notification.permission }) :
                        originalQuery(parameters)
                );
            });

            await page.evaluateOnNewDocument(() => {
                // Overwrite the `plugins` property to use a custom getter.
                Object.defineProperty(navigator, 'plugins', {
                    // This just needs to have `length > 0` for the current test,
                    // but we could mock the plugins too if necessary.
                    get: () => [1, 2, 3, 4, 5],
                });
            });

            await page.evaluateOnNewDocument(() => {
                // Overwrite the `languages` property to use a custom getter.
                Object.defineProperty(navigator, 'languages', {
                    get: () => ['en-US', 'en'],
                });
            });

            await page.goto(url, { waitUntil: 'networkidle2',timeout: 0 } );
            return page;
        } catch (err) {
            try {
                await page.close();
            } catch (closeErr) {
                // Best effort only: the original failure is the one to report.
                console.log(closeErr);
            }
            throw err;
        }
    }
}

module.exports = {ResourceManager}

Answer №4

Today, I faced a similar issue and managed to find a workaround. The problem of Chromium not closing is often caused by unresolved pages. Make sure to close all the pages before executing browser.close(), like so:

// Close every open tab one after another, then shut the browser down.
const tabs = await browser.pages();
for (const tab of tabs) {
    await tab.close();
}
await browser.close()

I hope this solution proves helpful for someone out there!

Answer №6

For my Puppeteer setup, I follow this basic structure:

const puppeteer = require("puppeteer");

// Declared outside the async function so the .finally() callback can reach it.
let browser;
(async () => {
  browser = await puppeteer.launch();
  // Take the first entry of the array returned by browser.pages().
  const [page] = await browser.pages();

  /* utilize the page */
  
})()
  // Log any rejection from the async function above.
  .catch(err => console.error(err))
  // Runs on success and on failure; `?.` skips the call when `browser` was
  // never assigned (launch rejected).
  .finally(() => browser?.close());

The finally block ensures that the browser closes properly even if an error occurs. Errors are logged as needed. Chaining .catch and .finally calls keeps the mainline Puppeteer code neat and achieves the same outcome as below:

const puppeteer = require("puppeteer");

// Same flow as a try/catch/finally inside a single async function.
(async () => {
  // Declared before `try` so it is visible in `finally`.
  let browser;

  try {
    browser = await puppeteer.launch();
    // Take the first entry of the array returned by browser.pages().
    const [page] = await browser.pages();

    /* utilize the page */
  }
  catch (err) {
    console.error(err);
  }
  finally {
    // `?.` skips the call when launch rejected and `browser` is still undefined.
    await browser?.close();
  }
})();

No need to call newPage since Puppeteer opens with a page already.


Regarding Express, simply include the entire code snippet above in your route, including let browser; and excluding require("puppeteer"). You may want to consider using an async middleware error handler.

You might wonder:

Is there a more efficient method than puppeteer and headless chrome for achieving similar results?

This depends on your specific requirements and definition of "better." If you only need to extract document.body.innerHTML from static HTML, ditching Puppeteer in favor of making a request and utilizing Cheerio could be an alternative.

Additionally, you can optimize resource usage by avoiding opening and closing a new browser per request. Consider following this approach:

const express = require("express");
const puppeteer = require("puppeteer");

/**
 * Wraps a (possibly async) Express handler so that a rejected promise is
 * passed to `next` instead of going unhandled.
 * @param {Function} fn handler taking (req, res, next)
 * @returns {Function} middleware returning the wrapped promise
 */
const asyncHandler = (fn) => {
  return (req, res, next) => {
    const pending = Promise.resolve(fn(req, res, next));
    return pending.catch(next);
  };
};

// Launch once at module load; the promise is awaited inside each request, so
// all requests use the same browser instead of launching one each.
const browserReady = puppeteer.launch({
  args: ["--no-sandbox", "--disable-setuid-sandbox"]
});

const app = express();
app
  .set("port", process.env.PORT || 5000)
  // GET /: open a new page in the shared browser, navigate to the `url` query
  // parameter (or the example.com default) and respond with page.content().
  .get("/", asyncHandler(async (req, res) => {
    const browser = await browserReady;
    const page = await browser.newPage();

    try {
      await page.goto(req.query.url || "http://www.example.com");
      return res.send(await page.content());
    }
    catch (err) {
      // Errors from goto/content are answered with 400 and the error message.
      return res.status(400).send(err.message);
    }
    finally {
      // Only the page is closed per request; the browser is not closed here.
      await page.close();
    }
  }))
  // Error middleware: anything forwarded through next(err) is answered with 500.
  .use((err, req, res, next) => res.sendStatus(500))
  .listen(app.get("port"), () =>
    console.log("listening on port", app.get("port"))
  );

Lastly, avoid setting timeouts to 0 (e.g., page.setDefaultNavigationTimeout(0);): a value of 0 disables the timeout, so a navigation that never completes can stall the script indefinitely. If a timeout is necessary, set it for a reasonable duration, such as a few minutes at most.

Check out these resources too:

  • Parallelism of Puppeteer with Express Router Node JS. How to pass page between routes while maintaining concurrency
  • Puppeteer unable to run on heroku

Answer №7

While using the chromium browser (@sparticuz/chromium), I ran into a problem that was resolved by following the discussion on the issue forum. It seems that there may have been an extra page or tab open in chromium, and closing all pages made a significant difference.

// Start closing every open page at once and wait for all of them to finish
// before closing the browser itself.
const pages = await browser.pages();
await Promise.all(pages.map((page) => page.close()));
await browser.close();

Answer №8

It's a good practice to close your browser before sending a response.

const puppeteer = require('puppeteer');
const express = require('express');
const routeHandler = express.Router();

/**
 * GET / — renders the page given by the `url` query parameter and responds
 * with the HTML of its <body>.
 *
 * The browser is closed before the response is sent, and it is closed in
 * `finally` so a failing navigation cannot leave the process behind. Errors
 * are forwarded to Express through `next`.
 */
routeHandler.get('/', async function(req, res, next) {
    try {
        const browser = await puppeteer.launch({headless: true});
        let bodyHTML;
        try {
            const page = await browser.newPage();
            const url = req.query.url;
            await page.goto(url);
            bodyHTML = await page.evaluate(() => document.body.innerHTML);
        } finally {
            await browser.close();
        }
        // Reached only after the browser has been closed successfully.
        res.send(bodyHTML);
    } catch (err) {
        next(err);
    }
});

Answer №9

Encountering the same issue led me to explore alternative solutions. While the shelljs solution proved effective, it posed a potential risk of terminating all chrome processes, potentially disrupting an ongoing request processing. Here is an improved approach that addresses this concern.

const puppeteer = require('puppeteer');
const express = require('express');
const router = express.Router();

/**
 * GET / — renders the page given by the `url` query parameter and responds
 * with the HTML of its <body>.
 *
 * Only the browser launched for this request is closed (no process-wide
 * kill), and it is closed in `finally` so it also goes away when navigation
 * or evaluation fails. Any error is forwarded to Express through `next`.
 */
router.get('/', function (req, res, next) {
    (async () => {
        const browser = await puppeteer.launch({ headless: true });
        let bodyHTML;
        try {
            const page = await browser.newPage();
            const url = req.query.url;
            await page.goto(url);
            bodyHTML = await page.evaluate(() => document.body.innerHTML);
        } finally {
            await browser.close();
        }
        res.send(bodyHTML);
    })().catch(next);
});

Answer №10

utilize

 (await browser).close()

This issue occurs due to the fact that the browser object is a promise that needs to be resolved. I faced this challenge myself and hope this explanation proves helpful.

Answer №11

When I attempted the try-catch-finally approach, it unfortunately did not resolve my issue. Resorting to shelljs' shell.exec('pkill chrome') seemed like a desperate last resort.

Upon further investigation, I discovered that the root of my problem lay in having used redis' await cache.set('key', 'value') function within my code without properly closing it afterwards. It was necessary for me to add await cache.quit() before calling await browser.close(). This simple tweak ultimately fixed the problem I was facing.

I recommend thoroughly examining the libraries or modules you are utilizing in your code, particularly those that require explicit closing or quitting procedures. Look out for any processes that may be running continuously without throwing errors, as using try-catch blocks will not assist in these scenarios and might prevent the browser from closing properly.

Answer №12

To effectively manage zombie processes while running puppeteer inside a docker container using docker-compose, I found success by including init: true in the docker-compose.yml file within the specific service where puppeteer was being executed.

services:
  web:
    image: alpine:latest
    init: true

For more information and troubleshooting tips, refer to the following resources:

  1. https://docs.docker.com/compose/compose-file/compose-file-v2/#init

Similar questions

If you have not found the answer to your question or you are interested in this topic, then look at other similar questions below or use the search

Repeatedly animate elements using jQuery loops

Whenever I click a button, a fish should appear and randomly move over a container at random positions. However, in my current code, the animation only occurs once and does not repeat continuously. I want to create a generic function that can be used for m ...

Using React Native to iterate over a multidimensional array with the array map function

I want to iterate through a two-dimensional array like the one below: var array=[["1"],["3","8"],["4","8","3"],["4","8","3","9"],["1","8","3","9","2"],["6","8","3","9","2","1"],["4","8","3","9","2","11","2"]] Currently, this code only loops through the & ...

Guide on how to verify if a component with a specific name is registered within the Composition API of Vue 3

My current situation involves a template that loads dynamic components based on their names: <template> <div> <div> <div> <component :is="getFormRenderer" &g ...

The Dynamic Kendo Grid construction encountered an invalid template issue

In my project, I'm working on creating a dynamic Kendo UI grid with columns that are dynamically generated. However, I'm encountering an issue where the data is not rendering properly onto the grid. The backend of this project involves using ASP ...

Command field in Javascript

I've crafted an exquisite search box for my website, but I'm struggling to make it functional and display search results. Below are the Html and CSS files pertaining to this section of my site: .searchbox{ /*setting width of the form eleme ...

What is the best way to close an ajax page within the main page using a button?

I am dealing with a situation where I have 2 pages. The first page contains a div called 'ajaxicall', which loads another page inside it. The challenge I am facing is figuring out how to close the second page when I click on the "close" button w ...

Learn how to incorporate additional rows into a table by pressing the plus button within the table with the help of Angular

I require some assistance. I am looking to dynamically generate a row after clicking on the plus button using angular.js. The newly created row should contain an ID and model generated dynamically. Here is an overview of my code: <table class="table ta ...

Why is my React component not updating after changing the state using Set State?

Uncovering the elusive problem seems impossible. Despite state changes, the component remains unrerendered. Here's the code: export const InterestsPupup: React.FC = ({interests, title, buttonText}) => { const [activeItems, setActiveItems] = useSt ...

Is there a way to reset back to the default CSS styles?

I have a div container with a nested span element (simplified). <div class="divDash"> <span>XX</span> </div> The CSS styling for the div makes the span initially hidden and only visible when hovering over the parent div. .div ...

Why does express-locale consistently return the default value?

After successfully installing express-locale using the command: npm install --save express-locale I proceeded to configure it in my project as follows: app = express(); app.use(locale({ priority: ['accept-language', 'cookie', &ap ...

JavaScript problem indicating an error that is not a function

Recently, I've delved into the world of JavaScript and am currently exploring JavaScript patterns. I grasp the concepts well but struggle with calling functions that are already in an object. var productValues = 0; var cart = function(){ this. ...

The function $.post(...) is failing to detect the JSON content in the

I am attempting to send a POST request to the server using the following code: var body = { PatientAgeFilter: { CompareOperator: parseInt(self.patientAge()), MoreThanVal: { AgeSpecifier: 0, AgeValue: parseInt(se ...

The lightbox fails to display in IE when a synchronous ajax request is triggered

I have a piece of code that displays a lightbox with the message 'Please Wait', followed by a synchronous ajax request that removes the lightbox once it's completed. This setup works perfectly in most browsers, but in Internet Explorer, the ...

Obtain the breakpoint value from Bootstrap 5

We have recently updated our project from Bootstrap 4 to Bootstrap 5. I am trying to retrieve the value of a breakpoint in my TypeScript/JavaScript code, which used to work in Bootstrap 4 with: window .getComputedStyle(document.documentElement) .g ...

Error: Callstack Overflow encountered in TypeScript application

Here is the code snippet that triggers a Callstack Size Exceeded Error: declare var createjs:any; import {Animation} from '../animation'; import {Events} from 'ionic-angular'; import { Inject } from '@angular/core'; exp ...

Having trouble receiving a response when making a request to an API

I encountered an issue while trying to fetch an API. Initially, I received an error message stating that the message port was closed before a response was received. After removing all extensions, the error disappeared but now I am still unable to receive a ...

Guide to implementing JWT authentication on a React application

I've been developing a web application using React and incorporating React Router. My current goal is to implement authentication for accessing protected pages within the app. I have a backend server running on Express, but I'm finding it challen ...

When utilizing Sequelize with Express JS, the execution of queries tends to require additional time

The execution time of a query in phpMyAdmin is 0.0039 seconds, whereas the same query takes 6 seconds in express js framework with Sequelize, despite both platforms having a total of 267121 records. SELECT pm.item_type, pm.product_type, pm.p ...

Difficulty with routing stylesheets in my Node Express server

Currently, I am setting up the back-end structure for my website. The file setup looks like this: public css main.css main_b.css index.hbs index_b.hbs server server.js To reference style sheets in the index files, I use link attributes wit ...

Retrieving information from a text document by means of data-src with the assistance of jQuery

Trying to extract text from a .txt file using jQuery while utilizing Bootstrap. New to web design with some coding experience. Button in HTML: <button type="button" class="btn btn-primary-outline-btn text-btn" data-toggle="modal ...