From 5dfd03fe597d9d7fc089c7669862e998ae1e5a30 Mon Sep 17 00:00:00 2001
From: Jimmy Lai
Date: Mon, 12 Dec 2022 19:54:55 +0100
Subject: [PATCH] next-dev: restart the dev server when it exceeds the memory
 limits (#43958)

## Context

There have been [some reports](https://github.com/vercel/next.js/issues/42514) of OOM-related crashes with Next 13. Whilst we're fixing the memory leaks that are causing this, some of which stem from upstream issues, this PR makes Next.js' dev server restart if it detects that it is about to crash.

You can disable this behaviour by setting `__NEXT_DISABLE_MEMORY_WATCHER=1` in the environment.

## Details

Under the hood, we're using Node's cluster API to create a child worker that checks memory usage after every request and kills itself if usage goes over 90% of the maximum heap allowance; the parent process then forks a replacement worker.

## Test plan

I manually added a leaking function, called before handling each request, and verified that the server restarted once memory usage was near the limit.

```
// module-scope array so the leaked allocations stay reachable across requests
const buffer = []

function createMemoryLeak() {
  console.log('createMemoryLeak', process.memoryUsage().heapUsed / 1024 / 1024)
  for (let i = 0; i < 10; i++) {
    buffer.push(new Array(1000000).fill('a'))
  }
}
```

## Bug

- [ ] Related issues linked using `fixes #number`
- [ ] Integration tests added
- [ ] Errors have a helpful link attached, see [`contributing.md`](https://github.com/vercel/next.js/blob/canary/contributing.md)

## Feature

- [ ] Implements an existing feature request or RFC. Make sure the feature request has been accepted for implementation before opening a PR.
- [ ] Related issues linked using `fixes #number`
- [ ] [e2e](https://github.com/vercel/next.js/blob/canary/contributing/core/testing.md#writing-tests-for-nextjs) tests added
- [ ] Documentation added
- [ ] Telemetry added. In case of a feature, whether it's used or not.
- [ ] Errors have a helpful link attached, see [`contributing.md`](https://github.com/vercel/next.js/blob/canary/contributing.md)

## Documentation / Examples

- [ ] Make sure the linting passes by running `pnpm build && pnpm lint`
- [ ] The "examples guidelines" are followed from [our contributing doc](https://github.com/vercel/next.js/blob/canary/contributing/examples/adding-examples.md)

---
 packages/next/cli/next-dev.ts            | 73 ++++++++++++++----------
 packages/next/server/lib/start-server.ts | 15 ++++-
 2 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/packages/next/cli/next-dev.ts b/packages/next/cli/next-dev.ts
index b0a689c84f660fa..713026254ee2cd0 100755
--- a/packages/next/cli/next-dev.ts
+++ b/packages/next/cli/next-dev.ts
@@ -13,6 +13,7 @@ import path from 'path'
 import type { NextConfig } from '../types'
 import type { NextConfigComplete } from '../server/config-shared'
 import { traceGlobals } from '../trace/shared'
+import cluster from 'cluster'
 
 let isTurboSession = false
 let sessionStopHandled = false
@@ -416,39 +417,53 @@ If you cannot make the changes above, but still want to try out\nNext.js v13 wit
     await telemetry.flush()
     return server
   } else {
-    startServer(devServerOptions)
-      .then(async (app) => {
-        const appUrl = `http://${app.hostname}:${app.port}`
-        startedDevelopmentServer(appUrl, `${host || '0.0.0.0'}:${app.port}`)
-        // Start preflight after server is listening and ignore errors:
-        preflight().catch(() => {})
-        // Finalize server bootup:
-        await app.prepare()
+    // we're using a sub worker to avoid memory leaks. When memory usage exceeds 90%, we kill the worker and restart it.
+    // this is a temporary solution until we can fix the memory leaks.
+    // the logic for the worker killing itself is in `packages/next/server/lib/start-server.ts`
+    if (!process.env.__NEXT_DISABLE_MEMORY_WATCHER && cluster.isMaster) {
+      cluster.fork()
+      cluster.on('exit', (worker) => {
+        if (worker.exitedAfterDisconnect) {
+          cluster.fork()
+        } else {
+          process.exit(1)
+        }
       })
-      .catch((err) => {
-        if (err.code === 'EADDRINUSE') {
-          let errorMessage = `Port ${port} is already in use.`
-          const pkgAppPath = require('next/dist/compiled/find-up').sync(
-            'package.json',
-            {
-              cwd: dir,
-            }
-          )
-          const appPackage = require(pkgAppPath)
-          if (appPackage.scripts) {
-            const nextScript = Object.entries(appPackage.scripts).find(
-              (scriptLine) => scriptLine[1] === 'next'
+    } else {
+      startServer(devServerOptions)
+        .then(async (app) => {
+          const appUrl = `http://${app.hostname}:${app.port}`
+          startedDevelopmentServer(appUrl, `${host || '0.0.0.0'}:${app.port}`)
+          // Start preflight after server is listening and ignore errors:
+          preflight().catch(() => {})
+          // Finalize server bootup:
+          await app.prepare()
+        })
+        .catch((err) => {
+          if (err.code === 'EADDRINUSE') {
+            let errorMessage = `Port ${port} is already in use.`
+            const pkgAppPath = require('next/dist/compiled/find-up').sync(
+              'package.json',
+              {
+                cwd: dir,
+              }
             )
-            if (nextScript) {
-              errorMessage += `\nUse \`npm run ${nextScript[0]} -- -p <some other port>\`.`
+            const appPackage = require(pkgAppPath)
+            if (appPackage.scripts) {
+              const nextScript = Object.entries(appPackage.scripts).find(
+                (scriptLine) => scriptLine[1] === 'next'
+              )
+              if (nextScript) {
+                errorMessage += `\nUse \`npm run ${nextScript[0]} -- -p <some other port>\`.`
+              }
             }
+            console.error(errorMessage)
+          } else {
+            console.error(err)
           }
-          console.error(errorMessage)
-        } else {
-          console.error(err)
-        }
-        process.nextTick(() => process.exit(1))
-      })
+          process.nextTick(() => process.exit(1))
+        })
+    }
   }
 
   for (const CONFIG_FILE of CONFIG_FILES) {
diff --git a/packages/next/server/lib/start-server.ts b/packages/next/server/lib/start-server.ts
index 4308a93db34fdb2..a84bd90966aa3b5 100644
--- a/packages/next/server/lib/start-server.ts
+++ b/packages/next/server/lib/start-server.ts
@@ -2,17 +2,28 @@ import type { NextServerOptions, NextServer, RequestHandler } from '../next'
 import { warn } from '../../build/output/log'
 import http from 'http'
 import next from '../next'
-
+import cluster from 'cluster'
+import v8 from 'v8'
 interface StartServerOptions extends NextServerOptions {
   allowRetry?: boolean
   keepAliveTimeout?: number
 }
 
+const MAXIMUM_HEAP_SIZE_ALLOWED =
+  (v8.getHeapStatistics().heap_size_limit / 1024 / 1024) * 0.9
+
 export function startServer(opts: StartServerOptions) {
   let requestHandler: RequestHandler
 
   const server = http.createServer((req, res) => {
-    return requestHandler(req, res)
+    return requestHandler(req, res).finally(() => {
+      if (
+        cluster.worker &&
+        process.memoryUsage().heapUsed / 1024 / 1024 > MAXIMUM_HEAP_SIZE_ALLOWED
+      ) {
+        cluster.worker.kill()
+      }
+    })
   })
 
   if (opts.keepAliveTimeout) {
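
For readers who want to see the whole mechanism in one place, here is a minimal standalone sketch of the pattern the two diffs implement together, using only Node's `cluster`, `http`, and `v8` modules. It is not part of the patch: the port, the response body, and the `MAX_HEAP_MB` name are illustrative stand-ins, while the `cluster` and `v8` calls mirror the ones above.

```
// Standalone sketch of the memory-watcher pattern (illustrative, not the PR code).
import cluster from 'cluster'
import http from 'http'
import v8 from 'v8'

// Same computation as MAXIMUM_HEAP_SIZE_ALLOWED in start-server.ts:
// 90% of the V8 heap limit, expressed in MB.
const MAX_HEAP_MB = (v8.getHeapStatistics().heap_size_limit / 1024 / 1024) * 0.9

if (cluster.isMaster) {
  // The parent forks a single worker that runs the actual server.
  cluster.fork()
  cluster.on('exit', (worker) => {
    // exitedAfterDisconnect is true when the worker shut itself down via
    // kill()/disconnect(), i.e. a deliberate restart rather than a crash.
    if (worker.exitedAfterDisconnect) {
      cluster.fork()
    } else {
      process.exit(1)
    }
  })
} else {
  http
    .createServer((req, res) => {
      res.end('ok')
      // After each response, compare used heap against the allowance and let
      // the worker kill itself; the parent above forks a replacement.
      if (process.memoryUsage().heapUsed / 1024 / 1024 > MAX_HEAP_MB) {
        cluster.worker?.kill()
      }
    })
    .listen(3000)
}
```

As in the `.finally()` placement in `start-server.ts`, the check runs only after a response has been handled, so a worker is never torn down in the middle of serving a request.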