Skip to content

Commit

Permalink
fix: limit packument cache size based on heap size
Browse files Browse the repository at this point in the history
This adds a new packument cache that is an instance of `lru-cache`.
It uses that package's ability to limit content based on size, and has
some multipliers based on research to mostly correctly approximate the
correlation between packument size and its memory usage.  It also limits
the total size of the cache based on the actual heap available.

Closes: #7276
Related: npm/pacote#369
  • Loading branch information
wraithgar committed May 7, 2024
1 parent 5b2317b commit 722c0fa
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 14 deletions.
1 change: 1 addition & 0 deletions DEPENDENCIES.md
Expand Up @@ -582,6 +582,7 @@ graph LR;
npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"];
npmcli-arborist-->json-parse-even-better-errors;
npmcli-arborist-->json-stringify-nice;
npmcli-arborist-->lru-cache;
npmcli-arborist-->minify-registry-metadata;
npmcli-arborist-->minimatch;
npmcli-arborist-->nock;
Expand Down
22 changes: 16 additions & 6 deletions smoke-tests/test/fixtures/setup.js
Expand Up @@ -74,7 +74,7 @@ const getCleanPaths = async () => {
}

module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy = false } = {}) => {
const debugLog = debug || CI ? (...a) => console.error(...a) : () => {}
const debugLog = debug || CI ? (...a) => t.comment(...a) : () => {}
const cleanPaths = await getCleanPaths()

// setup fixtures
Expand Down Expand Up @@ -158,7 +158,7 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy
log(`${spawnCmd} ${spawnArgs.join(' ')}`)
log('-'.repeat(40))

const { stderr, stdout } = await spawn(spawnCmd, spawnArgs, {
const p = spawn(spawnCmd, spawnArgs, {
cwd,
env: {
...getEnvPath(),
Expand All @@ -169,10 +169,20 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy
...opts,
})

log(stderr)
log('-'.repeat(40))
log(stdout)
log('='.repeat(40))
// In debug mode, stream stdout and stderr to console so we can debug hanging processes
if (debug) {
p.process.stdout.on('data', (c) => log('STDOUT: ' + c.toString().trim()))
p.process.stderr.on('data', (c) => log('STDERR: ' + c.toString().trim()))
}

const { stdout, stderr } = await p
// If not in debug mode, print full stderr and stdout contents separately
if (!debug) {
log(stderr)
log('-'.repeat(40))
log(stdout)
log('='.repeat(40))
}

return { stderr, stdout }
}
Expand Down
14 changes: 11 additions & 3 deletions smoke-tests/test/large-install.js
Expand Up @@ -5,18 +5,20 @@ const setup = require('./fixtures/setup.js')
const getFixture = (p) => require(
path.resolve(__dirname, './fixtures/large-install', p))

const runTest = async (t) => {
const runTest = async (t, { lowMemory } = {}) => {
const { npm } = await setup(t, {
// This test relies on the actual production registry
mockRegistry: false,
testdir: {
project: {
'package.json': getFixture('package.json'),
'package-lock.json': getFixture('package-lock.json'),
...lowMemory ? {} : { 'package-lock.json': getFixture('package-lock.json') },
},
},
})
return npm('install', '--no-audit', '--no-fund')
return npm('install', '--no-audit', '--no-fund', {
env: lowMemory ? { NODE_OPTIONS: '--max-old-space-size=500' } : {},
})
}

// This test was originally created for https://github.com/npm/cli/issues/6763
Expand All @@ -31,3 +33,9 @@ t.test('large install', async t => {
// installs the same number of packages.
t.match(stdout, `added 126${process.platform === 'linux' ? 4 : 5} packages in`)
})

t.test('large install, no lock and low memory', async t => {
// Run the same install but with no lockfile and constrained max-old-space-size
const { stdout } = await runTest(t, { lowMemory: true })
t.match(stdout, /added \d+ packages/)
})
2 changes: 1 addition & 1 deletion smoke-tests/test/npm-replace-global.js
Expand Up @@ -125,7 +125,7 @@ t.test('publish and replace global self', async t => {
await npmPackage({
manifest: { packuments: [publishedPackument] },
tarballs: { [version]: tarball },
times: 2,
times: 3,
})
await fs.rm(cache, { recursive: true, force: true })
await useNpm('install', 'npm@latest', '--global')
Expand Down
1 change: 1 addition & 0 deletions test/lib/commands/audit.js
Expand Up @@ -184,6 +184,7 @@ t.test('audit fix - bulk endpoint', async t => {
tarballs: {
'1.0.1': path.join(npm.prefix, 'test-dep-a-fixed'),
},
times: 2,
})
const advisory = registry.advisory({ id: 100, vulnerable_versions: '1.0.0' })
registry.nock.post('/-/npm/v1/security/advisories/bulk', body => {
Expand Down
4 changes: 2 additions & 2 deletions workspaces/arborist/lib/arborist/index.js
Expand Up @@ -31,10 +31,10 @@ const { homedir } = require('os')
const { depth } = require('treeverse')
const mapWorkspaces = require('@npmcli/map-workspaces')
const { log, time } = require('proc-log')

const { saveTypeMap } = require('../add-rm-pkg-deps.js')
const AuditReport = require('../audit-report.js')
const relpath = require('../relpath.js')
const PackumentCache = require('../packument-cache.js')

const mixins = [
require('../tracker.js'),
Expand Down Expand Up @@ -82,7 +82,7 @@ class Arborist extends Base {
installStrategy: options.global ? 'shallow' : (options.installStrategy ? options.installStrategy : 'hoisted'),
lockfileVersion: lockfileVersion(options.lockfileVersion),
packageLockOnly: !!options.packageLockOnly,
packumentCache: options.packumentCache || new Map(),
packumentCache: options.packumentCache || new PackumentCache(),
path: options.path || '.',
rebuildBundle: 'rebuildBundle' in options ? !!options.rebuildBundle : true,
replaceRegistryHost: options.replaceRegistryHost,
Expand Down
2 changes: 2 additions & 0 deletions workspaces/arborist/lib/node.js
Expand Up @@ -119,6 +119,8 @@ class Node {
// package's dependencies in a virtual root.
this.sourceReference = sourceReference

// TODO if this came from pacote.manifest we don't have to do this,
// we can be told to skip this step
const pkg = sourceReference ? sourceReference.package
: normalize(options.pkg || {})

Expand Down
77 changes: 77 additions & 0 deletions workspaces/arborist/lib/packument-cache.js
@@ -0,0 +1,77 @@
const { LRUCache } = require('lru-cache')
const { getHeapStatistics } = require('node:v8')
const { log } = require('proc-log')

// This is an in-memory cache that Pacote uses for packuments.
// Packuments are usually cached on disk. This allows for rapid re-requests
// of the same packument to bypass disk reads. The tradeoff here is memory
// usage for disk reads.
class PackumentCache extends LRUCache {
static #heapLimit = Math.floor(getHeapStatistics().heap_size_limit)

#sizeKey
#disposed = new Set()

#log (...args) {
log.silly('packumentCache', ...args)
}

constructor ({
// How much of this.#heapLimit to take up
heapFactor = 0.25,
// How much of this.#maxSize we allow any one packument to take up
// Anything over this is not cached
maxEntryFactor = 0.5,
sizeKey = '_contentLength',
} = {}) {
const maxSize = Math.floor(PackumentCache.#heapLimit * heapFactor)
const maxEntrySize = Math.floor(maxSize * maxEntryFactor)
super({
maxSize,
maxEntrySize,
sizeCalculation: (p) => {
// Don't cache if we dont know the size
// Some versions of pacote set this to `0`, newer versions set it to `null`
if (!p[sizeKey]) {
return maxEntrySize + 1
}
if (p[sizeKey] < 10_000) {
return p[sizeKey] * 2
}
if (p[sizeKey] < 1_000_000) {
return Math.floor(p[sizeKey] * 1.5)
}
// It is less beneficial to store a small amount of super large things
// at the cost of all other packuments.
return maxEntrySize + 1
},
dispose: (v, k) => {
this.#disposed.add(k)
this.#log(k, 'dispose')
},
})
this.#sizeKey = sizeKey
this.#log(`heap:${PackumentCache.#heapLimit} maxSize:${maxSize} maxEntrySize:${maxEntrySize}`)
}

set (k, v, ...args) {
// we use disposed only for a logging signal if we are setting packuments that
// have already been evicted from the cache previously. logging here could help
// us tune this in the future.
const disposed = this.#disposed.has(k)
/* istanbul ignore next - this doesnt happen consistently so hard to test without resorting to unit tests */
if (disposed) {
this.#disposed.delete(k)
}
this.#log(k, 'set', `size:${v[this.#sizeKey]} disposed:${disposed}`)
return super.set(k, v, ...args)
}

has (k, ...args) {
const has = super.has(k, ...args)
this.#log(k, `cache-${has ? 'hit' : 'miss'}`)
return has
}
}

module.exports = PackumentCache
2 changes: 1 addition & 1 deletion workspaces/arborist/test/arborist/rebuild.js
Expand Up @@ -197,7 +197,7 @@ t.test('verify dep flags in script environments', async t => {
const file = resolve(path, 'node_modules', pkg, 'env')
t.strictSame(flags, fs.readFileSync(file, 'utf8').split('\n'), pkg)
}
t.strictSame(checkLogs().sort((a, b) =>
t.strictSame(checkLogs().filter(l => l[0] === 'info' && l[1] === 'run').sort((a, b) =>
localeCompare(a[2], b[2]) || (typeof a[4] === 'string' ? -1 : 1)), [
['info', 'run', 'devdep@1.0.0', 'postinstall', 'node_modules/devdep', 'node ../../env.js'],
['info', 'run', 'devdep@1.0.0', 'postinstall', { code: 0, signal: null }],
Expand Down
2 changes: 1 addition & 1 deletion workspaces/arborist/test/audit-report.js
Expand Up @@ -222,7 +222,7 @@ t.test('audit returns an error', async t => {
const report = await AuditReport.load(tree, arb.options)
t.equal(report.report, null, 'did not get audit response')
t.equal(report.size, 0, 'did not find any vulnerabilities')
t.match(logs, [
t.match(logs.filter(l => l[1].includes('audit')), [
[
'silly',
'audit',
Expand Down

0 comments on commit 722c0fa

Please sign in to comment.