Improve minChunkSize algorithm #4723

Merged (14 commits) on Feb 3, 2023
7 changes: 2 additions & 5 deletions src/Module.ts
@@ -4,7 +4,7 @@ import { locate } from 'locate-character';
import MagicString from 'magic-string';
import ExternalModule from './ExternalModule';
import type Graph from './Graph';
-import { createHasEffectsContext, createInclusionContext } from './ast/ExecutionContext';
import { createInclusionContext } from './ast/ExecutionContext';
import { nodeConstructors } from './ast/nodes';
import ExportAllDeclaration from './ast/nodes/ExportAllDeclaration';
import ExportDefaultDeclaration from './ast/nodes/ExportDefaultDeclaration';
@@ -662,10 +662,7 @@ export default class Module {
}

hasEffects(): boolean {
-		return (
-			this.info.moduleSideEffects === 'no-treeshake' ||
-			(this.ast!.included && this.ast!.hasEffects(createHasEffectsContext()))
-		);
return this.info.moduleSideEffects === 'no-treeshake' || this.ast!.hasCachedEffects();
}

include(): void {
11 changes: 8 additions & 3 deletions src/ast/nodes/Program.ts
@@ -1,5 +1,6 @@
import type MagicString from 'magic-string';
import { type RenderOptions, renderStatementList } from '../../utils/renderHelpers';
import { createHasEffectsContext } from '../ExecutionContext';
import type { HasEffectsContext, InclusionContext } from '../ExecutionContext';
import type * as NodeType from './NodeType';
import { type IncludeChildren, NodeBase, type StatementNode } from './shared/Node';
@@ -9,11 +10,15 @@ export default class Program extends NodeBase {
declare sourceType: 'module';
declare type: NodeType.tProgram;

-	private hasCachedEffect = false;
private hasCachedEffect: boolean | null = null;

hasCachedEffects(): boolean {
return this.hasCachedEffect === null
? (this.hasCachedEffect = this.hasEffects(createHasEffectsContext()))
: this.hasCachedEffect;
}

hasEffects(context: HasEffectsContext): boolean {
		// We cache the result here so that side-effect-free modules can later be identified more efficiently
if (this.hasCachedEffect) return true;
for (const node of this.body) {
if (node.hasEffects(context)) {
return (this.hasCachedEffect = true);
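For context, here is a minimal, self-contained sketch of the caching pattern introduced above; the `ProgramLike` class and the `computeEffects` callback are illustrative stand-ins, not Rollup's actual API. The point of starting from `null` instead of `false` is that a computed negative result can also be cached, so the expensive AST walk runs at most once.

```ts
class ProgramLike {
	// null means "not computed yet"; false is a real, cached negative result.
	private hasCachedEffect: boolean | null = null;

	constructor(private computeEffects: () => boolean) {}

	hasCachedEffects(): boolean {
		return this.hasCachedEffect === null
			? (this.hasCachedEffect = this.computeEffects())
			: this.hasCachedEffect;
	}
}

// Usage: the expensive check runs only on the first call.
const program = new ProgramLike(() => {
	console.log('walking the AST once...');
	return false;
});
program.hasCachedEffects(); // logs, then caches false
program.hasCachedEffects(); // returns the cached false without recomputing
```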
287 changes: 212 additions & 75 deletions src/utils/chunkAssignment.ts
@@ -1,6 +1,5 @@
import ExternalModule from '../ExternalModule';
import Module from '../Module';
-import { EMPTY_ARRAY } from './blank';
import { getNewSet, getOrCreate } from './getOrCreate';
import { concatLazy } from './iterators';
import { timeEnd, timeStart } from './timers';
@@ -202,19 +201,21 @@ function isModuleAlreadyLoaded(
return true;
}

interface ChunkDescription {
alias: null;
dependencies: Set<ChunkDescription>;
dependentChunks: Set<ChunkDescription>;
modules: Module[];
pure: boolean;
signature: string;
-	size: number | null;
-}

-interface MergeableChunkDescription extends ChunkDescription {
size: number;
}

type ChunkPartition = {
[key in 'small' | 'big']: {
[subKey in 'pure' | 'sideEffect']: Set<ChunkDescription>;
};
};

function createChunks(
allEntries: Iterable<Module>,
assignedEntriesByModule: DependentModuleMap,
@@ -226,59 +227,12 @@ function createChunks(
alias: null,
modules
}))
-		: getOptimizedChunks(chunkModulesBySignature, minChunkSize);
-}

-function getOptimizedChunks(
-	chunkModulesBySignature: { [chunkSignature: string]: Module[] },
-	minChunkSize: number
-) {
-	timeStart('optimize chunks', 3);
-	const { chunksToBeMerged, unmergeableChunks } = getMergeableChunks(
-		chunkModulesBySignature,
-		minChunkSize
-	);
-	for (const sourceChunk of chunksToBeMerged) {
-		chunksToBeMerged.delete(sourceChunk);
-		let closestChunk: ChunkDescription | null = null;
-		let closestChunkDistance = Infinity;
-		const { signature, size, modules } = sourceChunk;

-		for (const targetChunk of concatLazy(chunksToBeMerged, unmergeableChunks)) {
-			const distance = getSignatureDistance(
-				signature,
-				targetChunk.signature,
-				!chunksToBeMerged.has(targetChunk)
-			);
-			if (distance === 1) {
-				closestChunk = targetChunk;
-				break;
-			} else if (distance < closestChunkDistance) {
-				closestChunk = targetChunk;
-				closestChunkDistance = distance;
-			}
-		}
-		if (closestChunk) {
-			closestChunk.modules.push(...modules);
-			if (chunksToBeMerged.has(closestChunk)) {
-				closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
-				if ((closestChunk.size += size) > minChunkSize) {
-					chunksToBeMerged.delete(closestChunk);
-					unmergeableChunks.push(closestChunk);
-				}
-			}
-		} else {
-			unmergeableChunks.push(sourceChunk);
-		}
-	}
-	timeEnd('optimize chunks', 3);
-	return unmergeableChunks;
: getOptimizedChunks(chunkModulesBySignature, minChunkSize).map(({ modules }) => ({
alias: null,
modules
}));
}

-const CHAR_DEPENDENT = 'X';
-const CHAR_INDEPENDENT = '_';
-const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getChunkModulesBySignature(
assignedEntriesByModule: ReadonlyDependentModuleMap,
allEntries: Iterable<Module>
@@ -299,33 +253,216 @@
return chunkModules;
}

-function getMergeableChunks(
/**
 * This function tries to get rid of small chunks by merging them with other
 * chunks. To merge chunks, the following rules must be obeyed:
 * - When merging several chunks, at most one of the chunks can have side
 *   effects.
 * - When one of the chunks has side effects, the entry points depending on that
 *   chunk need to be a superset of the entry points depending on the other
 *   chunks.
 * - Pure chunks can always be merged.
 * - We use the entry point dependence signature to calculate "chunk distance",
 *   i.e. how likely it is that two chunks are loaded together.
*/
function getOptimizedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
) {
-	const chunksToBeMerged = new Set() as Set<MergeableChunkDescription> & {
-		has(chunk: unknown): chunk is MergeableChunkDescription;
-	};
-	const unmergeableChunks: ChunkDescription[] = [];
-	const alias = null;
timeStart('optimize chunks', 3);
const chunkPartition = getPartitionedChunks(chunkModulesBySignature, minChunkSize);
if (chunkPartition.small.sideEffect.size > 0) {
mergeChunks(
chunkPartition.small.sideEffect,
[chunkPartition.small.pure, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}

if (chunkPartition.small.pure.size > 0) {
mergeChunks(
chunkPartition.small.pure,
[chunkPartition.small.pure, chunkPartition.big.sideEffect, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}
timeEnd('optimize chunks', 3);
return [
...chunkPartition.small.sideEffect,
...chunkPartition.small.pure,
...chunkPartition.big.sideEffect,
...chunkPartition.big.pure
];
}

const CHAR_DEPENDENT = 'X';
const CHAR_INDEPENDENT = '_';
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getPartitionedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
): ChunkPartition {
const smallPureChunks: ChunkDescription[] = [];
const bigPureChunks: ChunkDescription[] = [];
const smallSideEffectChunks: ChunkDescription[] = [];
const bigSideEffectChunks: ChunkDescription[] = [];
const chunkByModule = new Map<Module, ChunkDescription>();
for (const [signature, modules] of Object.entries(chunkModulesBySignature)) {
const chunkDescription: ChunkDescription = {
dependencies: new Set<ChunkDescription>(),
dependentChunks: new Set<ChunkDescription>(),
modules,
pure: true,
signature,
size: 0
};
let size = 0;
-		checkModules: {
let pure = true;
for (const module of modules) {
chunkByModule.set(module, chunkDescription);
pure &&= !module.hasEffects();
			// Unfortunately, we cannot take tree-shaking into account here because
			// rendering has not happened yet
size += module.originalCode.length;
}
chunkDescription.pure = pure;
chunkDescription.size = size;
(size < minChunkSize
? pure
? smallPureChunks
: smallSideEffectChunks
: pure
? bigPureChunks
: bigSideEffectChunks
).push(chunkDescription);
}
sortChunksAndAddDependencies(
[bigPureChunks, bigSideEffectChunks, smallPureChunks, smallSideEffectChunks],
chunkByModule
);
return {
big: { pure: new Set(bigPureChunks), sideEffect: new Set(bigSideEffectChunks) },
small: { pure: new Set(smallPureChunks), sideEffect: new Set(smallSideEffectChunks) }
};
}

function sortChunksAndAddDependencies(
chunkLists: ChunkDescription[][],
chunkByModule: Map<Module, ChunkDescription>
) {
for (const chunks of chunkLists) {
chunks.sort(compareChunks);
for (const chunk of chunks) {
const { dependencies, modules } = chunk;
for (const module of modules) {
-				if (module.hasEffects()) {
-					break checkModules;
for (const dependency of module.getDependenciesToBeIncluded()) {
const dependencyChunk = chunkByModule.get(dependency as Module);
if (dependencyChunk && dependencyChunk !== chunk) {
dependencies.add(dependencyChunk);
dependencyChunk.dependentChunks.add(chunk);
}
}
-				size += module.magicString.toString().length;
-				if (size > minChunkSize) {
-					break checkModules;
}
}
}
}

function compareChunks(
{ size: sizeA }: ChunkDescription,
{ size: sizeB }: ChunkDescription
): number {
return sizeA - sizeB;
}

function mergeChunks(
chunksToBeMerged: Set<ChunkDescription>,
targetChunks: Set<ChunkDescription>[],
minChunkSize: number,
chunkPartition: ChunkPartition
) {
for (const mergedChunk of chunksToBeMerged) {
let closestChunk: ChunkDescription | null = null;
let closestChunkDistance = Infinity;
const { signature, modules, pure, size } = mergedChunk;

for (const targetChunk of concatLazy(targetChunks)) {
if (mergedChunk === targetChunk) continue;
			// Possible improvement:
			// For dynamic entries depending on a pure chunk, it is safe to merge that
			// chunk into the chunk doing the dynamic import (i.e. into an "already
			// loaded chunk") even if the importing chunk is not pure.
			// One way of handling this could be to add all "already loaded entries"
			// of the dynamic importers to the signature as well. That could also
			// change the way we do code-splitting for already loaded entries.
const distance = pure
? getSignatureDistance(signature, targetChunk.signature, !targetChunk.pure)
: getSignatureDistance(targetChunk.signature, signature, true);
if (distance < closestChunkDistance && isValidMerge(mergedChunk, targetChunk)) {
if (distance === 1) {
closestChunk = targetChunk;
break;
}
closestChunk = targetChunk;
closestChunkDistance = distance;
}
-			chunksToBeMerged.add({ alias, modules, signature, size });
-			continue;
		}
-		unmergeableChunks.push({ alias, modules, signature, size: null });
if (closestChunk) {
chunksToBeMerged.delete(mergedChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).delete(closestChunk);
closestChunk.modules.push(...modules);
closestChunk.size += size;
closestChunk.pure &&= pure;
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
const { dependencies, dependentChunks } = closestChunk;
for (const dependency of mergedChunk.dependencies) {
dependencies.add(dependency);
}
for (const dependentChunk of mergedChunk.dependentChunks) {
dependentChunks.add(dependentChunk);
dependentChunk.dependencies.delete(mergedChunk);
dependentChunk.dependencies.add(closestChunk);
}
dependencies.delete(closestChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).add(closestChunk);
}
}
-	return { chunksToBeMerged, unmergeableChunks };
}

// Merging will not produce cycles if none of the direct non-merged dependencies
// of a chunk have the other chunk as a transitive dependency
function isValidMerge(mergedChunk: ChunkDescription, targetChunk: ChunkDescription) {
return !(
hasTransitiveDependency(mergedChunk, targetChunk) ||
hasTransitiveDependency(targetChunk, mergedChunk)
);
}

function hasTransitiveDependency(
dependentChunk: ChunkDescription,
dependencyChunk: ChunkDescription
) {
const chunksToCheck = new Set(dependentChunk.dependencies);
for (const { dependencies } of chunksToCheck) {
for (const dependency of dependencies) {
if (dependency === dependencyChunk) {
return true;
}
chunksToCheck.add(dependency);
}
}
return false;
}

function getChunksInPartition(
chunk: ChunkDescription,
minChunkSize: number,
chunkPartition: ChunkPartition
): Set<ChunkDescription> {
const subPartition = chunk.size < minChunkSize ? chunkPartition.small : chunkPartition.big;
return chunk.pure ? subPartition.pure : subPartition.sideEffect;
}

function getSignatureDistance(
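The bodies of `getSignatureDistance` and `mergeSignatures` are not part of the hunks shown above. As a rough illustration only (not the implementation from this PR), the sketch below shows the signature convention the new code relies on: each position in a signature stands for one entry point, `'X'` marks an entry that depends on the chunk and `'_'` one that does not, and a subset check models the rule that a chunk with side effects may only be merged into chunks whose entry points are a superset of its own.

```ts
const CHAR_DEPENDENT = 'X';
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

// Counts entry points on which only one of the two chunks depends. With
// `enforceSubset`, any entry that needs the source chunk but not the target
// makes the merge invalid, which is signalled by returning Infinity.
function signatureDistance(
	sourceSignature: string,
	targetSignature: string,
	enforceSubset: boolean
): number {
	let distance = 0;
	for (let index = 0; index < sourceSignature.length; index++) {
		const sourceValue = sourceSignature.charCodeAt(index);
		if (sourceValue !== targetSignature.charCodeAt(index)) {
			if (enforceSubset && sourceValue === CHAR_CODE_DEPENDENT) {
				return Infinity;
			}
			distance++;
		}
	}
	return distance;
}

// 'XX_' is needed by a subset of the entries that need 'XXX', and the two
// differ in a single entry point, so these chunks are close and mergeable.
console.log(signatureDistance('XX_', 'XXX', true)); // 1
// 'X_X' is needed by an entry that does not need 'XX_', so the subset rule
// forbids merging in this direction.
console.log(signatureDistance('X_X', 'XX_', true)); // Infinity
```

This mirrors how `mergeChunks` above calls the real function: a pure chunk enforces the subset rule only against impure targets, while an impure chunk swaps the arguments and always enforces it, matching the rules stated in the comment on `getOptimizedChunks`.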
2 changes: 1 addition & 1 deletion src/utils/iterators.ts
@@ -3,7 +3,7 @@
* their iterators. Useful when e.g. working with large sets or lists and when
* there is a chance that the iterators will not be fully exhausted.
*/
-export function* concatLazy<T>(...iterables: Iterable<T>[]) {
export function* concatLazy<T>(iterables: Iterable<T>[]): Iterable<T> {
for (const iterable of iterables) {
yield* iterable;
}
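The only change here is the call shape: `concatLazy` now takes a single array of iterables instead of rest parameters, which matches call sites such as `concatLazy(targetChunks)` in `mergeChunks` above that already hold an array. A small usage sketch of the new signature:

```ts
function* concatLazy<T>(iterables: Iterable<T>[]): Iterable<T> {
	for (const iterable of iterables) {
		yield* iterable;
	}
}

const small = new Set(['a', 'b']);
const big = new Set(['c', 'd', 'e']);

// Nothing is copied up front; iteration stops as soon as the consumer breaks,
// so later iterables may never be touched at all.
for (const value of concatLazy([small, big])) {
	if (value === 'c') break;
}
```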
@@ -0,0 +1,9 @@
module.exports = {
description: 'avoids circular dependencies when merging chunks',
options: {
input: ['main1.js', 'main2.js', 'main3.js'],
output: {
experimentalMinChunkSize: 100
}
}
};
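For reference, the option exercised by this fixture lives on the output options. A minimal standalone config in the same spirit might look as follows; the `dist` directory, the `es` format and the use of a TypeScript config file are illustrative choices, not part of the fixture above.

```ts
// rollup.config.ts (illustrative)
import type { RollupOptions } from 'rollup';

const config: RollupOptions = {
	input: ['main1.js', 'main2.js', 'main3.js'],
	output: {
		dir: 'dist',
		format: 'es',
		// Chunks whose combined original source is below this number of
		// characters become candidates for being merged into a nearby chunk.
		experimentalMinChunkSize: 100
	}
};

export default config;
```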