Skip to content

Commit

Permalink
Support mangle.nth_identifier to customize base54 behavior (#1038)
Browse files Browse the repository at this point in the history
* Support nth_identifier option to customize base54

* Ensure base54 is operational immediately
  • Loading branch information
dmichon-msft committed Aug 24, 2021
1 parent 75df7e0 commit f4a3ca4
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 26 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,12 @@ If you happen to need the source map as a raw object, set `sourceMap.asObject` t
- `module` (default `false`) -- Pass `true` when mangling ES6 modules, where the
toplevel scope is not the global scope. Implies `toplevel`.

- `nth_identifier` (default: an internal mangler that weights based on character
frequency analysis) -- Pass an object with a `get(n)` function that converts an
ordinal into the nth most favored (usually shortest) identifier.
Optionally also provide `reset()`, `sort()`, and `consider(chars, delta)` to
use character frequency analysis of the source code.

- `reserved` (default `[]`) -- Pass an array of identifiers that should be
excluded from mangling. Example: `["foo", "bar"]`.

Expand Down Expand Up @@ -944,6 +950,12 @@ await minify(code, { mangle: { toplevel: true } }).code;
- `false` -- `obj["prop"]` is mangled.
- `true` -- `obj.prop` is mangled unless there is `obj["prop"]` elsewhere in the code.

- `nth_identifier` (default: an internal mangler that weights based on character
frequency analysis) -- Pass an object with a `get(n)` function that converts an
ordinal into the nth most favored (usually shortest) identifier.
Optionally also provide `reset()`, `sort()`, and `consider(chars, delta)` to
use character frequency analysis of the source code.

- `regex` (default: `null`) — Pass a [RegExp literal or pattern string](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) to only mangle properties matching the regular expression.

- `reserved` (default: `[]`) — Do not mangle property names listed in the
Expand Down
9 changes: 5 additions & 4 deletions lib/compress/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ class Compressor extends TreeWalker {
var passes = +this.options.passes || 1;
var min_count = 1 / 0;
var stopping = false;
var mangle = { ie8: this.option("ie8") };
var nth_identifier = this.mangle_options && this.mangle_options.nth_identifier || base54;
var mangle = { ie8: this.option("ie8"), nth_identifier: nth_identifier };
for (var pass = 0; pass < passes; pass++) {
this._toplevel.figure_out_scope(mangle);
if (pass === 0 && this.option("drop_console")) {
Expand Down Expand Up @@ -1889,7 +1890,7 @@ def_optimize(AST_Switch, function(self, compressor) {
function is_inert_body(branch) {
return !aborts(branch) && !make_node(AST_BlockStatement, branch, {
body: branch.body
}).has_side_effects(compressor)
}).has_side_effects(compressor);
}
});

Expand Down Expand Up @@ -2262,6 +2263,7 @@ def_optimize(AST_Call, function(self, compressor) {
argnames: [],
body: []
}).optimize(compressor);
var nth_identifier = compressor.mangle_options && compressor.mangle_options.nth_identifier || base54;
if (self.args.every((x) => x instanceof AST_String)) {
// quite a corner-case, but we can handle it:
// https://github.com/mishoo/UglifyJS2/issues/203
Expand All @@ -2271,14 +2273,13 @@ def_optimize(AST_Call, function(self, compressor) {
return arg.value;
}).join(",") + "){" + self.args[self.args.length - 1].value + "})";
var ast = parse(code);
var mangle = { ie8: compressor.option("ie8") };
var mangle = { ie8: compressor.option("ie8"), nth_identifier: nth_identifier };
ast.figure_out_scope(mangle);
var comp = new Compressor(compressor.options, {
mangle_options: compressor.mangle_options
});
ast = ast.transform(comp);
ast.figure_out_scope(mangle);
base54.reset();
ast.compute_char_frequency(mangle);
ast.mangle_names(mangle);
var fun;
Expand Down
2 changes: 0 additions & 2 deletions lib/minify.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import { AST_Toplevel, AST_Node } from "./ast.js";
import { parse } from "./parse.js";
import { OutputStream } from "./output.js";
import { Compressor } from "./compress/index.js";
import { base54 } from "./scope.js";
import { SourceMap } from "./sourcemap.js";
import {
mangle_properties,
Expand Down Expand Up @@ -203,7 +202,6 @@ async function minify(files, options) {
if (options.mangle) toplevel.figure_out_scope(options.mangle);
if (timings) timings.mangle = Date.now();
if (options.mangle) {
base54.reset();
toplevel.compute_char_frequency(options.mangle);
toplevel.mangle_names(options.mangle);
}
Expand Down
7 changes: 5 additions & 2 deletions lib/propmangle.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,15 @@ function mangle_properties(ast, options) {
cache: null,
debug: false,
keep_quoted: false,
nth_identifier: base54,
only_cache: false,
regex: null,
reserved: null,
undeclared: false,
}, true);

var nth_identifier = options.nth_identifier;

var reserved_option = options.reserved;
if (!Array.isArray(reserved_option)) reserved_option = [reserved_option];
var reserved = new Set(reserved_option);
Expand Down Expand Up @@ -312,7 +315,7 @@ function mangle_properties(ast, options) {
// either debug mode is off, or it is on and we could not use the mangled name
if (!mangled) {
do {
mangled = base54(++cname);
mangled = nth_identifier.get(++cname);
} while (!can_mangle(mangled));
}

Expand All @@ -324,7 +327,7 @@ function mangle_properties(ast, options) {
function mangle_private(name) {
let mangled = private_cache.get(name);
if (!mangled) {
mangled = base54(++cprivate);
mangled = nth_identifier.get(++cprivate);
private_cache.set(name, mangled);
}

Expand Down
54 changes: 37 additions & 17 deletions lib/scope.js
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,9 @@ AST_Scope.DEFMETHOD("def_variable", function(symbol, init) {

function next_mangled(scope, options) {
var ext = scope.enclosed;
var nth_identifier = options.nth_identifier;
out: while (true) {
var m = base54(++scope.cname);
var m = nth_identifier.get(++scope.cname);
if (ALL_RESERVED_WORDS.has(m)) continue; // skip over "do"

// https://github.com/mishoo/UglifyJS2/issues/242 -- do not
Expand Down Expand Up @@ -744,6 +745,7 @@ AST_Symbol.DEFMETHOD("global", function() {
AST_Toplevel.DEFMETHOD("_default_mangler_options", function(options) {
options = defaults(options, {
eval : false,
nth_identifier : base54,
ie8 : false,
keep_classnames: false,
keep_fnames : false,
Expand All @@ -765,6 +767,7 @@ AST_Toplevel.DEFMETHOD("_default_mangler_options", function(options) {

AST_Toplevel.DEFMETHOD("mangle_names", function(options) {
options = this._default_mangler_options(options);
var nth_identifier = options.nth_identifier;

// We only need to mangle declaration nodes. Special logic wired
// into the code generator will display the mangled name if it's
Expand Down Expand Up @@ -816,7 +819,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(options) {
if (node instanceof AST_Label) {
let name;
do {
name = base54(++lname);
name = nth_identifier.get(++lname);
} while (ALL_RESERVED_WORDS.has(name));
node.mangled_name = name;
return true;
Expand Down Expand Up @@ -878,9 +881,12 @@ AST_Toplevel.DEFMETHOD("find_colliding_names", function(options) {
});

AST_Toplevel.DEFMETHOD("expand_names", function(options) {
base54.reset();
base54.sort();
options = this._default_mangler_options(options);
var nth_identifier = options.nth_identifier;
if (nth_identifier.reset && nth_identifier.sort) {
nth_identifier.reset();
nth_identifier.sort();
}
var avoid = this.find_colliding_names(options);
var cname = 0;
this.globals.forEach(rename);
Expand All @@ -892,7 +898,7 @@ AST_Toplevel.DEFMETHOD("expand_names", function(options) {
function next_name() {
var name;
do {
name = base54(cname++);
name = nth_identifier.get(cname++);
} while (avoid.has(name) || ALL_RESERVED_WORDS.has(name));
return name;
}
Expand All @@ -919,30 +925,37 @@ AST_Sequence.DEFMETHOD("tail_node", function() {

AST_Toplevel.DEFMETHOD("compute_char_frequency", function(options) {
options = this._default_mangler_options(options);
var nth_identifier = options.nth_identifier;
if (!nth_identifier.reset || !nth_identifier.consider || !nth_identifier.sort) {
// If the identifier mangler is invariant, skip computing character frequency.
return;
}
nth_identifier.reset();

try {
AST_Node.prototype.print = function(stream, force_parens) {
this._print(stream, force_parens);
if (this instanceof AST_Symbol && !this.unmangleable(options)) {
base54.consider(this.name, -1);
nth_identifier.consider(this.name, -1);
} else if (options.properties) {
if (this instanceof AST_DotHash) {
base54.consider("#" + this.property, -1);
nth_identifier.consider("#" + this.property, -1);
} else if (this instanceof AST_Dot) {
base54.consider(this.property, -1);
nth_identifier.consider(this.property, -1);
} else if (this instanceof AST_Sub) {
skip_string(this.property);
}
}
};
base54.consider(this.print_to_string(), 1);
nth_identifier.consider(this.print_to_string(), 1);

This comment has been minimized.

Copy link
@gdh1995

gdh1995 Aug 27, 2021

Contributor

Why does this line iterate over all characters in the (compressed but not mangled) source code? I think it will include too many irrelevant characters, like keywords, string values and comments.

This comment has been minimized.

Copy link
@jridgewell

jridgewell Aug 27, 2021

Collaborator

This is trying to compute the character frequency of all chars that could appear in the output. This is an attempt to get better gzip compression, since reusing frequent chars would allow better compression. Gzip doesn't care if the char is part of a keyword or string.

This comment has been minimized.

Copy link
@gdh1995

gdh1995 via email Aug 28, 2021

Contributor
} finally {
AST_Node.prototype.print = AST_Node.prototype._print;
}
base54.sort();
nth_identifier.sort();

function skip_string(node) {
if (node instanceof AST_String) {
base54.consider(node.value, -1);
nth_identifier.consider(node.value, -1);
} else if (node instanceof AST_Conditional) {
skip_string(node.consequent);
skip_string(node.alternative);
Expand All @@ -966,19 +979,20 @@ const base54 = (() => {
frequency.set(ch, 0);
});
}
base54.consider = function(str, delta) {
function consider(str, delta) {
for (var i = str.length; --i >= 0;) {
frequency.set(str[i], frequency.get(str[i]) + delta);
}
};
}
function compare(a, b) {
return frequency.get(b) - frequency.get(a);
}
base54.sort = function() {
function sort() {
chars = mergeSort(leading, compare).concat(mergeSort(digits, compare));
};
base54.reset = reset;
}
// Ensure this is in a usable initial state.
reset();
sort();
function base54(num) {
var ret = "", base = 54;
num++;
Expand All @@ -990,7 +1004,13 @@ const base54 = (() => {
} while (num > 0);
return ret;
}
return base54;

return {
get: base54,
consider,
reset,
sort
};
})();

export {
Expand Down
1 change: 0 additions & 1 deletion test/compress.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ async function run_compress_tests() {
var output = cmp.compress(input);
output.figure_out_scope(test.mangle);
if (test.mangle) {
base54.reset();
output.compute_char_frequency(test.mangle);
(function(cache) {
if (!cache) return;
Expand Down
25 changes: 25 additions & 0 deletions test/compress/properties.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,31 @@ mangle_unquoted_properties: {
}
}

// Checks that a custom `nth_identifier` passed under `mangle.properties` is
// the source of mangled property names: its `get(n)` returns the nth
// character of the fixed string "zyxwvutsrq", and the expected output shows
// `foo` and `baz` renamed to single characters taken from that string.
// NOTE(review): the expected names are `v` and `u` rather than the first
// characters of the string; why earlier candidates are not used is not
// visible in this test case -- confirm against the property mangler.
mangle_nth_identifier: {
mangle = {
properties: {
nth_identifier: (function () {
function get(n) {
return "zyxwvutsrq"[n];
}
return {
get
};
})()
},
}
input: {
var a = {};
a.foo = "bar";
x = { baz: "ban" };
}
expect: {
var a = {};
a.v = "bar";
x = { u: "ban" };
}
}

mangle_debug: {
mangle = {
properties: {
Expand Down
30 changes: 30 additions & 0 deletions test/compress/rename.js
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,36 @@ mangle_catch_redef_2_ie8_toplevel: {
expect_stdout: "undefined"
}

// Checks that a custom `mangle.nth_identifier` is honoured when `rename` is
// enabled: the supplied `get(n)` ignores `n` and always returns "foo", and
// the expected output shows `a` replaced by `foo` in the catch parameter,
// the `var` declaration and the later reference. It only provides `get`, with
// no `reset`, `sort` or `consider`.
mangle_catch_nth_identifier: {
rename = true
options = {
ie8: false,
toplevel: true,
}
mangle = {
ie8: false,
toplevel: true,
nth_identifier: (function () {
function get(n) {
return "foo";
}
return {
get
};
})()
}
input: {
try {
throw "FAIL1";
} catch (a) {
var a = "FAIL2";
}
console.log(a);
}
expect_exact: 'try{throw"FAIL1"}catch(foo){var foo="FAIL2"}console.log(foo);'
expect_stdout: "undefined"
}

issue_2120_1: {
rename = true
mangle = {
Expand Down
36 changes: 36 additions & 0 deletions tools/terser.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,52 @@ export interface MangleOptions {
keep_classnames?: boolean | RegExp;
keep_fnames?: boolean | RegExp;
module?: boolean;
nth_identifier?: SimpleIdentifierMangler | WeightedIdentifierMangler;
properties?: boolean | ManglePropertiesOptions;
reserved?: string[];
safari10?: boolean;
toplevel?: boolean;
}

/**
 * An identifier mangler whose output does not depend on the source code being minified.
 * Only `get` is required; a mangler of this shape is never asked to do character
 * frequency analysis.
 */
export interface SimpleIdentifierMangler {
    /**
     * Returns the nth most favored (usually shortest) identifier to rename a variable to.
     * The mangler increments `n` and retries until the returned value is not in use in
     * scope and is not a reserved word.
     * This function is expected to be stable: `get(n) === get(n)` should always hold.
     * @param n The ordinal of the identifier.
     * @returns The candidate identifier for ordinal `n`.
     */
    get(n: number): string;
}

/**
 * An identifier mangler that uses character frequency analysis to decide identifier
 * precedence. Frequency analysis is only performed when `consider`, `reset` and `sort`
 * are all present; it calls `reset()` first, then `consider(...)` repeatedly, and
 * finally `sort()` before identifiers are requested through `get(n)`.
 */
export interface WeightedIdentifierMangler extends SimpleIdentifierMangler {
    /**
     * Adjusts the internal weighting of each character in `chars` by `delta`.
     * During frequency analysis this is called with a delta of `1` for the entire
     * printed AST and with a delta of `-1` for names that are going to be mangled,
     * so that only characters that survive into the output are counted.
     * The return value is ignored by the mangler, so nothing needs to be returned.
     * @param chars The characters to modify the weighting of.
     * @param delta The numeric weight to add to the characters.
     */
    consider(chars: string, delta: number): void;
    /**
     * Resets character weights.
     */
    reset(): void;
    /**
     * Sorts identifiers by character frequency, in preparation for calls to get(n).
     */
    sort(): void;
}

/**
 * Options for property-name mangling; accepted as the `properties` member of
 * `MangleOptions` in place of a plain boolean.
 */
export interface ManglePropertiesOptions {
builtins?: boolean;
debug?: boolean;
keep_quoted?: boolean | 'strict';
/**
 * Custom generator for mangled property names. Its `get(n)` converts an ordinal
 * into the nth most favored (usually shortest) identifier. When omitted, the
 * built-in frequency-weighted mangler is used.
 */
nth_identifier?: SimpleIdentifierMangler | WeightedIdentifierMangler;
/** Only mangle properties whose names match this RegExp or pattern string. */
regex?: RegExp | string;
/** Property names that must not be mangled. */
reserved?: string[];
}
Expand Down

0 comments on commit f4a3ca4

Please sign in to comment.