From 895f50c84b47f2506c02b74972da9640db800c54 Mon Sep 17 00:00:00 2001 From: Evan Wallace Date: Tue, 6 Dec 2022 21:48:58 -0500 Subject: [PATCH] fix #1843: generate shorter data urls if possible --- CHANGELOG.md | 4 + internal/bundler/bundler.go | 3 + internal/bundler/bundler_loader_test.go | 80 +++++++++++++++++++ .../bundler/snapshots/snapshots_default.txt | 42 +++++----- .../bundler/snapshots/snapshots_loader.txt | 45 ++++++++++- internal/helpers/dataurl.go | 60 ++++++++++++++ internal/helpers/dataurl_test.go | 48 +++++++++++ scripts/dataurl-escapes.html | 55 +++++++++++++ scripts/js-api-tests.js | 14 +++- 9 files changed, 324 insertions(+), 27 deletions(-) create mode 100644 internal/helpers/dataurl.go create mode 100644 internal/helpers/dataurl_test.go create mode 100644 scripts/dataurl-escapes.html diff --git a/CHANGELOG.md b/CHANGELOG.md index 34061d9c6b8..e4d8a3b6954 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,10 @@ The `define` parameter appears at first glance to take a JSON object if you aren't paying close attention, but this actually isn't true. Values for `define` are instead strings of JavaScript code. This means you have to use `define: { foo: '"bar"' }` to replace `foo` with the string `"bar"`. Using `define: { foo: 'bar' }` actually replaces `foo` with the identifier `bar`. Previously esbuild allowed you to pass `define: { foo: false }` and `false` was automatically converted into a string, which made it more confusing to understand what `define` actually represents. Starting with this release, passing non-string values such as with `define: { foo: false }` will no longer be allowed. You will now have to write `define: { foo: 'false' }` instead. +* Generate shorter data URLs if possible ([#1843](https://github.com/evanw/esbuild/issues/1843)) + + Loading a file with esbuild's `dataurl` loader generates a JavaScript module with a [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) for that file in a string as a single default export. Previously the data URLs generated by esbuild all used [base64 encoding](https://en.wikipedia.org/wiki/Base64). However, this is unnecessarily long for most textual data (e.g. SVG images). So with this release, esbuild's `dataurl` loader will now use percent encoding instead of base64 encoding if the result will be shorter. This can result in ~25% smaller data URLs for large SVGs. If you want the old behavior, you can use the `base64` loader instead and then construct the data URL yourself. + * Avoid marking entry points as external ([#2382](https://github.com/evanw/esbuild/issues/2382)) Previously you couldn't specify `--external:*` to mark all import paths as external because that also ended up making the entry point itself external, which caused the build to fail. With this release, esbuild's `external` API parameter no longer applies to entry points so using `--external:*` is now possible. diff --git a/internal/bundler/bundler.go b/internal/bundler/bundler.go index 1951313b45a..fcd449c6282 100644 --- a/internal/bundler/bundler.go +++ b/internal/bundler/bundler.go @@ -288,6 +288,9 @@ func parseFile(args parseArgs) { mimeType := guessMimeType(ext, source.Contents) encoded := base64.StdEncoding.EncodeToString([]byte(source.Contents)) url := fmt.Sprintf("data:%s;base64,%s", mimeType, encoded) + if percentURL, ok := helpers.EncodeStringAsPercentEscapedDataURL(mimeType, source.Contents); ok && len(percentURL) < len(url) { + url = percentURL + } expr := js_ast.Expr{Data: &js_ast.EString{Value: helpers.StringToUTF16(url)}} ast := js_parser.LazyExportAST(args.log, source, js_parser.OptionsFromConfig(&args.options), expr, "") ast.URLForCSS = url diff --git a/internal/bundler/bundler_loader_test.go b/internal/bundler/bundler_loader_test.go index a0ff39390eb..e4658d5733c 100644 --- a/internal/bundler/bundler_loader_test.go +++ b/internal/bundler/bundler_loader_test.go @@ -1040,6 +1040,86 @@ func TestLoaderDataURLExtensionBasedMIME(t *testing.T) { }) } +// Percent-encoded data URLs should switch over to base64 +// data URLs if it would result in a smaller size +func TestLoaderDataURLBase64VsPercentEncoding(t *testing.T) { + loader_suite.expectBundled(t, bundled{ + files: map[string]string{ + "/entry.js": ` + import a from './shouldUsePercent_1.txt' + import b from './shouldUsePercent_2.txt' + import c from './shouldUseBase64_1.txt' + import d from './shouldUseBase64_2.txt' + console.log( + a, + b, + c, + d, + ) + `, + "/shouldUsePercent_1.txt": "\n\n\n", + "/shouldUsePercent_2.txt": "\n\n\n\n", + "/shouldUseBase64_1.txt": "\n\n\n\n\n", + "/shouldUseBase64_2.txt": "\n\n\n\n\n\n", + }, + entryPaths: []string{"/entry.js"}, + options: config.Options{ + Mode: config.ModeBundle, + AbsOutputFile: "/out.js", + ExtensionToLoader: map[string]config.Loader{ + ".js": config.LoaderJS, + ".txt": config.LoaderDataURL, + }, + }, + }) +} + +func TestLoaderDataURLBase64InvalidUTF8(t *testing.T) { + loader_suite.expectBundled(t, bundled{ + files: map[string]string{ + "/entry.js": ` + import a from './binary.txt' + console.log(a) + `, + "/binary.txt": "\xFF", + }, + entryPaths: []string{"/entry.js"}, + options: config.Options{ + Mode: config.ModeBundle, + AbsOutputFile: "/out.js", + ExtensionToLoader: map[string]config.Loader{ + ".js": config.LoaderJS, + ".txt": config.LoaderDataURL, + }, + }, + }) +} + +func TestLoaderDataURLEscapePercents(t *testing.T) { + loader_suite.expectBundled(t, bundled{ + files: map[string]string{ + "/entry.js": ` + import a from './percents.txt' + console.log(a) + `, + "/percents.txt": ` +%, %3, %33, %333 +%, %e, %ee, %eee +%, %E, %EE, %EEE +`, + }, + entryPaths: []string{"/entry.js"}, + options: config.Options{ + Mode: config.ModeBundle, + AbsOutputFile: "/out.js", + ExtensionToLoader: map[string]config.Loader{ + ".js": config.LoaderJS, + ".txt": config.LoaderDataURL, + }, + }, + }) +} + func TestLoaderCopyWithBundleFromJS(t *testing.T) { default_suite.expectBundled(t, bundled{ files: map[string]string{ diff --git a/internal/bundler/snapshots/snapshots_default.txt b/internal/bundler/snapshots/snapshots_default.txt index 93ab818cd0c..a65df6b76bd 100644 --- a/internal/bundler/snapshots/snapshots_default.txt +++ b/internal/bundler/snapshots/snapshots_default.txt @@ -2056,67 +2056,67 @@ console.log([ TestLoaderDataURLExtensionBasedMIME ---------- /out/entry.js ---------- // example.css -var example_default = "data:text/css;charset=utf-8;base64,Y3Nz"; +var example_default = "data:text/css;charset=utf-8,css"; // example.eot -var example_default2 = "data:application/vnd.ms-fontobject;base64,ZW90"; +var example_default2 = "data:application/vnd.ms-fontobject,eot"; // example.gif -var example_default3 = ""; +var example_default3 = "data:image/gif,gif"; // example.htm -var example_default4 = "data:text/html;charset=utf-8;base64,aHRt"; +var example_default4 = "data:text/html;charset=utf-8,htm"; // example.html -var example_default5 = "data:text/html;charset=utf-8;base64,aHRtbA=="; +var example_default5 = "data:text/html;charset=utf-8,html"; // example.jpeg -var example_default6 = ""; +var example_default6 = "data:image/jpeg,jpeg"; // example.jpg -var example_default7 = ""; +var example_default7 = "data:image/jpeg,jpg"; // example.js -var example_default8 = "data:text/javascript;charset=utf-8;base64,anM="; +var example_default8 = "data:text/javascript;charset=utf-8,js"; // example.json -var example_default9 = "data:application/json;base64,anNvbg=="; +var example_default9 = "data:application/json,json"; // example.mjs -var example_default10 = "data:text/javascript;charset=utf-8;base64,bWpz"; +var example_default10 = "data:text/javascript;charset=utf-8,mjs"; // example.otf -var example_default11 = "data:font/otf;base64,b3Rm"; +var example_default11 = "data:font/otf,otf"; // example.pdf -var example_default12 = "data:application/pdf;base64,cGRm"; +var example_default12 = "data:application/pdf,pdf"; // example.png -var example_default13 = ""; +var example_default13 = "data:image/png,png"; // example.sfnt -var example_default14 = "data:font/sfnt;base64,c2ZudA=="; +var example_default14 = "data:font/sfnt,sfnt"; // example.svg -var example_default15 = ""; +var example_default15 = "data:image/svg+xml,svg"; // example.ttf -var example_default16 = "data:font/ttf;base64,dHRm"; +var example_default16 = "data:font/ttf,ttf"; // example.wasm -var example_default17 = "data:application/wasm;base64,d2FzbQ=="; +var example_default17 = "data:application/wasm,wasm"; // example.webp -var example_default18 = ""; +var example_default18 = "data:image/webp,webp"; // example.woff -var example_default19 = "data:font/woff;base64,d29mZg=="; +var example_default19 = "data:font/woff,woff"; // example.woff2 -var example_default20 = "data:font/woff2;base64,d29mZjI="; +var example_default20 = "data:font/woff2,woff2"; // example.xml -var example_default21 = "data:text/xml;charset=utf-8;base64,eG1s"; +var example_default21 = "data:text/xml;charset=utf-8,xml"; export { example_default as css, example_default2 as eot, diff --git a/internal/bundler/snapshots/snapshots_loader.txt b/internal/bundler/snapshots/snapshots_loader.txt index f52e18aa75a..0dbb5988679 100644 --- a/internal/bundler/snapshots/snapshots_loader.txt +++ b/internal/bundler/snapshots/snapshots_loader.txt @@ -128,23 +128,64 @@ var y_default = "eQ=="; var x_b64 = require_x(); console.log(x_b64, y_default); +================================================================================ +TestLoaderDataURLBase64InvalidUTF8 +---------- /out.js ---------- +// binary.txt +var binary_default = "data:text/plain;charset=utf-8;base64,/w=="; + +// entry.js +console.log(binary_default); + +================================================================================ +TestLoaderDataURLBase64VsPercentEncoding +---------- /out.js ---------- +// shouldUsePercent_1.txt +var shouldUsePercent_1_default = "data:text/plain;charset=utf-8,%0A%0A%0A"; + +// shouldUsePercent_2.txt +var shouldUsePercent_2_default = "data:text/plain;charset=utf-8,%0A%0A%0A%0A"; + +// shouldUseBase64_1.txt +var shouldUseBase64_1_default = "data:text/plain;charset=utf-8;base64,CgoKCgo="; + +// shouldUseBase64_2.txt +var shouldUseBase64_2_default = "data:text/plain;charset=utf-8;base64,CgoKCgoK"; + +// entry.js +console.log( + shouldUsePercent_1_default, + shouldUsePercent_2_default, + shouldUseBase64_1_default, + shouldUseBase64_2_default +); + ================================================================================ TestLoaderDataURLCommonJSAndES6 ---------- /out.js ---------- // x.txt var require_x = __commonJS({ "x.txt"(exports, module) { - module.exports = "data:text/plain;charset=utf-8;base64,eA=="; + module.exports = "data:text/plain;charset=utf-8,x"; } }); // y.txt -var y_default = "data:text/plain;charset=utf-8;base64,eQ=="; +var y_default = "data:text/plain;charset=utf-8,y"; // entry.js var x_url = require_x(); console.log(x_url, y_default); +================================================================================ +TestLoaderDataURLEscapePercents +---------- /out.js ---------- +// percents.txt +var percents_default = "data:text/plain;charset=utf-8,%0A%, %3, %2533, %25333%0A%, %e, %25ee, %25eee%0A%, %E, %25EE, %25EEE%0A"; + +// entry.js +console.log(percents_default); + ================================================================================ TestLoaderFile ---------- /out/test-IPILGNO5.svg ---------- diff --git a/internal/helpers/dataurl.go b/internal/helpers/dataurl.go new file mode 100644 index 00000000000..cebd584c064 --- /dev/null +++ b/internal/helpers/dataurl.go @@ -0,0 +1,60 @@ +package helpers + +import ( + "strings" + "unicode/utf8" +) + +// See "scripts/dataurl-escapes.html" for how this was derived +func EncodeStringAsPercentEscapedDataURL(mimeType string, text string) (string, bool) { + hex := "0123456789ABCDEF" + sb := strings.Builder{} + n := len(text) + i := 0 + runStart := 0 + sb.WriteString("data:") + sb.WriteString(mimeType) + sb.WriteByte(',') + + // Scan for trailing characters that need to be escaped + trailingStart := n + for trailingStart > 0 { + if c := text[trailingStart-1]; c > 0x20 || c == '\t' || c == '\n' || c == '\r' { + break + } + trailingStart-- + } + + for i < n { + c, width := utf8.DecodeRuneInString(text[i:]) + + // We can't encode invalid UTF-8 data + if c == utf8.RuneError && width == 1 { + return "", false + } + + // Escape this character if needed + if c == '\t' || c == '\n' || c == '\r' || c == '#' || i >= trailingStart || + (c == '%' && i+2 < n && isHex(text[i+1]) && isHex(text[i+2])) { + if runStart < i { + sb.WriteString(text[runStart:i]) + } + sb.WriteByte('%') + sb.WriteByte(hex[c>>4]) + sb.WriteByte(hex[c&15]) + runStart = i + width + } + + i += width + } + + if runStart < n { + sb.WriteString(text[runStart:]) + } + + return sb.String(), true +} + +func isHex(c byte) bool { + return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' +} diff --git a/internal/helpers/dataurl_test.go b/internal/helpers/dataurl_test.go new file mode 100644 index 00000000000..8b7ca60209e --- /dev/null +++ b/internal/helpers/dataurl_test.go @@ -0,0 +1,48 @@ +package helpers_test + +import ( + "fmt" + "testing" + + "github.com/evanw/esbuild/internal/helpers" +) + +func TestEncodeDataURL(t *testing.T) { + check := func(raw string, expected string) { + url, ok := helpers.EncodeStringAsPercentEscapedDataURL("text/plain", raw) + if !ok { + t.Fatalf("Failed to encode %q", raw) + } else if url != expected { + t.Fatalf("Got %q but expected %q", url, expected) + } + } + + for i := 0; i <= 0xFF; i++ { + alwaysEscape := i == '\t' || i == '\r' || i == '\n' || i == '#' + trailingEscape := i <= 0x20 || i == '#' + + if trailingEscape { + check(string(rune(i)), fmt.Sprintf("data:text/plain,%%%02X", i)) + check("foo"+string(rune(i)), fmt.Sprintf("data:text/plain,foo%%%02X", i)) + } else { + check(string(rune(i)), fmt.Sprintf("data:text/plain,%c", i)) + check("foo"+string(rune(i)), fmt.Sprintf("data:text/plain,foo%c", i)) + } + + if alwaysEscape { + check(string(rune(i))+"foo", fmt.Sprintf("data:text/plain,%%%02Xfoo", i)) + } else { + check(string(rune(i))+"foo", fmt.Sprintf("data:text/plain,%cfoo", i)) + } + } + + // Test leading vs. trailing + check(" \t ", "data:text/plain, %09%20") + check(" \n ", "data:text/plain, %0A%20") + check(" \r ", "data:text/plain, %0D%20") + check(" # ", "data:text/plain, %23%20") + check("\x08#\x08", "data:text/plain,\x08%23%08") + + // Only "%" symbols that could form an escape need to be escaped + check("%, %3, %33, %333", "data:text/plain,%, %3, %2533, %25333") +} diff --git a/scripts/dataurl-escapes.html b/scripts/dataurl-escapes.html new file mode 100644 index 00000000000..c724b73f127 --- /dev/null +++ b/scripts/dataurl-escapes.html @@ -0,0 +1,55 @@ + +

+ This script checks to see what characters need to be escaped in a data URL + (in addition to % for percent-encoded hexadecimal escapes) for a browser to + parse it correctly. This information is used to implement esbuild's + dataurl loader. Here is what your current browser requires: +

+

+

+ The answer that works across Chrome, Firefox, and Safari appears to be: +
+
Always percent-encode these values: 0x09, 0x0A, 0x0D, 0x23 +
Only percent-encode these values in the trailing position: 0x00 to 0x08, 0x0B, 0x0C, 0x0E to 0x20 +

+ diff --git a/scripts/js-api-tests.js b/scripts/js-api-tests.js index 93fb87ee898..e56bf33476d 100644 --- a/scripts/js-api-tests.js +++ b/scripts/js-api-tests.js @@ -1439,12 +1439,12 @@ body { }, outputs: { [makePath(output)]: { - bytes: 263, + bytes: 253, entryPoint: makePath(entry), imports: [], inputs: { [makePath(entry)]: { bytesInOutput: 62 }, - [makePath(imported)]: { bytesInOutput: 61 }, + [makePath(imported)]: { bytesInOutput: 51 }, }, }, [makePath(output + '.map')]: { @@ -4770,8 +4770,14 @@ let transformTests = { }, async dataurl({ esbuild }) { - const { code } = await esbuild.transform(`\x00\x01\x02`, { loader: 'dataurl' }) - assert.strictEqual(code, `module.exports = "data:application/octet-stream;base64,AAEC";\n`) + const { code: code1 } = await esbuild.transform(`\x00\x01\x02`, { loader: 'dataurl' }) + assert.strictEqual(code1, `module.exports = "data:application/octet-stream,%00%01%02";\n`) + + const { code: code2 } = await esbuild.transform(`\xFD\xFE\xFF`, { loader: 'dataurl' }) + assert.strictEqual(code2, `module.exports = "data:text/plain;charset=utf-8,\\xFD\\xFE\\xFF";\n`) + + const { code: code3 } = await esbuild.transform(new Uint8Array([0xFD, 0xFE, 0xFF]), { loader: 'dataurl' }) + assert.strictEqual(code3, `module.exports = "data:text/plain;charset=utf-8;base64,/f7/";\n`) }, async sourceMapTrueWithName({ esbuild }) {