Skip to content

Commit

Permalink
fix #1843: generate shorter data urls if possible
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Dec 7, 2022
1 parent 5abe071 commit 895f50c
Show file tree
Hide file tree
Showing 9 changed files with 324 additions and 27 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Expand Up @@ -67,6 +67,10 @@

The `define` parameter appears at first glance to take a JSON object if you aren't paying close attention, but this actually isn't true. Values for `define` are instead strings of JavaScript code. This means you have to use `define: { foo: '"bar"' }` to replace `foo` with the string `"bar"`. Using `define: { foo: 'bar' }` actually replaces `foo` with the identifier `bar`. Previously esbuild allowed you to pass `define: { foo: false }` and `false` was automatically converted into a string, which made it more confusing to understand what `define` actually represents. Starting with this release, passing non-string values such as with `define: { foo: false }` will no longer be allowed. You will now have to write `define: { foo: 'false' }` instead.

* Generate shorter data URLs if possible ([#1843](https://github.com/evanw/esbuild/issues/1843))

Loading a file with esbuild's `dataurl` loader generates a JavaScript module whose single default export is a string containing a [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) for that file. Previously the data URLs generated by esbuild all used [base64 encoding](https://en.wikipedia.org/wiki/Base64). However, this is unnecessarily long for most textual data (e.g. SVG images). So with this release, esbuild's `dataurl` loader will now use percent encoding instead of base64 encoding if the result will be shorter. This can result in ~25% smaller data URLs for large SVGs. If you want the old behavior, you can use the `base64` loader instead and then construct the data URL yourself.

* Avoid marking entry points as external ([#2382](https://github.com/evanw/esbuild/issues/2382))

Previously you couldn't specify `--external:*` to mark all import paths as external because that also ended up making the entry point itself external, which caused the build to fail. With this release, esbuild's `external` API parameter no longer applies to entry points so using `--external:*` is now possible.
Expand Down
3 changes: 3 additions & 0 deletions internal/bundler/bundler.go
Expand Up @@ -288,6 +288,9 @@ func parseFile(args parseArgs) {
mimeType := guessMimeType(ext, source.Contents)
encoded := base64.StdEncoding.EncodeToString([]byte(source.Contents))
url := fmt.Sprintf("data:%s;base64,%s", mimeType, encoded)
if percentURL, ok := helpers.EncodeStringAsPercentEscapedDataURL(mimeType, source.Contents); ok && len(percentURL) < len(url) {
url = percentURL
}
expr := js_ast.Expr{Data: &js_ast.EString{Value: helpers.StringToUTF16(url)}}
ast := js_parser.LazyExportAST(args.log, source, js_parser.OptionsFromConfig(&args.options), expr, "")
ast.URLForCSS = url
Expand Down
80 changes: 80 additions & 0 deletions internal/bundler/bundler_loader_test.go
Expand Up @@ -1040,6 +1040,86 @@ func TestLoaderDataURLExtensionBasedMIME(t *testing.T) {
})
}

// Percent-encoded data URLs should switch over to base64
// data URLs if it would result in a smaller size
func TestLoaderDataURLBase64VsPercentEncoding(t *testing.T) {
loader_suite.expectBundled(t, bundled{
files: map[string]string{
"/entry.js": `
import a from './shouldUsePercent_1.txt'
import b from './shouldUsePercent_2.txt'
import c from './shouldUseBase64_1.txt'
import d from './shouldUseBase64_2.txt'
console.log(
a,
b,
c,
d,
)
`,
"/shouldUsePercent_1.txt": "\n\n\n",
"/shouldUsePercent_2.txt": "\n\n\n\n",
"/shouldUseBase64_1.txt": "\n\n\n\n\n",
"/shouldUseBase64_2.txt": "\n\n\n\n\n\n",
},
entryPaths: []string{"/entry.js"},
options: config.Options{
Mode: config.ModeBundle,
AbsOutputFile: "/out.js",
ExtensionToLoader: map[string]config.Loader{
".js": config.LoaderJS,
".txt": config.LoaderDataURL,
},
},
})
}

func TestLoaderDataURLBase64InvalidUTF8(t *testing.T) {
loader_suite.expectBundled(t, bundled{
files: map[string]string{
"/entry.js": `
import a from './binary.txt'
console.log(a)
`,
"/binary.txt": "\xFF",
},
entryPaths: []string{"/entry.js"},
options: config.Options{
Mode: config.ModeBundle,
AbsOutputFile: "/out.js",
ExtensionToLoader: map[string]config.Loader{
".js": config.LoaderJS,
".txt": config.LoaderDataURL,
},
},
})
}

func TestLoaderDataURLEscapePercents(t *testing.T) {
loader_suite.expectBundled(t, bundled{
files: map[string]string{
"/entry.js": `
import a from './percents.txt'
console.log(a)
`,
"/percents.txt": `
%, %3, %33, %333
%, %e, %ee, %eee
%, %E, %EE, %EEE
`,
},
entryPaths: []string{"/entry.js"},
options: config.Options{
Mode: config.ModeBundle,
AbsOutputFile: "/out.js",
ExtensionToLoader: map[string]config.Loader{
".js": config.LoaderJS,
".txt": config.LoaderDataURL,
},
},
})
}

func TestLoaderCopyWithBundleFromJS(t *testing.T) {
default_suite.expectBundled(t, bundled{
files: map[string]string{
Expand Down
42 changes: 21 additions & 21 deletions internal/bundler/snapshots/snapshots_default.txt
Expand Up @@ -2056,67 +2056,67 @@ console.log([
TestLoaderDataURLExtensionBasedMIME
---------- /out/entry.js ----------
// example.css
var example_default = "data:text/css;charset=utf-8;base64,Y3Nz";
var example_default = "data:text/css;charset=utf-8,css";

// example.eot
var example_default2 = "data:application/vnd.ms-fontobject;base64,ZW90";
var example_default2 = "data:application/vnd.ms-fontobject,eot";

// example.gif
var example_default3 = "data:image/gif;base64,Z2lm";
var example_default3 = "data:image/gif,gif";

// example.htm
var example_default4 = "data:text/html;charset=utf-8;base64,aHRt";
var example_default4 = "data:text/html;charset=utf-8,htm";

// example.html
var example_default5 = "data:text/html;charset=utf-8;base64,aHRtbA==";
var example_default5 = "data:text/html;charset=utf-8,html";

// example.jpeg
var example_default6 = "data:image/jpeg;base64,anBlZw==";
var example_default6 = "data:image/jpeg,jpeg";

// example.jpg
var example_default7 = "data:image/jpeg;base64,anBn";
var example_default7 = "data:image/jpeg,jpg";

// example.js
var example_default8 = "data:text/javascript;charset=utf-8;base64,anM=";
var example_default8 = "data:text/javascript;charset=utf-8,js";

// example.json
var example_default9 = "data:application/json;base64,anNvbg==";
var example_default9 = "data:application/json,json";

// example.mjs
var example_default10 = "data:text/javascript;charset=utf-8;base64,bWpz";
var example_default10 = "data:text/javascript;charset=utf-8,mjs";

// example.otf
var example_default11 = "data:font/otf;base64,b3Rm";
var example_default11 = "data:font/otf,otf";

// example.pdf
var example_default12 = "data:application/pdf;base64,cGRm";
var example_default12 = "data:application/pdf,pdf";

// example.png
var example_default13 = "data:image/png;base64,cG5n";
var example_default13 = "data:image/png,png";

// example.sfnt
var example_default14 = "data:font/sfnt;base64,c2ZudA==";
var example_default14 = "data:font/sfnt,sfnt";

// example.svg
var example_default15 = "data:image/svg+xml;base64,c3Zn";
var example_default15 = "data:image/svg+xml,svg";

// example.ttf
var example_default16 = "data:font/ttf;base64,dHRm";
var example_default16 = "data:font/ttf,ttf";

// example.wasm
var example_default17 = "data:application/wasm;base64,d2FzbQ==";
var example_default17 = "data:application/wasm,wasm";

// example.webp
var example_default18 = "data:image/webp;base64,d2VicA==";
var example_default18 = "data:image/webp,webp";

// example.woff
var example_default19 = "data:font/woff;base64,d29mZg==";
var example_default19 = "data:font/woff,woff";

// example.woff2
var example_default20 = "data:font/woff2;base64,d29mZjI=";
var example_default20 = "data:font/woff2,woff2";

// example.xml
var example_default21 = "data:text/xml;charset=utf-8;base64,eG1s";
var example_default21 = "data:text/xml;charset=utf-8,xml";
export {
example_default as css,
example_default2 as eot,
Expand Down
45 changes: 43 additions & 2 deletions internal/bundler/snapshots/snapshots_loader.txt
Expand Up @@ -128,23 +128,64 @@ var y_default = "eQ==";
var x_b64 = require_x();
console.log(x_b64, y_default);

================================================================================
TestLoaderDataURLBase64InvalidUTF8
---------- /out.js ----------
// binary.txt
var binary_default = "data:text/plain;charset=utf-8;base64,/w==";

// entry.js
console.log(binary_default);

================================================================================
TestLoaderDataURLBase64VsPercentEncoding
---------- /out.js ----------
// shouldUsePercent_1.txt
var shouldUsePercent_1_default = "data:text/plain;charset=utf-8,%0A%0A%0A";

// shouldUsePercent_2.txt
var shouldUsePercent_2_default = "data:text/plain;charset=utf-8,%0A%0A%0A%0A";

// shouldUseBase64_1.txt
var shouldUseBase64_1_default = "data:text/plain;charset=utf-8;base64,CgoKCgo=";

// shouldUseBase64_2.txt
var shouldUseBase64_2_default = "data:text/plain;charset=utf-8;base64,CgoKCgoK";

// entry.js
console.log(
shouldUsePercent_1_default,
shouldUsePercent_2_default,
shouldUseBase64_1_default,
shouldUseBase64_2_default
);

================================================================================
TestLoaderDataURLCommonJSAndES6
---------- /out.js ----------
// x.txt
var require_x = __commonJS({
"x.txt"(exports, module) {
module.exports = "data:text/plain;charset=utf-8;base64,eA==";
module.exports = "data:text/plain;charset=utf-8,x";
}
});

// y.txt
var y_default = "data:text/plain;charset=utf-8;base64,eQ==";
var y_default = "data:text/plain;charset=utf-8,y";

// entry.js
var x_url = require_x();
console.log(x_url, y_default);

================================================================================
TestLoaderDataURLEscapePercents
---------- /out.js ----------
// percents.txt
var percents_default = "data:text/plain;charset=utf-8,%0A%, %3, %2533, %25333%0A%, %e, %25ee, %25eee%0A%, %E, %25EE, %25EEE%0A";

// entry.js
console.log(percents_default);

================================================================================
TestLoaderFile
---------- /out/test-IPILGNO5.svg ----------
Expand Down
60 changes: 60 additions & 0 deletions internal/helpers/dataurl.go
@@ -0,0 +1,60 @@
package helpers

import (
"strings"
"unicode/utf8"
)

// EncodeStringAsPercentEscapedDataURL returns "data:<mimeType>,<payload>"
// where the payload is text with only the strictly necessary characters
// replaced by uppercase "%XX" escapes. The second result is false (and the
// string empty) when text is not valid UTF-8, in which case the caller has to
// use some other encoding.
//
// The following are escaped:
//   - tab, line feed, carriage return and "#", wherever they occur;
//   - every byte in the trailing run of bytes <= 0x20 at the end of text;
//   - "%" when it is immediately followed by two hex digits, so that it is
//     not read back as an existing escape sequence.
//
// See "scripts/dataurl-escapes.html" for how this was derived
func EncodeStringAsPercentEscapedDataURL(mimeType string, text string) (string, bool) {
	const upperHex = "0123456789ABCDEF"
	size := len(text)

	var out strings.Builder
	out.WriteString("data:")
	out.WriteString(mimeType)
	out.WriteByte(',')

	// Find where the trailing run of low bytes begins. Tab, LF and CR end the
	// run because they are escaped unconditionally below anyway.
	tailStart := size
	for ; tailStart > 0; tailStart-- {
		b := text[tailStart-1]
		if b > 0x20 || b == '\t' || b == '\n' || b == '\r' {
			break
		}
	}

	// "pending" marks the start of the unescaped span not yet copied to out.
	pending := 0
	for pos := 0; pos < size; {
		r, w := utf8.DecodeRuneInString(text[pos:])

		// A width-1 RuneError means a malformed byte, not a literal U+FFFD
		if r == utf8.RuneError && w == 1 {
			return "", false
		}

		mustEscape := pos >= tailStart
		switch r {
		case '\t', '\n', '\r', '#':
			mustEscape = true
		case '%':
			if pos+2 < size && isHex(text[pos+1]) && isHex(text[pos+2]) {
				mustEscape = true
			}
		}

		if mustEscape {
			// Flush the literal span before this character, then the escape.
			// Everything escaped here is ASCII, so two hex digits suffice.
			out.WriteString(text[pending:pos])
			out.WriteByte('%')
			out.WriteByte(upperHex[r>>4])
			out.WriteByte(upperHex[r&15])
			pending = pos + w
		}

		pos += w
	}

	// Copy whatever literal text follows the last escape
	out.WriteString(text[pending:])

	return out.String(), true
}

// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
48 changes: 48 additions & 0 deletions internal/helpers/dataurl_test.go
@@ -0,0 +1,48 @@
package helpers_test

import (
"fmt"
"testing"

"github.com/evanw/esbuild/internal/helpers"
)

// TestEncodeDataURL checks the escaping rules of
// helpers.EncodeStringAsPercentEscapedDataURL: every code point from U+0000
// to U+00FF is tried on its own, at the end of a string and at the start of a
// string, followed by a few hand-written cases.
func TestEncodeDataURL(t *testing.T) {
	// assertEncodes fails the test unless input encodes successfully to want.
	assertEncodes := func(input string, want string) {
		got, ok := helpers.EncodeStringAsPercentEscapedDataURL("text/plain", input)
		if !ok {
			t.Fatalf("Failed to encode %q", input)
		}
		if got != want {
			t.Fatalf("Got %q but expected %q", got, want)
		}
	}

	for code := 0; code <= 0xFF; code++ {
		ch := string(rune(code))

		// Escaped no matter where the character appears
		escapedAnywhere := code == '\t' || code == '\r' || code == '\n' || code == '#'

		// Escaped when the character is the last one in the string
		escapedAtEnd := code <= 0x20 || code == '#'

		aloneFormat, suffixFormat := "data:text/plain,%c", "data:text/plain,foo%c"
		if escapedAtEnd {
			aloneFormat, suffixFormat = "data:text/plain,%%%02X", "data:text/plain,foo%%%02X"
		}
		assertEncodes(ch, fmt.Sprintf(aloneFormat, code))
		assertEncodes("foo"+ch, fmt.Sprintf(suffixFormat, code))

		prefixFormat := "data:text/plain,%cfoo"
		if escapedAnywhere {
			prefixFormat = "data:text/plain,%%%02Xfoo"
		}
		assertEncodes(ch+"foo", fmt.Sprintf(prefixFormat, code))
	}

	fixedCases := []struct {
		input string
		want  string
	}{
		// Test leading vs. trailing
		{" \t ", "data:text/plain, %09%20"},
		{" \n ", "data:text/plain, %0A%20"},
		{" \r ", "data:text/plain, %0D%20"},
		{" # ", "data:text/plain, %23%20"},
		{"\x08#\x08", "data:text/plain,\x08%23%08"},

		// Only "%" symbols that could form an escape need to be escaped
		{"%, %3, %33, %333", "data:text/plain,%, %3, %2533, %25333"},
	}
	for _, tc := range fixedCases {
		assertEncodes(tc.input, tc.want)
	}
}

0 comments on commit 895f50c

Please sign in to comment.