Skip to content

Commit

Permalink
Use native decoding of base64 in Node.js (#269)
Browse files Browse the repository at this point in the history
* Make decoding of base64 data URIs faster

I saw in microsoft/vscode-js-debug#1911 that
base64 decoding of a data URI was taking a bit of time.

This PR feature-detects the presence of a global `Buffer` to use native
decoding when running in Node.js, which is about 25x faster on a 10MB
data URI than the JS implementation in the library.

I have a bit of a hack in order to test both paths when running tests,
happy to change it if desired :)

* use conditional exports

* fix test import

* fix node test import, and set stringToBuffer encoding

* Create few-adults-rhyme.md

---------

Co-authored-by: Nathan Rajlich <n@n8.io>
  • Loading branch information
connor4312 and TooTallNate committed Jan 8, 2024
1 parent c3c405e commit c881a18
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 61 deletions.
5 changes: 5 additions & 0 deletions .changeset/few-adults-rhyme.md
@@ -0,0 +1,5 @@
---
"data-uri-to-buffer": patch
---

Use native Buffer decoding in Node.js
4 changes: 4 additions & 0 deletions packages/data-uri-to-buffer/package.json
Expand Up @@ -4,6 +4,10 @@
"description": "Create an ArrayBuffer instance from a Data URI string",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"exports": {
"node": "./dist/node.js",
"default": "./dist/index.js"
},
"files": [
"dist"
],
Expand Down
69 changes: 69 additions & 0 deletions packages/data-uri-to-buffer/src/common.ts
@@ -0,0 +1,69 @@
/**
 * Result of parsing a data URI.
 */
export interface ParsedDataURI {
/** Media type from the URI metadata, without parameters (`text/plain` when the URI gives none). */
type: string;
/** Media type followed by its `;`-separated parameters; the `base64` flag is not included. */
typeFull: string;
/** Value of the `charset=` parameter; `US-ASCII` when neither a type nor a charset is given, otherwise empty when absent. */
charset: string;
/** Decoded payload bytes. */
buffer: ArrayBuffer;
}

/**
 * Conversion routines that `makeDataUriToBuffer` uses to turn the payload
 * portion of a data URI into bytes.
 */
export interface IBufferConversions {
/** Called with the unescaped payload when the URI carries the `base64` flag. */
base64ToArrayBuffer(base64: string): ArrayBuffer;
/** Called with the unescaped payload when the URI does not carry the `base64` flag. */
stringToBuffer(str: string): ArrayBuffer;
}

/**
 * Builds a data-URI parser on top of the supplied byte-conversion routines.
 *
 * The returned function accepts a `data:` URI (string or `URL`), splits it
 * into metadata and payload, and decodes the payload into an `ArrayBuffer`:
 * `convert.base64ToArrayBuffer` is used when the `base64` flag is present,
 * `convert.stringToBuffer` otherwise.
 *
 * @param convert Conversion routines used to produce the payload bytes.
 * @returns A parser yielding the media type, charset and decoded bytes. The
 *   parser throws a `TypeError` when its input does not start with `data:`
 *   or has no comma separating metadata from data.
 */
export const makeDataUriToBuffer =
  (convert: IBufferConversions) =>
  (uri: string | URL): ParsedDataURI => {
    // Accept URL objects as well as plain strings.
    const raw = String(uri);

    if (!/^data:/i.test(raw)) {
      throw new TypeError(
        '`uri` does not appear to be a Data URI (must begin with "data:")'
      );
    }

    // Line breaks carry no meaning inside the URI; drop them before parsing.
    const cleaned = raw.replace(/\r?\n/g, '');

    // The first comma separates the metadata from the data. An index below 5
    // means it is missing (-1) or would sit inside the "data:" scheme prefix.
    const commaAt = cleaned.indexOf(',');
    if (commaAt < 5) {
      throw new TypeError('malformed data: URI');
    }

    // Everything between "data:" and the comma is a `;`-separated header:
    // the media type first, then parameters and the optional `base64` flag.
    const header = cleaned.slice(5, commaAt).split(';');
    const declaredType = header[0];
    const type = declaredType || 'text/plain';

    let typeFull = type;
    let charset = '';
    let isBase64 = false;
    for (const param of header.slice(1)) {
      if (param === 'base64') {
        isBase64 = true;
        continue;
      }
      if (!param) {
        // Empty segments (e.g. from ";;") are ignored.
        continue;
      }
      typeFull += `;${param}`;
      if (param.startsWith('charset=')) {
        charset = param.slice(8);
      }
    }

    // US-ASCII is the default only when no media type was given at all.
    if (!declaredType && !charset.length) {
      typeFull += ';charset=US-ASCII';
      charset = 'US-ASCII';
    }

    // Decode %XX escapes in the payload, then hand it to the right converter.
    const payload = unescape(cleaned.slice(commaAt + 1));
    const buffer = isBase64
      ? convert.base64ToArrayBuffer(payload)
      : convert.stringToBuffer(payload);

    return { type, typeFull, charset, buffer };
  };
62 changes: 4 additions & 58 deletions packages/data-uri-to-buffer/src/index.ts
@@ -1,9 +1,6 @@
/**
 * Result of parsing a data URI.
 */
export interface ParsedDataURI {
/** Media type from the URI metadata, without parameters. */
type: string;
/** Media type followed by its `;`-separated parameters. */
typeFull: string;
/** Value of the `charset=` parameter, when one is present or defaulted. */
charset: string;
/** Decoded payload bytes. */
buffer: ArrayBuffer;
}
import { makeDataUriToBuffer } from './common';

export type { ParsedDataURI } from './common';

function base64ToArrayBuffer(base64: string) {
const chars =
Expand Down Expand Up @@ -58,55 +55,4 @@ function stringToBuffer(str: string): ArrayBuffer {
*
* @param {String} uri Data URI to turn into a Buffer instance
*/
export function dataUriToBuffer(uri: string | URL): ParsedDataURI {
  // Accept URL objects as well as plain strings.
  const raw = String(uri);

  if (!/^data:/i.test(raw)) {
    throw new TypeError(
      '`uri` does not appear to be a Data URI (must begin with "data:")'
    );
  }

  // Line breaks carry no meaning inside the URI; drop them before parsing.
  const cleaned = raw.replace(/\r?\n/g, '');

  // The first comma separates the metadata from the data. An index below 5
  // means it is missing (-1) or would sit inside the "data:" scheme prefix.
  const commaAt = cleaned.indexOf(',');
  if (commaAt < 5) {
    throw new TypeError('malformed data: URI');
  }

  // Everything between "data:" and the comma is a `;`-separated header:
  // the media type first, then parameters and the optional `base64` flag.
  const header = cleaned.slice(5, commaAt).split(';');
  const declaredType = header[0];
  const type = declaredType || 'text/plain';

  let typeFull = type;
  let charset = '';
  let isBase64 = false;
  for (const param of header.slice(1)) {
    if (param === 'base64') {
      isBase64 = true;
      continue;
    }
    if (!param) {
      // Empty segments (e.g. from ";;") are ignored.
      continue;
    }
    typeFull += `;${param}`;
    if (param.startsWith('charset=')) {
      charset = param.slice(8);
    }
  }

  // US-ASCII is the default only when no media type was given at all.
  if (!declaredType && !charset.length) {
    typeFull += ';charset=US-ASCII';
    charset = 'US-ASCII';
  }

  // Decode %XX escapes in the payload, then convert it to bytes.
  const payload = unescape(cleaned.slice(commaAt + 1));
  const buffer = isBase64
    ? base64ToArrayBuffer(payload)
    : stringToBuffer(payload);

  return { type, typeFull, charset, buffer };
}
// Entry point for this module: the shared parser from `./common`, bound to
// this module's own `stringToBuffer` and `base64ToArrayBuffer`.
export const dataUriToBuffer = makeDataUriToBuffer({ stringToBuffer, base64ToArrayBuffer });
28 changes: 28 additions & 0 deletions packages/data-uri-to-buffer/src/node.ts
@@ -0,0 +1,28 @@
import { makeDataUriToBuffer } from './common';

export type { ParsedDataURI } from './common';

/**
 * Produces an `ArrayBuffer` holding exactly the bytes of `nodeBuf`.
 *
 * When the Buffer spans its whole backing store, that store is returned
 * directly without copying. Otherwise the bytes are copied into a newly
 * allocated `ArrayBuffer` of the same length.
 *
 * @param nodeBuf Node.js Buffer to convert.
 * @returns A buffer whose byte length equals `nodeBuf.byteLength`.
 */
function nodeBuffertoArrayBuffer(nodeBuf: Buffer) {
  const backing = nodeBuf.buffer;
  const size = nodeBuf.byteLength;

  if (size !== backing.byteLength) {
    // The Buffer is a window onto a larger allocation: copy out its bytes only.
    const copy = new ArrayBuffer(size);
    new Uint8Array(copy).set(nodeBuf);
    return copy;
  }

  // large strings may get their own memory allocation
  return backing;
}

/**
 * Decodes a base64 string with Node's native `Buffer` and returns the
 * resulting bytes.
 *
 * @param base64 Base64-encoded text.
 * @returns The decoded bytes.
 */
function base64ToArrayBuffer(base64: string) {
  const decoded = Buffer.from(base64, 'base64');
  return nodeBuffertoArrayBuffer(decoded);
}

/**
 * Encodes `str` with Node's `'ascii'` Buffer encoding and returns the bytes
 * as an `ArrayBuffer`.
 *
 * @param str Text to encode.
 * @returns The encoded bytes.
 */
function stringToBuffer(str: string): ArrayBuffer {
  const encoded = Buffer.from(str, 'ascii');
  return nodeBuffertoArrayBuffer(encoded);
}

/**
 * Parses a data URI, decoding its payload with Node.js `Buffer`.
 *
 * Created by passing this module's `stringToBuffer` and
 * `base64ToArrayBuffer` to `makeDataUriToBuffer`.
 *
 * @param uri Data URI (string or `URL`) to parse
 * @returns The parsed media type, charset and the payload as an `ArrayBuffer`
 */
export const dataUriToBuffer = makeDataUriToBuffer({ stringToBuffer, base64ToArrayBuffer });
10 changes: 7 additions & 3 deletions packages/data-uri-to-buffer/test/data-uri-to-buffer.test.ts
@@ -1,7 +1,11 @@
import assert from 'assert';
import { dataUriToBuffer } from '../src';
import { dataUriToBuffer as baseline } from '../src/index';
import { dataUriToBuffer as node } from '../src/node';

describe('data-uri-to-buffer', function () {
describe('node', () => doTest(node));
describe('baseline', () => doTest(baseline));

function doTest(dataUriToBuffer: typeof baseline) {
it('should decode bare-bones Data URIs', function () {
const uri = 'data:,Hello%2C%20World!';

Expand Down Expand Up @@ -187,4 +191,4 @@ describe('data-uri-to-buffer', function () {
assert.equal('UTF-8', parsed.charset);
assert.equal('abc', Buffer.from(parsed.buffer).toString());
});
});
}

1 comment on commit c881a18

@vercel
Copy link

@vercel vercel bot commented on c881a18 Jan 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.