Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data URL support #95

Merged
merged 17 commits into from Sep 20, 2019
2 changes: 1 addition & 1 deletion index.d.ts
Expand Up @@ -192,7 +192,7 @@ declare const normalizeUrl: {
/**
[Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL.

@param url - URL to normalize.
@param url - URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs).

@example
```
Expand Down
55 changes: 54 additions & 1 deletion index.js
Expand Up @@ -6,6 +6,53 @@ const testParameter = (name, filters) => {
return filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name);
};

const normalizeDataURL = urlString => {
const parts = urlString.trim().match(/^data:(.*?),(.*)$/);

if (!parts) {
throw new Error(`Invalid URL: ${urlString}`);
}

const mediaType = parts[1].split(';');
const body = parts[2];

let base64 = false;

if (mediaType[mediaType.length - 1] === 'base64') {
mediaType.pop();
base64 = true;
}

// Lowercase MIME type
const mimeType = (mediaType.shift() || '').toLowerCase();
const attributes = mediaType
.filter(Boolean)
.map(attribute => {
let [key, value = ''] = attribute.split('=').map(string => string.trim());

// Lowercase `charset`
if (key === 'charset') {
value = value.toLowerCase();
}

return `${key}=${value}`;
});

const normalizedMediaType = [
...attributes
];

if (base64) {
normalizedMediaType.push('base64');
}

if (normalizedMediaType.length !== 0 || mimeType) {
normalizedMediaType.unshift(mimeType);
}

return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}`;
};

const normalizeUrl = (urlString, options) => {
options = {
defaultProtocol: 'http:',
Expand Down Expand Up @@ -41,7 +88,7 @@ const normalizeUrl = (urlString, options) => {
const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

// Prepend protocol
if (!isRelativeUrl) {
if (!isRelativeUrl && !/^data:/i.test(urlString)) {
urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
}

Expand Down Expand Up @@ -130,6 +177,12 @@ const normalizeUrl = (urlString, options) => {
urlObj.searchParams.sort();
}

// Data URL
if (urlObj.protocol === 'data:') {
const url = normalizeDataURL(`${urlObj.protocol}${urlObj.pathname}`);
return `${url}${urlObj.search}${urlObj.hash}`;
}

if (options.removeTrailingSlash) {
urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
}
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Expand Up @@ -33,7 +33,7 @@ normalizeUrl('HTTP://xn--xample-hva.com:80/?b=bar&a=foo');

Type: `string`

URL to normalize.
URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs).

#### options

Expand Down
43 changes: 43 additions & 0 deletions test.js
Expand Up @@ -204,3 +204,46 @@ test('remove duplicate pathname slashes', t => {
t.is(normalizeUrl('http://sindresorhus.com:5000//foo'), 'http://sindresorhus.com:5000/foo');
t.is(normalizeUrl('http://sindresorhus.com//foo'), 'http://sindresorhus.com/foo');
});

test('data URL', t => {
// Invalid URL.
t.throws(() => normalizeUrl('data:'), 'Invalid URL: data:');

// Normalize away trailing semicolon.
t.is(normalizeUrl('data:text/plain;charset=UTF-8;,foo'), 'data:text/plain;charset=utf-8,foo');

// Empty MIME type.
t.is(normalizeUrl('data:,'), 'data:,');

// Empty MIME type with charset.
t.is(normalizeUrl('data:;charset=utf-8,foo'), 'data:;charset=utf-8,foo');

// Lowercase the MIME type.
t.is(normalizeUrl('data:TEXT/plain,foo'), 'data:text/plain,foo');

// Lowercase the charset.
t.is(normalizeUrl('data:text/plain;charset=UTF-8,foo'), 'data:text/plain;charset=utf-8,foo');

// Remove spaces after the comma when it's base64.
t.is(normalizeUrl('data:image/gif;base64, R0lGODlhAQABAAAAACw= ?foo=bar'), 'data:image/gif;base64,R0lGODlhAQABAAAAACw=?foo=bar');

// Keep spaces when it's not base64.
t.is(normalizeUrl('data:text/plain;charset=utf-8, foo ?foo=bar'), 'data:text/plain;charset=utf-8, foo?foo=bar');

// Data URL with query and hash.
t.is(normalizeUrl('data:image/gif;base64,R0lGODlhAQABAAAAACw=?foo=bar#baz'), 'data:image/gif;base64,R0lGODlhAQABAAAAACw=?foo=bar#baz');

// Options.
t.is(normalizeUrl('data:text/plain;charset=utf-8,www.foo/index.html?foo=bar&a=a&utm_medium=test#baz', {
defaultProtocol: 'http:',
normalizeProtocol: true,
forceHttp: true,
stripHash: true,
stripWWW: true,
stripProtocol: true,
removeQueryParameters: [/^utm_\w+/i, 'ref'],
sortQueryParameters: true,
removeTrailingSlash: true,
removeDirectoryIndex: true
}), 'data:text/plain;charset=utf-8,www.foo/index.html?a=a&foo=bar');
});