diff --git a/index.js b/index.js index 4c99af1..4eea255 100644 --- a/index.js +++ b/index.js @@ -2,6 +2,10 @@ // TODO: Use the `URL` global when targeting Node.js 10 const URLParser = typeof URL === 'undefined' ? require('url').URL : URL; +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs +const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; +const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; + const testParameter = (name, filters) => { return filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); }; @@ -27,17 +31,21 @@ const normalizeDataURL = (urlString, {stripHash}) => { // Lowercase MIME type const mimeType = (mediaType.shift() || '').toLowerCase(); const attributes = mediaType - .filter(Boolean) .map(attribute => { let [key, value = ''] = attribute.split('=').map(string => string.trim()); // Lowercase `charset` if (key === 'charset') { value = value.toLowerCase(); + + if (value === DATA_URL_DEFAULT_CHARSET) { + return ''; + } } return `${key}${value ? `=${value}` : ''}`; - }); + }) + .filter(Boolean); const normalizedMediaType = [ ...attributes @@ -47,7 +55,7 @@ const normalizeDataURL = (urlString, {stripHash}) => { normalizedMediaType.push('base64'); } - if (normalizedMediaType.length !== 0 || mimeType) { + if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { normalizedMediaType.unshift(mimeType); } diff --git a/test.js b/test.js index efcda01..dba2e13 100644 --- a/test.js +++ b/test.js @@ -216,8 +216,14 @@ test('data URL', t => { // Invalid URL. t.throws(() => normalizeUrl('data:'), 'Invalid URL: data:'); + // Strip default MIME type + t.is(normalizeUrl('data:text/plain,foo'), 'data:,foo'); + + // Strip default charset + t.is(normalizeUrl('data:;charset=us-ascii,foo'), 'data:,foo'); + // Normalize away trailing semicolon. - t.is(normalizeUrl('data:text/plain;charset=UTF-8;,foo'), 'data:text/plain;charset=utf-8,foo'); + t.is(normalizeUrl('data:;charset=UTF-8;,foo'), 'data:;charset=utf-8,foo'); // Empty MIME type. t.is(normalizeUrl('data:,'), 'data:,'); @@ -226,23 +232,22 @@ test('data URL', t => { t.is(normalizeUrl('data:;charset=utf-8,foo'), 'data:;charset=utf-8,foo'); // Lowercase the MIME type. - t.is(normalizeUrl('data:TEXT/plain,foo'), 'data:text/plain,foo'); + t.is(normalizeUrl('data:TEXT/HTML,foo'), 'data:text/html,foo'); // Strip empty hash. t.is(normalizeUrl('data:,foo# '), 'data:,foo'); // Key only mediaType attribute. - t.is(normalizeUrl('data:text/plain;foo=,'), 'data:text/plain;foo,'); - t.is(normalizeUrl('data:text/plain; foo,'), 'data:text/plain;foo,'); + t.is(normalizeUrl('data:;foo=;bar,'), 'data:;foo;bar,'); // Lowercase the charset. - t.is(normalizeUrl('data:text/plain;charset=UTF-8,foo'), 'data:text/plain;charset=utf-8,foo'); + t.is(normalizeUrl('data:;charset=UTF-8,foo'), 'data:;charset=utf-8,foo'); // Remove spaces after the comma when it's base64. - t.is(normalizeUrl('data:image/gif;base64, R0lGODlhAQABAAAAACw= #foo #bar'), '#foo #bar'); + t.is(normalizeUrl('data:;base64, Zm9v #foo #bar'), 'data:;base64,Zm9v#foo #bar'); // Keep spaces when it's not base64. - t.is(normalizeUrl('data:text/plain;charset=utf-8, foo #bar'), 'data:text/plain;charset=utf-8, foo #bar'); + t.is(normalizeUrl('data:, foo #bar'), 'data:, foo #bar'); // Options. const options = {