/*
Normalize a `data:` URL of the form `data:[<mime type>][;<parameter>]*[;base64],<payload>`.

Normalization performed:
- The MIME type is trimmed and lowercased.
- Empty or whitespace-only parameters (for example, one left by a trailing `;`) are dropped.
- Parameters are split on the first `=` only, so values that contain `=` are kept intact.
- The `charset` parameter name and value are lowercased.
- A parameter with no value is emitted without a trailing `=`.
- The `base64` marker is matched case-insensitively and re-emitted in lowercase.
- For base64 payloads, surrounding whitespace in the payload is removed; other payloads are left as they are
  (apart from the trimming applied to the whole input string).

@param {string} urlString - The data URL to normalize.
@returns {string} The normalized data URL.
@throws {Error} With the message `Invalid URL: <input>` when the input is not `data:` followed by a comma-separated payload.
*/
const normalizeDataURL = urlString => {
	// Everything up to the first comma is the media type; the rest is the payload.
	const parts = urlString.trim().match(/^data:(.*?),(.*)$/);

	if (!parts) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	// Trim every segment up front so that whitespace-only segments can be discarded later.
	const mediaType = parts[1].split(';').map(segment => segment.trim());
	const body = parts[2];

	let base64 = false;

	// `base64` only acts as the encoding marker when it follows a `;`.
	// A lone first segment is the MIME type, even if it happens to read `base64`.
	if (mediaType.length > 1 && mediaType[mediaType.length - 1].toLowerCase() === 'base64') {
		mediaType.pop();
		base64 = true;
	}

	// Lowercase MIME type
	const mimeType = (mediaType.shift() || '').toLowerCase();
	const attributes = mediaType
		.filter(Boolean)
		.map(attribute => {
			// Split on the first `=` only; the value itself may legitimately contain `=`.
			const separatorIndex = attribute.indexOf('=');
			const hasSeparator = separatorIndex !== -1;
			let key = (hasSeparator ? attribute.slice(0, separatorIndex) : attribute).trim();
			let value = hasSeparator ? attribute.slice(separatorIndex + 1).trim() : '';

			// Lowercase `charset`
			if (key.toLowerCase() === 'charset') {
				key = 'charset';
				value = value.toLowerCase();
			}

			return value ? `${key}=${value}` : key;
		})
		.filter(Boolean);

	const normalizedMediaType = [
		...attributes
	];

	if (base64) {
		normalizedMediaType.push('base64');
	}

	// Keep the (possibly empty) MIME type slot whenever something follows it, so the leading `;` survives.
	if (normalizedMediaType.length !== 0 || mimeType) {
		normalizedMediaType.unshift(mimeType);
	}

	return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}`;
};
+ t.is(normalizeUrl('data:TEXT/plain,foo'), 'data:text/plain,foo'); + + // Lowercase the charset. + t.is(normalizeUrl('data:text/plain;charset=UTF-8,foo'), 'data:text/plain;charset=utf-8,foo'); + + // Remove spaces after the comma when it's base64. + t.is(normalizeUrl('data:image/gif;base64, R0lGODlhAQABAAAAACw= ?foo=bar'), '?foo=bar'); + + // Keep spaces when it's not base64. + t.is(normalizeUrl('data:text/plain;charset=utf-8, foo ?foo=bar'), 'data:text/plain;charset=utf-8, foo?foo=bar'); + + // Data URL with query and hash. + t.is(normalizeUrl('?foo=bar#baz'), '?foo=bar#baz'); + + // Options. + t.is(normalizeUrl('data:text/plain;charset=utf-8,www.foo/index.html?foo=bar&a=a&utm_medium=test#baz', { + defaultProtocol: 'http:', + normalizeProtocol: true, + forceHttp: true, + stripHash: true, + stripWWW: true, + stripProtocol: true, + removeQueryParameters: [/^utm_\w+/i, 'ref'], + sortQueryParameters: true, + removeTrailingSlash: true, + removeDirectoryIndex: true + }), 'data:text/plain;charset=utf-8,www.foo/index.html?a=a&foo=bar'); +});