From c93b4aee6e170481827baf5062c12f3137f73c6a Mon Sep 17 00:00:00 2001 From: Hiroki Osame Date: Wed, 3 Aug 2022 12:25:08 -0400 Subject: [PATCH 1/2] fix: cjs to load normalize-url --- dist/index.js | 251 +++++++++++++++++++++++++++++++++++++++++++++- dist/index.mjs | 248 ++++++++++++++++++++++++++++++++++++++++++++- package-lock.json | 1 - package.json | 1 - 4 files changed, 495 insertions(+), 6 deletions(-) diff --git a/dist/index.js b/dist/index.js index d9100ec..4c0f27e 100755 --- a/dist/index.js +++ b/dist/index.js @@ -1,12 +1,257 @@ 'use strict'; var parsePath = require('parse-path'); -var normalizeUrl = require('normalize-url'); function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; } var parsePath__default = /*#__PURE__*/_interopDefaultLegacy(parsePath); -var normalizeUrl__default = /*#__PURE__*/_interopDefaultLegacy(normalizeUrl); + +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs +const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; +const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; + +const testParameter = (name, filters) => filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); + +const normalizeDataURL = (urlString, {stripHash}) => { + const match = /^data:(?[^,]*?),(?[^#]*?)(?:#(?.*))?$/.exec(urlString); + + if (!match) { + throw new Error(`Invalid URL: ${urlString}`); + } + + let {type, data, hash} = match.groups; + const mediaType = type.split(';'); + hash = stripHash ? '' : hash; + + let isBase64 = false; + if (mediaType[mediaType.length - 1] === 'base64') { + mediaType.pop(); + isBase64 = true; + } + + // Lowercase MIME type + const mimeType = (mediaType.shift() || '').toLowerCase(); + const attributes = mediaType + .map(attribute => { + let [key, value = ''] = attribute.split('=').map(string => string.trim()); + + // Lowercase `charset` + if (key === 'charset') { + value = value.toLowerCase(); + + if (value === DATA_URL_DEFAULT_CHARSET) { + return ''; + } + } + + return `${key}${value ? `=${value}` : ''}`; + }) + .filter(Boolean); + + const normalizedMediaType = [ + ...attributes, + ]; + + if (isBase64) { + normalizedMediaType.push('base64'); + } + + if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { + normalizedMediaType.unshift(mimeType); + } + + return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`; +}; + +function normalizeUrl(urlString, options) { + options = { + defaultProtocol: 'http:', + normalizeProtocol: true, + forceHttp: false, + forceHttps: false, + stripAuthentication: true, + stripHash: false, + stripTextFragment: true, + stripWWW: true, + removeQueryParameters: [/^utm_\w+/i], + removeTrailingSlash: true, + removeSingleSlash: true, + removeDirectoryIndex: false, + sortQueryParameters: true, + ...options, + }; + + urlString = urlString.trim(); + + // Data URL + if (/^data:/i.test(urlString)) { + return normalizeDataURL(urlString, options); + } + + if (/^view-source:/i.test(urlString)) { + throw new Error('`view-source:` is not supported as it is a non-standard protocol'); + } + + const hasRelativeProtocol = urlString.startsWith('//'); + const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); + + // Prepend protocol + if (!isRelativeUrl) { + urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); + } + + const urlObject = new URL(urlString); + + if (options.forceHttp && options.forceHttps) { + throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); + } + + if (options.forceHttp && urlObject.protocol === 'https:') { + urlObject.protocol = 'http:'; + } + + if (options.forceHttps && urlObject.protocol === 'http:') { + urlObject.protocol = 'https:'; + } + + // Remove auth + if (options.stripAuthentication) { + urlObject.username = ''; + urlObject.password = ''; + } + + // Remove hash + if (options.stripHash) { + urlObject.hash = ''; + } else if (options.stripTextFragment) { + urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ''); + } + + // Remove duplicate slashes if not preceded by a protocol + // NOTE: This could be implemented using a single negative lookbehind + // regex, but we avoid that to maintain compatibility with older js engines + // which do not have support for that feature. + if (urlObject.pathname) { + // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(? 0) { + let pathComponents = urlObject.pathname.split('/'); + const lastComponent = pathComponents[pathComponents.length - 1]; + + if (testParameter(lastComponent, options.removeDirectoryIndex)) { + pathComponents = pathComponents.slice(0, -1); + urlObject.pathname = pathComponents.slice(1).join('/') + '/'; + } + } + + if (urlObject.hostname) { + // Remove trailing dot + urlObject.hostname = urlObject.hostname.replace(/\.$/, ''); + + // Remove `www.` + if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { + // Each label should be max 63 at length (min: 1). + // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names + // Each TLD should be up to 63 characters long (min: 2). + // It is technically possible to have a single character TLD, but none currently exist. + urlObject.hostname = urlObject.hostname.replace(/^www\./, ''); + } + } + + // Remove query unwanted parameters + if (Array.isArray(options.removeQueryParameters)) { + // eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy. + for (const key of [...urlObject.searchParams.keys()]) { + if (testParameter(key, options.removeQueryParameters)) { + urlObject.searchParams.delete(key); + } + } + } + + if (options.removeQueryParameters === true) { + urlObject.search = ''; + } + + // Sort query parameters + if (options.sortQueryParameters) { + urlObject.searchParams.sort(); + + // Calling `.sort()` encodes the search parameters, so we need to decode them again. + try { + urlObject.search = decodeURIComponent(urlObject.search); + } catch {} + } + + if (options.removeTrailingSlash) { + urlObject.pathname = urlObject.pathname.replace(/\/$/, ''); + } + + const oldUrlString = urlString; + + // Take advantage of many of the Node `url` normalizations + urlString = urlObject.toString(); + + if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') { + urlString = urlString.replace(/\/$/, ''); + } + + // Remove ending `/` unless removeSingleSlash is false + if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) { + urlString = urlString.replace(/\/$/, ''); + } + + // Restore relative protocol, if applicable + if (hasRelativeProtocol && !options.normalizeProtocol) { + urlString = urlString.replace(/^http:\/\//, '//'); + } + + // Remove http/https + if (options.stripProtocol) { + urlString = urlString.replace(/^(?:https?:)?\/\//, ''); + } + + return urlString; +} // Dependencies @@ -65,7 +310,7 @@ const parseUrl = (url, normalize = false) => { stripHash: false }; } - url = normalizeUrl__default["default"](url, normalize); + url = normalizeUrl(url, normalize); } const parsed = parsePath__default["default"](url); diff --git a/dist/index.mjs b/dist/index.mjs index 0e255fd..f18c9e3 100755 --- a/dist/index.mjs +++ b/dist/index.mjs @@ -1,5 +1,251 @@ import parsePath from 'parse-path'; -import normalizeUrl from 'normalize-url'; + +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs +const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; +const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; + +const testParameter = (name, filters) => filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); + +const normalizeDataURL = (urlString, {stripHash}) => { + const match = /^data:(?[^,]*?),(?[^#]*?)(?:#(?.*))?$/.exec(urlString); + + if (!match) { + throw new Error(`Invalid URL: ${urlString}`); + } + + let {type, data, hash} = match.groups; + const mediaType = type.split(';'); + hash = stripHash ? '' : hash; + + let isBase64 = false; + if (mediaType[mediaType.length - 1] === 'base64') { + mediaType.pop(); + isBase64 = true; + } + + // Lowercase MIME type + const mimeType = (mediaType.shift() || '').toLowerCase(); + const attributes = mediaType + .map(attribute => { + let [key, value = ''] = attribute.split('=').map(string => string.trim()); + + // Lowercase `charset` + if (key === 'charset') { + value = value.toLowerCase(); + + if (value === DATA_URL_DEFAULT_CHARSET) { + return ''; + } + } + + return `${key}${value ? `=${value}` : ''}`; + }) + .filter(Boolean); + + const normalizedMediaType = [ + ...attributes, + ]; + + if (isBase64) { + normalizedMediaType.push('base64'); + } + + if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { + normalizedMediaType.unshift(mimeType); + } + + return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`; +}; + +function normalizeUrl(urlString, options) { + options = { + defaultProtocol: 'http:', + normalizeProtocol: true, + forceHttp: false, + forceHttps: false, + stripAuthentication: true, + stripHash: false, + stripTextFragment: true, + stripWWW: true, + removeQueryParameters: [/^utm_\w+/i], + removeTrailingSlash: true, + removeSingleSlash: true, + removeDirectoryIndex: false, + sortQueryParameters: true, + ...options, + }; + + urlString = urlString.trim(); + + // Data URL + if (/^data:/i.test(urlString)) { + return normalizeDataURL(urlString, options); + } + + if (/^view-source:/i.test(urlString)) { + throw new Error('`view-source:` is not supported as it is a non-standard protocol'); + } + + const hasRelativeProtocol = urlString.startsWith('//'); + const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); + + // Prepend protocol + if (!isRelativeUrl) { + urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); + } + + const urlObject = new URL(urlString); + + if (options.forceHttp && options.forceHttps) { + throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); + } + + if (options.forceHttp && urlObject.protocol === 'https:') { + urlObject.protocol = 'http:'; + } + + if (options.forceHttps && urlObject.protocol === 'http:') { + urlObject.protocol = 'https:'; + } + + // Remove auth + if (options.stripAuthentication) { + urlObject.username = ''; + urlObject.password = ''; + } + + // Remove hash + if (options.stripHash) { + urlObject.hash = ''; + } else if (options.stripTextFragment) { + urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ''); + } + + // Remove duplicate slashes if not preceded by a protocol + // NOTE: This could be implemented using a single negative lookbehind + // regex, but we avoid that to maintain compatibility with older js engines + // which do not have support for that feature. + if (urlObject.pathname) { + // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(? 0) { + let pathComponents = urlObject.pathname.split('/'); + const lastComponent = pathComponents[pathComponents.length - 1]; + + if (testParameter(lastComponent, options.removeDirectoryIndex)) { + pathComponents = pathComponents.slice(0, -1); + urlObject.pathname = pathComponents.slice(1).join('/') + '/'; + } + } + + if (urlObject.hostname) { + // Remove trailing dot + urlObject.hostname = urlObject.hostname.replace(/\.$/, ''); + + // Remove `www.` + if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { + // Each label should be max 63 at length (min: 1). + // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names + // Each TLD should be up to 63 characters long (min: 2). + // It is technically possible to have a single character TLD, but none currently exist. + urlObject.hostname = urlObject.hostname.replace(/^www\./, ''); + } + } + + // Remove query unwanted parameters + if (Array.isArray(options.removeQueryParameters)) { + // eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy. + for (const key of [...urlObject.searchParams.keys()]) { + if (testParameter(key, options.removeQueryParameters)) { + urlObject.searchParams.delete(key); + } + } + } + + if (options.removeQueryParameters === true) { + urlObject.search = ''; + } + + // Sort query parameters + if (options.sortQueryParameters) { + urlObject.searchParams.sort(); + + // Calling `.sort()` encodes the search parameters, so we need to decode them again. + try { + urlObject.search = decodeURIComponent(urlObject.search); + } catch {} + } + + if (options.removeTrailingSlash) { + urlObject.pathname = urlObject.pathname.replace(/\/$/, ''); + } + + const oldUrlString = urlString; + + // Take advantage of many of the Node `url` normalizations + urlString = urlObject.toString(); + + if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') { + urlString = urlString.replace(/\/$/, ''); + } + + // Remove ending `/` unless removeSingleSlash is false + if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) { + urlString = urlString.replace(/\/$/, ''); + } + + // Restore relative protocol, if applicable + if (hasRelativeProtocol && !options.normalizeProtocol) { + urlString = urlString.replace(/^http:\/\//, '//'); + } + + // Remove http/https + if (options.stripProtocol) { + urlString = urlString.replace(/^(?:https?:)?\/\//, ''); + } + + return urlString; +} // Dependencies diff --git a/package-lock.json b/package-lock.json index 67316d3..8d16f7a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,6 @@ "version": "8.0.0", "license": "MIT", "dependencies": { - "normalize-url": "^7.0.3", "parse-path": "^7.0.0" }, "devDependencies": { diff --git a/package.json b/package.json index 15a998b..1c9425f 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,6 @@ "normalize-url": "^7.0.3" }, "dependencies": { - "normalize-url": "^7.0.3", "parse-path": "^7.0.0" }, "files": [ From c2735c9fb3cd31c5261d07464bc08f7bb706fc21 Mon Sep 17 00:00:00 2001 From: Hiroki Osame Date: Wed, 3 Aug 2022 12:30:35 -0400 Subject: [PATCH 2/2] test: use cjs file --- package.json | 4 ++-- test/index.mjs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 1c9425f..e7a1745 100644 --- a/package.json +++ b/package.json @@ -36,9 +36,9 @@ }, "homepage": "https://github.com/IonicaBizau/parse-url", "devDependencies": { + "normalize-url": "^7.0.3", "pkgroll": "^1.4.0", - "tester": "^1.3.1", - "normalize-url": "^7.0.3" + "tester": "^1.3.1" }, "dependencies": { "parse-path": "^7.0.0" diff --git a/test/index.mjs b/test/index.mjs index 7b484ba..0bb6ead 100644 --- a/test/index.mjs +++ b/test/index.mjs @@ -1,5 +1,5 @@ // Dependencies -import parseUrl from "../dist/index.mjs"; +import parseUrl from "../dist/index.js"; import tester from "tester"; import normalizeUrl from "normalize-url";