From 7f36886bd64daa4f1fc972bae0fa546a2518d051 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Wed, 22 Apr 2020 18:59:19 +0530 Subject: [PATCH 01/12] Improve scheme check in repo url --- twine/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/twine/utils.py b/twine/utils.py index 81b935ed..fb8cb3a5 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -104,14 +104,15 @@ def get_repository_from_config( ) -> RepositoryConfig: # Get our config from, if provided, command-line values for the # repository name and URL, or the .pypirc file - if repository_url and "://" in repository_url: + parsed = urlparse(repository_url) + if repository_url and parsed.scheme: # prefer CLI `repository_url` over `repository` or .pypirc return { "repository": repository_url, "username": None, "password": None, } - if repository_url and "://" not in repository_url: + if repository_url and not parsed.scheme: raise exceptions.UnreachableRepositoryURLDetected( "Repository URL {} has no protocol. Please add " "'https://'. \n".format(repository_url) From 24bf15df128dff8aa0dea5a027d94843416a97c9 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sat, 9 May 2020 00:27:54 +0530 Subject: [PATCH 02/12] Use rfc3986 to validate repo url --- setup.cfg | 1 + twine/utils.py | 32 +++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index e00e405d..fa41cab6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,7 @@ install_requires= tqdm >= 4.14 importlib_metadata; python_version < "3.8" keyring >= 15.1 + rfc3986 >= 1.4.0 setup_requires = setuptools_scm >= 1.15 diff --git a/twine/utils.py b/twine/utils.py index fb8cb3a5..dcf42535 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -28,6 +28,8 @@ from urllib.parse import urlunparse import requests +from rfc3986 import uri_reference +from rfc3986 import validators from twine import exceptions @@ -99,20 +101,44 @@ def get_config(path: str = "~/.pypirc") -> Dict[str, RepositoryConfig]: return dict(config) +def validate_url(repository_url: Optional[str]) -> Any: + """Validate the given url for allowed schemes and components""" + + if not repository_url: + return None + + # Scheme should always be https, and the url should at minimum + # contain scheme and host + validator = ( + validators.Validator() + .allow_schemes("https") + .require_presence_of("scheme", "host") + ) + + # Only return scheme when the url is valid + url = uri_reference(repository_url) + try: + validator.validate(url) + except Exception: + return None + + return url.scheme + + def get_repository_from_config( config_file: str, repository: str, repository_url: Optional[str] = None ) -> RepositoryConfig: # Get our config from, if provided, command-line values for the # repository name and URL, or the .pypirc file - parsed = urlparse(repository_url) - if repository_url and parsed.scheme: + scheme = validate_url(repository_url) + if repository_url and scheme: # prefer CLI `repository_url` over `repository` or .pypirc return { "repository": repository_url, "username": None, "password": None, } - if repository_url and not parsed.scheme: + if repository_url and not scheme: raise exceptions.UnreachableRepositoryURLDetected( "Repository URL {} has no protocol. Please add " "'https://'. \n".format(repository_url) From 8bc6d8a1a43733a02ecea770cd70262b6169461c Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sat, 9 May 2020 00:32:18 +0530 Subject: [PATCH 03/12] Patch validate_url in integration tests for devpi and pypi server --- tests/test_integration.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index e1c931ff..2c8eadca 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,7 +1,8 @@ from twine import cli +from twine import utils -def test_devpi_upload(devpi_server, uploadable_dist): +def test_devpi_upload(devpi_server, uploadable_dist, monkeypatch): command = [ "upload", "--repository-url", @@ -12,6 +13,9 @@ def test_devpi_upload(devpi_server, uploadable_dist): devpi_server.password, str(uploadable_dist), ] + + # Patching validate url to return https for devpiserver url + monkeypatch.setattr(utils, "validate_url", lambda repository_url: "https") cli.dispatch(command) @@ -38,7 +42,7 @@ def test_pypi_upload(sampleproject_dist): cli.dispatch(command) -def test_pypiserver_upload(pypiserver_instance, uploadable_dist): +def test_pypiserver_upload(pypiserver_instance, uploadable_dist, monkeypatch): command = [ "upload", "--repository-url", @@ -49,4 +53,7 @@ def test_pypiserver_upload(pypiserver_instance, uploadable_dist): "any", str(uploadable_dist), ] + + # Patching validate url to return https for pypiserver url + monkeypatch.setattr(utils, "validate_url", lambda repository_url: "https") cli.dispatch(command) From 9120e2e447339e718f1c5ff6d2d916a28cd3b22a Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sun, 10 May 2020 13:57:11 +0530 Subject: [PATCH 04/12] Update validate_url to return boolean --- tests/test_integration.py | 8 ++++---- twine/utils.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index 2c8eadca..b0c8ff35 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -14,8 +14,8 @@ def test_devpi_upload(devpi_server, uploadable_dist, monkeypatch): str(uploadable_dist), ] - # Patching validate url to return https for devpiserver url - monkeypatch.setattr(utils, "validate_url", lambda repository_url: "https") + # Patching validate url to validate devpi server url by allowing http + monkeypatch.setattr(utils, "validate_url", lambda repository_url: True) cli.dispatch(command) @@ -54,6 +54,6 @@ def test_pypiserver_upload(pypiserver_instance, uploadable_dist, monkeypatch): str(uploadable_dist), ] - # Patching validate url to return https for pypiserver url - monkeypatch.setattr(utils, "validate_url", lambda repository_url: "https") + # Patching validate url to validate devpi server url by allowing http + monkeypatch.setattr(utils, "validate_url", lambda repository_url: True) cli.dispatch(command) diff --git a/twine/utils.py b/twine/utils.py index dcf42535..468686d8 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -101,11 +101,11 @@ def get_config(path: str = "~/.pypirc") -> Dict[str, RepositoryConfig]: return dict(config) -def validate_url(repository_url: Optional[str]) -> Any: +def validate_url(repository_url: Optional[str]) -> bool: """Validate the given url for allowed schemes and components""" if not repository_url: - return None + return False # Scheme should always be https, and the url should at minimum # contain scheme and host @@ -120,9 +120,9 @@ def validate_url(repository_url: Optional[str]) -> Any: try: validator.validate(url) except Exception: - return None + return False - return url.scheme + return True def get_repository_from_config( From 9dbbd272ad40ee4958bf5700f3048302571a482d Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sun, 10 May 2020 14:17:05 +0530 Subject: [PATCH 05/12] Ignore missing type annotations for rfc3986 --- mypy.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy.ini b/mypy.ini index 4da4c6b0..40451b88 100644 --- a/mypy.ini +++ b/mypy.ini @@ -49,3 +49,6 @@ ignore_missing_imports = True [mypy-urllib3] ; https://github.com/urllib3/urllib3/issues/867 ignore_missing_imports = True + +[mypy-rfc3986] +ignore_missing_imports = True From 60e2c9f8e46a67c086b1d822751a200c8e0b7435 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sun, 10 May 2020 20:54:48 +0530 Subject: [PATCH 06/12] Revert patching of integration test --- tests/test_integration.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index b0c8ff35..e1c931ff 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,8 +1,7 @@ from twine import cli -from twine import utils -def test_devpi_upload(devpi_server, uploadable_dist, monkeypatch): +def test_devpi_upload(devpi_server, uploadable_dist): command = [ "upload", "--repository-url", @@ -13,9 +12,6 @@ def test_devpi_upload(devpi_server, uploadable_dist, monkeypatch): devpi_server.password, str(uploadable_dist), ] - - # Patching validate url to validate devpi server url by allowing http - monkeypatch.setattr(utils, "validate_url", lambda repository_url: True) cli.dispatch(command) @@ -42,7 +38,7 @@ def test_pypi_upload(sampleproject_dist): cli.dispatch(command) -def test_pypiserver_upload(pypiserver_instance, uploadable_dist, monkeypatch): +def test_pypiserver_upload(pypiserver_instance, uploadable_dist): command = [ "upload", "--repository-url", @@ -53,7 +49,4 @@ def test_pypiserver_upload(pypiserver_instance, uploadable_dist, monkeypatch): "any", str(uploadable_dist), ] - - # Patching validate url to validate devpi server url by allowing http - monkeypatch.setattr(utils, "validate_url", lambda repository_url: True) cli.dispatch(command) From 781c39e967db16e660bfed2e5b166683a01bdd53 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sun, 10 May 2020 20:59:14 +0530 Subject: [PATCH 07/12] Refactor validate_url to allow http and raise specific exceptions --- twine/utils.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/twine/utils.py b/twine/utils.py index 468686d8..d59e1e7c 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -28,6 +28,7 @@ from urllib.parse import urlunparse import requests +from rfc3986 import exceptions as rfc_exceptions from rfc3986 import uri_reference from rfc3986 import validators @@ -101,28 +102,19 @@ def get_config(path: str = "~/.pypirc") -> Dict[str, RepositoryConfig]: return dict(config) -def validate_url(repository_url: Optional[str]) -> bool: +def validate_url(repository_url: str) -> None: """Validate the given url for allowed schemes and components""" - if not repository_url: - return False - - # Scheme should always be https, and the url should at minimum - # contain scheme and host + # Allowed schemes are http and https, based on whether the repository + # supports TLS or not, and scheme and host must be present in the URL validator = ( validators.Validator() - .allow_schemes("https") + .allow_schemes("http", "https") .require_presence_of("scheme", "host") ) - # Only return scheme when the url is valid url = uri_reference(repository_url) - try: - validator.validate(url) - except Exception: - return False - - return True + validator.validate(url) def get_repository_from_config( @@ -130,19 +122,24 @@ def get_repository_from_config( ) -> RepositoryConfig: # Get our config from, if provided, command-line values for the # repository name and URL, or the .pypirc file - scheme = validate_url(repository_url) - if repository_url and scheme: + + if repository_url: + try: + validate_url(repository_url) + except rfc_exceptions.MissingComponentError as exc: + raise exceptions.UnreachableRepositoryURLDetected( + "Repository URL has missing components. {}.".format(exc.args[0]) + ) + except rfc_exceptions.UnpermittedComponentError as exc: + raise exceptions.UnreachableRepositoryURLDetected( + "Repository URL has an invalid scheme. {}".format(exc.args[0]) + ) # prefer CLI `repository_url` over `repository` or .pypirc return { "repository": repository_url, "username": None, "password": None, } - if repository_url and not scheme: - raise exceptions.UnreachableRepositoryURLDetected( - "Repository URL {} has no protocol. Please add " - "'https://'. \n".format(repository_url) - ) try: return get_config(config_file)[repository] except KeyError: From 1b1276f48c4e28fc668719e21bc4acc6254734a0 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Sun, 10 May 2020 21:01:33 +0530 Subject: [PATCH 08/12] Add unit tests to check invalid repo urls --- tests/test_utils.py | 46 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index a9d759cb..cc9a2236 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -184,15 +184,51 @@ def test_get_repository_config_missing(tmpdir): assert utils.get_repository_from_config(pypirc, "pypi") == exp -def test_get_repository_config_invalid_url(tmpdir): +def test_get_repository_config_invalid_scheme(tmpdir): """ - Test if we get an URL without a protocol + Test if we get an URL with a invalid scheme + """ + + import re + + pypirc = os.path.join(str(tmpdir), ".pypirc") + + with pytest.raises( + exceptions.UnreachableRepositoryURLDetected, + match=re.escape( + r"Repository URL has an invalid scheme. " + r"scheme was required to be one of ['http', 'https'] but was 'ftp'" + ), + ): + utils.get_repository_from_config(pypirc, "foo.bar", "ftp://test.pypi.org") + + +def test_get_repository_config_missing_components(tmpdir): + """ + Test if we get an URL with missing components """ pypirc = os.path.join(str(tmpdir), ".pypirc") - repository_url = "foo.bar" - with pytest.raises(exceptions.UnreachableRepositoryURLDetected): - utils.get_repository_from_config(pypirc, "foo.bar", repository_url) + with pytest.raises( + exceptions.UnreachableRepositoryURLDetected, + match="Repository URL has missing components. " + "host was required but missing.", + ): + utils.get_repository_from_config(pypirc, "foo.bar", "https:/") + + with pytest.raises( + exceptions.UnreachableRepositoryURLDetected, + match="Repository URL has missing components. " + "scheme was required but missing.", + ): + utils.get_repository_from_config(pypirc, "foo.bar", "//test.pypi.org") + + with pytest.raises( + exceptions.UnreachableRepositoryURLDetected, + match="Repository URL has missing components. " + "host, scheme were required but missing.", + ): + utils.get_repository_from_config(pypirc, "foo.bar", "foo.bar") def test_get_repository_config_missing_config(tmpdir): From 197670a8ce2426686d4a942cf131cdfde4f1380d Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Mon, 11 May 2020 03:49:03 +0530 Subject: [PATCH 09/12] Fix imports and error message --- twine/utils.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/twine/utils.py b/twine/utils.py index d59e1e7c..16c071a4 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -28,9 +28,7 @@ from urllib.parse import urlunparse import requests -from rfc3986 import exceptions as rfc_exceptions -from rfc3986 import uri_reference -from rfc3986 import validators +import rfc3986 from twine import exceptions @@ -108,12 +106,12 @@ def validate_url(repository_url: str) -> None: # Allowed schemes are http and https, based on whether the repository # supports TLS or not, and scheme and host must be present in the URL validator = ( - validators.Validator() + rfc3986.validators.Validator() .allow_schemes("http", "https") .require_presence_of("scheme", "host") ) - url = uri_reference(repository_url) + url = rfc3986.uri_reference(repository_url) validator.validate(url) @@ -126,13 +124,9 @@ def get_repository_from_config( if repository_url: try: validate_url(repository_url) - except rfc_exceptions.MissingComponentError as exc: + except rfc3986.exceptions.RFC3986Exception as exc: raise exceptions.UnreachableRepositoryURLDetected( - "Repository URL has missing components. {}.".format(exc.args[0]) - ) - except rfc_exceptions.UnpermittedComponentError as exc: - raise exceptions.UnreachableRepositoryURLDetected( - "Repository URL has an invalid scheme. {}".format(exc.args[0]) + f"Invalid repository URL: {exc.args[0]}." ) # prefer CLI `repository_url` over `repository` or .pypirc return { From 5c2fff315e44fcf362d301a9988bc9cdfeb6673d Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Mon, 11 May 2020 03:49:50 +0530 Subject: [PATCH 10/12] Fix error message check in test utils invalid url --- tests/test_utils.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index cc9a2236..2836dcbd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -189,16 +189,12 @@ def test_get_repository_config_invalid_scheme(tmpdir): Test if we get an URL with a invalid scheme """ - import re - pypirc = os.path.join(str(tmpdir), ".pypirc") with pytest.raises( exceptions.UnreachableRepositoryURLDetected, - match=re.escape( - r"Repository URL has an invalid scheme. " - r"scheme was required to be one of ['http', 'https'] but was 'ftp'" - ), + match=r"Invalid repository URL: " + r"scheme was required to be one of \['http', 'https'\] but was 'ftp'.", ): utils.get_repository_from_config(pypirc, "foo.bar", "ftp://test.pypi.org") @@ -211,22 +207,19 @@ def test_get_repository_config_missing_components(tmpdir): with pytest.raises( exceptions.UnreachableRepositoryURLDetected, - match="Repository URL has missing components. " - "host was required but missing.", + match="Invalid repository URL: host was required but missing.", ): utils.get_repository_from_config(pypirc, "foo.bar", "https:/") with pytest.raises( exceptions.UnreachableRepositoryURLDetected, - match="Repository URL has missing components. " - "scheme was required but missing.", + match="Invalid repository URL: scheme was required but missing.", ): utils.get_repository_from_config(pypirc, "foo.bar", "//test.pypi.org") with pytest.raises( exceptions.UnreachableRepositoryURLDetected, - match="Repository URL has missing components. " - "host, scheme were required but missing.", + match="Invalid repository URL: host, scheme were required but missing.", ): utils.get_repository_from_config(pypirc, "foo.bar", "foo.bar") From 42b032be772121005e8d5c78f7eb1d4eb0718cd9 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Mon, 11 May 2020 03:53:15 +0530 Subject: [PATCH 11/12] Fix mypy missing import order for rfc3986 --- mypy.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mypy.ini b/mypy.ini index 40451b88..1aedc625 100644 --- a/mypy.ini +++ b/mypy.ini @@ -38,6 +38,9 @@ ignore_missing_imports = True ; https://github.com/pypa/readme_renderer/issues/166 ignore_missing_imports = True +[mypy-rfc3986] +ignore_missing_imports = True + [mypy-setuptools] ; https://github.com/python/typeshed/issues/2171 ignore_missing_imports = True @@ -49,6 +52,3 @@ ignore_missing_imports = True [mypy-urllib3] ; https://github.com/urllib3/urllib3/issues/867 ignore_missing_imports = True - -[mypy-rfc3986] -ignore_missing_imports = True From 88f6a99721089ca67f80ad6f75d6c9b4bdef0901 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Singh Date: Mon, 11 May 2020 15:47:56 +0530 Subject: [PATCH 12/12] Make validate url private and move exception handling inside it --- twine/utils.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/twine/utils.py b/twine/utils.py index 16c071a4..d0fa277f 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -100,7 +100,7 @@ def get_config(path: str = "~/.pypirc") -> Dict[str, RepositoryConfig]: return dict(config) -def validate_url(repository_url: str) -> None: +def _validate_repository_url(repository_url: str) -> None: """Validate the given url for allowed schemes and components""" # Allowed schemes are http and https, based on whether the repository @@ -110,9 +110,12 @@ def validate_url(repository_url: str) -> None: .allow_schemes("http", "https") .require_presence_of("scheme", "host") ) - - url = rfc3986.uri_reference(repository_url) - validator.validate(url) + try: + validator.validate(rfc3986.uri_reference(repository_url)) + except rfc3986.exceptions.RFC3986Exception as exc: + raise exceptions.UnreachableRepositoryURLDetected( + f"Invalid repository URL: {exc.args[0]}." + ) def get_repository_from_config( @@ -122,12 +125,7 @@ def get_repository_from_config( # repository name and URL, or the .pypirc file if repository_url: - try: - validate_url(repository_url) - except rfc3986.exceptions.RFC3986Exception as exc: - raise exceptions.UnreachableRepositoryURLDetected( - f"Invalid repository URL: {exc.args[0]}." - ) + _validate_repository_url(repository_url) # prefer CLI `repository_url` over `repository` or .pypirc return { "repository": repository_url,