diff --git a/dparse/parser.py b/dparse/parser.py index 7218e8c..eb62632 100644 --- a/dparse/parser.py +++ b/dparse/parser.py @@ -8,7 +8,7 @@ from configparser import ConfigParser, NoOptionError -from .regex import URL_REGEX, HASH_REGEX +from .regex import HASH_REGEX from .dependencies import DependencyFile, Dependency from packaging.requirements import Requirement as PackagingRequirement, InvalidRequirement @@ -175,10 +175,11 @@ def parse_index_server(cls, line): :param line: :return: """ - matches = URL_REGEX.findall(line) - if matches: - url = matches[0] - return url if url.endswith("/") else url + "/" + groups = re.split(pattern="[=\s]+", string=line.strip(), maxsplit=100) + + if len(groups) >= 2: + return groups[1] if groups[1].endswith("/") else groups[1] + "/" + return None @classmethod @@ -346,6 +347,7 @@ def parse(self): except (toml.TomlDecodeError, IndexError) as e: pass + class PipfileLockParser(Parser): def parse(self): diff --git a/dparse/regex.py b/dparse/regex.py index 40cc409..4a1204e 100644 --- a/dparse/regex.py +++ b/dparse/regex.py @@ -1,39 +1,4 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, unicode_literals -import re -# see https://gist.github.com/dperini/729294 -URL_REGEX = re.compile( - # protocol identifier - "(?:(?:https?|ftp)://)" - # user:pass authentication - "(?:\S+(?::\S*)?@)?" - "(?:" - # IP address exclusion - # private & local networks - "(?!(?:10|127)(?:\.\d{1,3}){3})" - "(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" - "(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" - # IP address dotted notation octets - # excludes loopback network 0.0.0.0 - # excludes reserved space >= 224.0.0.0 - # excludes network & broadcast addresses - # (first & last IP address of each class) - "(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" - "(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" - "(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" - "|" - # host name - "(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)" - # domain name - "(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*" - # TLD identifier - "(?:\.(?:[a-z\u00a1-\uffff]{2,}))" - ")" - # port number - "(?::\d{2,5})?" - # resource path - "(?:/\S*)?", - re.UNICODE) - -HASH_REGEX = r"--hash[=| ][\w]+:[\w]+" +HASH_REGEX = r"--hash[=| ]\w+:\w+"