forked from python-validators/validators
/
url.py
154 lines (128 loc) · 4.82 KB
/
url.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import re
from .utils import validator
ip_middle_octet = r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))"
ip_last_octet = r"(?:\.(?:0|[1-9]\d?|1\d\d|2[0-4]\d|25[0-5]))"
regex = re.compile( # noqa: W605
r"^"
# protocol identifier
r"(?:(?:https?|ftp)://)"
# user:pass authentication
r"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+"
r"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?"
r"(?:"
r"(?P<private_ip>"
# IP address exclusion
# private & local networks
r"(?:(?:10|127)" + ip_middle_octet + r"{2}" + ip_last_octet + r")|"
r"(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + r")|"
r"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + r"))"
r"|"
# private & local hosts
r"(?P<private_host>"
r"(?:localhost))"
r"|"
# IP address dotted notation octets
# excludes loopback network 0.0.0.0
# excludes reserved space >= 224.0.0.0
# excludes network & broadcast addresses
# (first & last IP address of each class)
r"(?P<public_ip>"
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
r"" + ip_middle_octet + r"{2}"
r"" + ip_last_octet + r")"
r"|"
# IPv6 RegEx from https://stackoverflow.com/a/17871737
r"\[("
# 1:2:3:4:5:6:7:8
r"([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|"
# 1:: 1:2:3:4:5:6:7::
r"([0-9a-fA-F]{1,4}:){1,7}:|"
# 1::8 1:2:3:4:5:6::8 1:2:3:4:5:6::8
r"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|"
# 1::7:8 1:2:3:4:5::7:8 1:2:3:4:5::8
r"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|"
# 1::6:7:8 1:2:3:4::6:7:8 1:2:3:4::8
r"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|"
# 1::5:6:7:8 1:2:3::5:6:7:8 1:2:3::8
r"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|"
# 1::4:5:6:7:8 1:2::4:5:6:7:8 1:2::8
r"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|"
# 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8
r"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|"
# ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::
r":((:[0-9a-fA-F]{1,4}){1,7}|:)|"
# fe80::7:8%eth0 fe80::7:8%1
# (link-local IPv6 addresses with zone index)
r"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|"
r"::(ffff(:0{1,4}){0,1}:){0,1}"
r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
# ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255
# (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|"
r"([0-9a-fA-F]{1,4}:){1,4}:"
r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
# 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33
# (IPv4-Embedded IPv6 Address)
r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
r")\]|"
# host name
r"(?:(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)"
# domain name
r"(?:\.(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*"
# TLD identifier
r"(?:\.(?:(?:xn--[-]{0,2}[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))"
r")"
# port number
r"(?::\d{2,5})?"
# resource path
r"(?:/[-a-z\u00a1-\uffff\U00010000-\U0010ffff0-9._~%!$&'()*+,;=:@/]*)?"
# query string
r"(?:\?\S*)?"
# fragment
r"(?:#\S*)?"
r"$",
re.UNICODE | re.IGNORECASE
)
pattern = re.compile(regex)
@validator
def url(value, public=False):
"""
Return whether or not given value is a valid URL.
If the value is valid URL this function returns ``True``, otherwise
:class:`~validators.utils.ValidationFailure`.
This validator is based on the wonderful `URL validator of dperini`_.
.. _URL validator of dperini:
https://gist.github.com/dperini/729294
Examples::
>>> url('http://foobar.dk')
True
>>> url('ftp://foobar.dk')
True
>>> url('http://10.0.0.1')
True
>>> url('http://foobar.d')
ValidationFailure(func=url, ...)
>>> url('http://10.0.0.1', public=True)
ValidationFailure(func=url, ...)
.. versionadded:: 0.2
.. versionchanged:: 0.10.2
Added support for various exotic URLs and fixed various false
positives.
.. versionchanged:: 0.10.3
Added ``public`` parameter.
.. versionchanged:: 0.11.0
Made the regular expression this function uses case insensitive.
.. versionchanged:: 0.11.3
Added support for URLs containing localhost
:param value: URL address string to validate
:param public: (default=False) Set True to only allow a public IP address
"""
result = pattern.match(value)
if not public:
return result
return result and not any(
(result.groupdict().get(key) for key in ('private_ip', 'private_host'))
)