Skip to content

Commit

Permalink
Move types into source, drop Python 3.4
Browse files (browse the repository at this point in the history)
  • Loading branch information
sethmlarson committed Jan 22, 2021
1 parent a9e4f73 commit b25f03e
Show file tree
Hide file tree
Showing 16 changed files with 171 additions and 229 deletions.
11 changes: 9 additions & 2 deletions .travis.yml
Expand Up @@ -3,19 +3,26 @@ arch:
- amd64
- ppc64le
python:
- "3.4"
- "3.5"
- "3.6"
- "3.7"
- "3.8"
- "3.9"
- "pypy3"
# Disable unsuported version pypy for ppc64le
# Disable unsupported version pypy for ppc64le
jobs:
exclude:
- arch: ppc64le
python: pypy3

matrix:
include:
- python: 3.9
install:
- pip install mypy
script:
- mypy --strict idna/

install:
- pip install .
script:
Expand Down
2 changes: 1 addition & 1 deletion idna/__init__.py
Expand Up @@ -12,14 +12,14 @@
check_nfc,
decode,
encode,
intranges_contain,
ulabel,
uts46_remap,
valid_contextj,
valid_contexto,
valid_label_length,
valid_string_length,
)
from .intranges import intranges_contain

__all__ = [
"IDNABidiError",
Expand Down
1 change: 1 addition & 0 deletions idna/codec.py
@@ -1,3 +1,4 @@
# type: ignore
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re
Expand Down
27 changes: 0 additions & 27 deletions idna/codec.pyi

This file was deleted.

7 changes: 4 additions & 3 deletions idna/compat.py
@@ -1,12 +1,13 @@
from .core import *
from .codec import *
from typing import Any

def ToASCII(label):
def ToASCII(label: str) -> bytes:
return encode(label)

def ToUnicode(label):
def ToUnicode(label: bytes) -> str:
return decode(label)

def nameprep(s):
def nameprep(s: Any) -> None:
raise NotImplementedError('IDNA 2008 does not utilise nameprep protocol')

5 changes: 0 additions & 5 deletions idna/compat.pyi

This file was deleted.

109 changes: 63 additions & 46 deletions idna/core.py
Expand Up @@ -2,7 +2,7 @@
import bisect
import unicodedata
import re
import sys
from typing import Union, Optional
from .intranges import intranges_contain

_virama_combining_class = 9
Expand All @@ -29,38 +29,38 @@ class InvalidCodepointContext(IDNAError):
pass


def _combining_class(cp):
def _combining_class(cp: int) -> int:
v = unicodedata.combining(chr(cp))
if v == 0:
if not unicodedata.name(chr(cp)):
raise ValueError('Unknown character in unicodedata')
return v

def _is_script(cp, script):
def _is_script(cp: str, script: str) -> bool:
return intranges_contain(ord(cp), idnadata.scripts[script])

def _punycode(s):
def _punycode(s: str) -> bytes:
return s.encode('punycode')

def _unot(s):
def _unot(s: int) -> str:
return 'U+{:04X}'.format(s)


def valid_label_length(label):
def valid_label_length(label: Union[bytes, str]) -> bool:

if len(label) > 63:
return False
return True


def valid_string_length(label, trailing_dot):
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:

if len(label) > (254 if trailing_dot else 253):
return False
return True


def check_bidi(label, check_ltr=False):
def check_bidi(label: str, check_ltr: bool = False) -> bool:

# Bidi rules should only be applied if string contains RTL characters
bidi_label = False
Expand All @@ -84,7 +84,7 @@ def check_bidi(label, check_ltr=False):
raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label)))

valid_ending = False
number_type = False
number_type: Union[str, bool] = False
for (idx, cp) in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)

Expand Down Expand Up @@ -120,14 +120,14 @@ def check_bidi(label, check_ltr=False):
return True


def check_initial_combiner(label):
def check_initial_combiner(label: str) -> bool:

if unicodedata.category(label[0])[0] == 'M':
raise IDNAError('Label begins with an illegal combining character')
return True


def check_hyphen_ok(label):
def check_hyphen_ok(label: str) -> bool:

if label[2:4] == '--':
raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
Expand All @@ -136,13 +136,13 @@ def check_hyphen_ok(label):
return True


def check_nfc(label):
def check_nfc(label: str) -> None:

if unicodedata.normalize('NFC', label) != label:
raise IDNAError('Label must be in Normalization Form C')


def valid_contextj(label, pos):
def valid_contextj(label: str, pos: int) -> bool:

cp_value = ord(label[pos])

Expand Down Expand Up @@ -186,7 +186,7 @@ def valid_contextj(label, pos):
return False


def valid_contexto(label, pos, exception=False):
def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:

cp_value = ord(label[pos])

Expand Down Expand Up @@ -226,8 +226,10 @@ def valid_contexto(label, pos, exception=False):
return False
return True

return False

def check_label(label):

def check_label(label: Union[str, bytes, bytearray]) -> None:

if isinstance(label, (bytes, bytearray)):
label = label.decode('utf-8')
Expand Down Expand Up @@ -259,14 +261,14 @@ def check_label(label):
check_bidi(label)


def alabel(label):
def alabel(label: str) -> bytes:

try:
label = label.encode('ascii')
ulabel(label)
if not valid_label_length(label):
label_bytes = label.encode('ascii')
ulabel(label_bytes)
if not valid_label_length(label_bytes):
raise IDNAError('Label too long')
return label
return label_bytes
except UnicodeEncodeError:
pass

Expand All @@ -275,51 +277,58 @@ def alabel(label):

label = str(label)
check_label(label)
label = _punycode(label)
label = _alabel_prefix + label
label_bytes = _punycode(label)
label_bytes = _alabel_prefix + label_bytes

if not valid_label_length(label):
if not valid_label_length(label_bytes):
raise IDNAError('Label too long')

return label
return label_bytes


def ulabel(label):
def ulabel(label: Union[str, bytes, bytearray]) -> str:

if not isinstance(label, (bytes, bytearray)):
try:
label = label.encode('ascii')
label_bytes = label.encode('ascii')
except UnicodeEncodeError:
check_label(label)
return label
else:
label_bytes = label

label = label.lower()
if label.startswith(_alabel_prefix):
label = label[len(_alabel_prefix):]
if not label:
label_bytes = label_bytes.lower()
if label_bytes.startswith(_alabel_prefix):
label_bytes = label_bytes[len(_alabel_prefix):]
if not label_bytes:
raise IDNAError('Malformed A-label, no Punycode eligible content found')
if label.decode('ascii')[-1] == '-':
if label_bytes.decode('ascii')[-1] == '-':
raise IDNAError('A-label must not end with a hyphen')
else:
check_label(label)
return label.decode('ascii')
check_label(label_bytes)
return label_bytes.decode('ascii')

label = label.decode('punycode')
label = label_bytes.decode('punycode')
check_label(label)
return label


def uts46_remap(domain, std3_rules=True, transitional=False):
def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
"""Re-map the characters in the string according to UTS46 processing."""
from .uts46data import uts46data
output = ''
try:
for pos, char in enumerate(domain):
code_point = ord(char)

for pos, char in enumerate(domain):
code_point = ord(char)
try:
uts46row = uts46data[code_point if code_point < 256 else
bisect.bisect_left(uts46data, (code_point, 'Z')) - 1]
status = uts46row[1]
replacement = uts46row[2] if len(uts46row) == 3 else None
replacement: Optional[str]
if len(uts46row) == 3:
replacement = uts46row[2] # type: ignore
else:
replacement = None
if (status == 'V' or
(status == 'D' and not transitional) or
(status == '3' and not std3_rules and replacement is None)):
Expand All @@ -330,14 +339,19 @@ def uts46_remap(domain, std3_rules=True, transitional=False):
output += replacement
elif status != 'I':
raise IndexError()
return unicodedata.normalize('NFC', output)
except IndexError:
raise InvalidCodepoint(
'Codepoint {} not allowed at position {} in {}'.format(
_unot(code_point), pos + 1, repr(domain)))
except IndexError:
raise InvalidCodepoint(
'Codepoint {} not allowed at position {} in {}'.format(
_unot(code_point), pos + 1, repr(domain)))

return unicodedata.normalize('NFC', output)


def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
def encode(s: Union[str, bytes, bytearray],
strict: bool = False,
uts46: bool = False,
std3_rules: bool = False,
transitional: bool = False) -> bytes:

if isinstance(s, (bytes, bytearray)):
s = s.decode('ascii')
Expand Down Expand Up @@ -368,7 +382,10 @@ def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
return s


def decode(s, strict=False, uts46=False, std3_rules=False):
def decode(s: Union[str, bytes, bytearray],
strict: bool = False,
uts46: bool = False,
std3_rules: bool = False) -> str:

if isinstance(s, (bytes, bytearray)):
s = s.decode('ascii')
Expand Down
39 changes: 0 additions & 39 deletions idna/core.pyi

This file was deleted.

0 comments on commit b25f03e

Please sign in to comment.