From a2697e7c6b275f05879b60f593c5854a816489f0 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Tue, 21 Jan 2020 22:32:56 +0400 Subject: [PATCH] Optimize _encode_invalid_chars (#1787) Co-authored-by: Seth Michael Larson --- CHANGES.rst | 2 ++ src/urllib3/util/url.py | 15 ++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f384438b61..20f7697652 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,8 @@ dev * Drop support for EOL Python 3.4 (Pull #1774) +* Optimize _encode_invalid_chars (Pull #1787) + 1.25.7 (2019-11-11) ------------------- diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index b01499ea82..8ef5a2311d 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -216,18 +216,15 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): component = six.ensure_text(component) + # Normalize existing percent-encoded bytes. # Try to see if the component we're encoding is already percent-encoded # so we can skip all '%' characters but still encode all others. - percent_encodings = PERCENT_RE.findall(component) - - # Normalize existing percent-encoded bytes. - for enc in percent_encodings: - if not enc.isupper(): - component = component.replace(enc, enc.upper()) + component, percent_encodings = PERCENT_RE.subn( + lambda match: match.group(0).upper(), component + ) uri_bytes = component.encode("utf-8", "surrogatepass") - is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%") - + is_percent_encoded = percent_encodings == uri_bytes.count(b"%") encoded_component = bytearray() for i in range(0, len(uri_bytes)): @@ -237,7 +234,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): if (is_percent_encoded and byte == b"%") or ( byte_ord < 128 and byte.decode() in allowed_chars ): - encoded_component.extend(byte) + encoded_component += byte continue encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))