Optimize _encode_invalid_chars (#1787)

Co-authored-by: Seth Michael Larson <sethmichaellarson@gmail.com>
urllib3 · Jan 21, 2020 · a2697e7 · vicvicg · Feb 6, 2020 · pquentin
1 parent d2a5a59
commit a2697e7
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 9 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -6,6 +6,8 @@ dev
 
 * Drop support for EOL Python 3.4 (Pull #1774)
 
+* Optimize _encode_invalid_chars (Pull #1787)
+
 
 1.25.7 (2019-11-11)
 -------------------

diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
@@ -216,18 +216,15 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
 
     component = six.ensure_text(component)
 
+    # Normalize existing percent-encoded bytes.
     # Try to see if the component we're encoding is already percent-encoded
     # so we can skip all '%' characters but still encode all others.
-    percent_encodings = PERCENT_RE.findall(component)
-
-    # Normalize existing percent-encoded bytes.
-    for enc in percent_encodings:
-        if not enc.isupper():
-            component = component.replace(enc, enc.upper())
+    component, percent_encodings = PERCENT_RE.subn(
+        lambda match: match.group(0).upper(), component
+    )
 
     uri_bytes = component.encode("utf-8", "surrogatepass")
-    is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%")
-
+    is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
     encoded_component = bytearray()
 
     for i in range(0, len(uri_bytes)):
@@ -237,7 +234,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
         if (is_percent_encoded and byte == b"%") or (
             byte_ord < 128 and byte.decode() in allowed_chars
         ):
-            encoded_component.extend(byte)
+            encoded_component += byte
             continue
         encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))