Skip to content

Commit

Permalink
webutil: util.requests_*(): handle emoji domains that are invalid idn…
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed May 8, 2021
1 parent 455807f commit 49e5505
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
6 changes: 6 additions & 0 deletions tests/test_util.py
Expand Up @@ -1267,3 +1267,9 @@ def test_requests_get_unicode_url_ConnectionError(self):
self.expect_requests_get(url).AndRaise(requests.ConnectionError())
self.mox.ReplayAll()
self.assertRaises(exc.HTTPBadGateway, util.requests_get, url, gateway=True)

def test_requests_get_invalid_emoji_domain_fallback_to_domain2idnaError(self):
  """requests_get() retries with the encoded URL after an InvalidURL error.

  The first expected fetch, for the URL with the non-ASCII domain, is set up
  to raise requests.exceptions.InvalidURL. The second expected fetch is for
  the xn-- encoded form of the same domain and returns the body 'ok'. The
  call under test must end up with a 200 response.
  """
  emoji_url = 'http://abc⊙.de/'
  encoded_url = 'http://xn--abc-yr2a.de/'

  # Recorded expectations are order-sensitive: the raw URL is tried first and
  # rejected, then the encoded URL is fetched successfully.
  first_attempt = self.expect_requests_get(emoji_url)
  first_attempt.AndRaise(requests.exceptions.InvalidURL())
  self.expect_requests_get(encoded_url, 'ok')
  self.mox.ReplayAll()

  resp = util.requests_get(emoji_url)
  self.assertEqual(200, resp.status_code)
10 changes: 10 additions & 0 deletions util.py
Expand Up @@ -26,6 +26,7 @@
from xml.sax import saxutils

from cachetools import cached, TTLCache
from domain2idna import domain2idna

try:
import ujson
Expand Down Expand Up @@ -1509,6 +1510,15 @@ def call(url, *args, **kwargs):
logging.info(f'Received {resp.status_code}: {"" if resp.ok else resp.text[:500]}')
resp.raise_for_status()
except (ValueError, requests.URLRequired) as e:
if isinstance(e, requests.exceptions.InvalidURL):
punycode = domain2idna(url) # surprisingly, this handles full URLs fine
if punycode != url:
# the domain is valid idn2003 but not idn2008. encode and try again.
# https://unicode.org/faq/idn.html#6
# https://github.com/psf/requests/issues/3687
# https://github.com/kjd/idna/issues/18
# https://github.com/kjd/idna/issues/40
return call(punycode, *args, **kwargs)
if gateway:
msg = f'Bad URL {url} : {e}'
logging.warning(msg)
Expand Down

0 comments on commit 49e5505

Please sign in to comment.