Skip to content

Commit

Permalink
Filename normalisation of form-data/multipart file uploads (umlauts on Apple clients) (#2625)
Browse files Browse the repository at this point in the history

Co-authored-by: L. Karkkainen <tronic@users.noreply.github.com>
  • Loading branch information
Tronic and Tronic committed Dec 13, 2022
1 parent 92e7463 commit 13e9ab7
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
11 changes: 11 additions & 0 deletions sanic/request.py
Expand Up @@ -27,6 +27,7 @@
from sanic.app import Sanic

import email.utils
import unicodedata
import uuid

from collections import defaultdict
Expand Down Expand Up @@ -1084,6 +1085,16 @@ def parse_multipart_form(body, boundary):
form_parameters["filename*"]
)
file_name = unquote(value, encoding=encoding)

# Normalize to NFC (Apple macOS/iOS clients send NFD)
# Notes:
# - No effect for Windows, Linux or Android clients, which
# already send NFC
# - Python open() is tricky (creates files in NFC no matter
# which form you use)
if file_name is not None:
file_name = unicodedata.normalize("NFC", file_name)

elif form_header_field == "content-type":
content_type = form_header_value
content_charset = form_parameters.get("charset", "utf-8")
Expand Down
18 changes: 18 additions & 0 deletions tests/test_requests.py
Expand Up @@ -1293,6 +1293,24 @@ async def get(request):
"------sanic--\r\n",
"filename_\u00A0_test",
),
# Umlaut using NFC normalization (Windows, Linux, Android)
(
"------sanic\r\n"
'content-disposition: form-data; filename*="utf-8\'\'filename_%C3%A4_test"; name="test"\r\n'
"\r\n"
"OK\r\n"
"------sanic--\r\n",
"filename_\u00E4_test",
),
# Umlaut using NFD normalization (MacOS client)
(
"------sanic\r\n"
'content-disposition: form-data; filename*="utf-8\'\'filename_a%CC%88_test"; name="test"\r\n'
"\r\n"
"OK\r\n"
"------sanic--\r\n",
"filename_\u00E4_test", # Sanic should normalize to NFC
),
],
)
def test_request_multipart_files(app, payload, filename):
Expand Down

0 comments on commit 13e9ab7

Please sign in to comment.