From 13e9ab7ba977e8f61b6e88a34ac60c65d896cd63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= <98187+Tronic@users.noreply.github.com> Date: Tue, 13 Dec 2022 06:36:21 +0000 Subject: [PATCH] Filename normalisation of form-data/multipart file uploads (umlauts on Apple clients) (#2625) Co-authored-by: L. Karkkainen --- sanic/request.py | 11 +++++++++++ tests/test_requests.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sanic/request.py b/sanic/request.py index 8b8d95305f..592869e5f8 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -27,6 +27,7 @@ from sanic.app import Sanic import email.utils +import unicodedata import uuid from collections import defaultdict @@ -1084,6 +1085,16 @@ def parse_multipart_form(body, boundary): form_parameters["filename*"] ) file_name = unquote(value, encoding=encoding) + + # Normalize to NFC (Apple MacOS/iOS send NFD) + # Notes: + # - No effect for Windows, Linux or Android clients which + # already send NFC + # - Python open() is tricky (creates files in NFC no matter + # which form you use) + if file_name is not None: + file_name = unicodedata.normalize("NFC", file_name) + elif form_header_field == "content-type": content_type = form_header_value content_charset = form_parameters.get("charset", "utf-8") diff --git a/tests/test_requests.py b/tests/test_requests.py index 9a984bb46e..b839296444 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -1293,6 +1293,24 @@ async def get(request): "------sanic--\r\n", "filename_\u00A0_test", ), + # Umlaut using NFC normalization (Windows, Linux, Android) + ( + "------sanic\r\n" + 'content-disposition: form-data; filename*="utf-8\'\'filename_%C3%A4_test"; name="test"\r\n' + "\r\n" + "OK\r\n" + "------sanic--\r\n", + "filename_\u00E4_test", + ), + # Umlaut using NFD normalization (MacOS client) + ( + "------sanic\r\n" + 'content-disposition: form-data; filename*="utf-8\'\'filename_a%CC%88_test"; name="test"\r\n' + "\r\n" + "OK\r\n" + "------sanic--\r\n", + "filename_\u00E4_test", # Sanic should normalize to NFC + ), ], ) def test_request_multipart_files(app, payload, filename):