From 616a1d5aad812befea3940cca85dfffe2f86cdb8 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Tue, 16 Jul 2019 17:18:08 +0200 Subject: [PATCH] Respect user-specified enctype in forms Fixes #242. Make MechanicalSoup behave as a real browser: - only the "multipart/form-data" enctype allows file sending - forms without file inputs can now be sent as "multipart/form-data" if specified. - "application/x-www-form-urlencoded" is the default when enctype is invalid or not specified The added code uses a hack because of a limitation in Requests: Requests doesn't support forcing enctype yet, and doesn't allow submitting with the "multipart/form-data" enctype without submitting files. So the best way we found to bypass this is to pass a modified dict as files to Requests. This code could change in the future if Requests implements a "force enctype" feature. --- mechanicalsoup/browser.py | 31 ++++++++++++++++++++++++------- tests/test_browser.py | 13 +++++++------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py index 35e40473..3979c647 100644 --- a/mechanicalsoup/browser.py +++ b/mechanicalsoup/browser.py @@ -155,6 +155,8 @@ def _request(self, form, url=None, **kwargs): # Requests also retains order when encoding form data in 2-tuple lists. data = [(k, v) for k, v in data.items()] + multipart = form.get("enctype", "") == "multipart/form-data" + # Process form tags in the order that they appear on the page, # skipping those tags that do not have a name-attribute. selector = ",".join("{}[name]".format(i) for i in @@ -175,18 +177,17 @@ def _request(self, form, url=None, **kwargs): # browsers use empty string for inputs with missing values value = tag.get("value", "") - if tag.get("type", "").lower() == "file": - # read http://www.cs.tut.fi/~jkorpela/forms/file.html - # in browsers, file upload only happens if the form - # (or submit button) enctype attribute is set to - # "multipart/form-data". 
We don't care, simplify. + # If the enctype is not multipart, the filename is put in + # the form as a text input and the file is not sent. + if tag.get("type", "").lower() == "file" and multipart: filename = value if filename != "" and isinstance(filename, string_types): content = open(filename, "rb") else: content = "" - # If value is the empty string, we still pass it for - # consistency with browsers (see #250). + # If value is the empty string, we still pass it + # for consistency with browsers (see + # https://github.com/MechanicalSoup/MechanicalSoup/issues/250). files[name] = (filename, content) else: data.append((name, value)) @@ -223,6 +224,22 @@ def _request(self, form, url=None, **kwargs): else: kwargs["data"] = data + # The following part of the function is here to respect the + # enctype specified by the form, i.e. force sending multipart + # content. Since Requests doesn't have yet a feature to choose + # enctype, we have to use tricks to make it behave as we want + # This code will be updated if Requests implements it. + if multipart and not files: + # Requests will switch to "multipart/form-data" only if + # files pass the `if files:` test, so in this case we use + # a modified dict that passes the if test even if empty. + class DictThatReturnsTrue(dict): + def __bool__(self): + return True + __nonzero__ = __bool__ + + files = DictThatReturnsTrue() + return self.session.request(method, url, files=files, **kwargs) def submit(self, form, url=None, **kwargs): diff --git a/tests/test_browser.py b/tests/test_browser.py index 3280afa3..e16ec7f3 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -111,9 +111,8 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field): """.format(httpbin.url, enctype, file_field) form = BeautifulSoup(form_html, "lxml").form - # For now, assume that the encoding always allow sending file's - # content. 
- valid_enctype = True + valid_enctype = (enctype in valid_enctypes_file_submit and + valid_enctypes_file_submit[enctype]) expected_content = b"" # default if submit_file and file_field: # create a temporary file for testing file upload @@ -133,10 +132,10 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field): browser = mechanicalsoup.Browser() response = browser._request(form) - if file_field: - expected_enctype = 'multipart/form-data' + if enctype not in valid_enctypes_file_submit: + expected_enctype = default_enctype else: - expected_enctype = 'application/x-www-form-urlencoded' + expected_enctype = enctype assert expected_enctype in response.request.headers["Content-Type"] resp = response.json() @@ -153,6 +152,8 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field): assert key in ("files", "form") found = True found_in = key + if key == "files" and not valid_enctype: + assert not value assert found == bool(file_field) if file_field: