Skip to content

Commit

Permalink
Respect user-specified enctype in forms
Browse files Browse the repository at this point in the history
Fixes MechanicalSoup#242.

Make MechanicalSoup behave as a real browser:

- only the "multipart/form-data" enctype allows file sending

- forms without file inputs can now be sent as "multipart/form-data"
  if specified.

- "application/x-www-form-urlencoded" is the default when enctype is wrong
  or not specified

The added code uses a hack to work around a limitation of Requests:
Requests doesn't support forcing the enctype yet, and doesn't allow
submitting with the "multipart/form-data" enctype without submitting
files. The best workaround we found is to pass a modified dict as
files to Requests. This code could change in the future if Requests
implements a "force enctype" feature.
  • Loading branch information
moy committed Jul 16, 2019
1 parent 540c9bc commit 616a1d5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 13 deletions.
31 changes: 24 additions & 7 deletions mechanicalsoup/browser.py
Expand Up @@ -155,6 +155,8 @@ def _request(self, form, url=None, **kwargs):
# Requests also retains order when encoding form data in 2-tuple lists.
data = [(k, v) for k, v in data.items()]

multipart = form.get("enctype", "") == "multipart/form-data"

# Process form tags in the order that they appear on the page,
# skipping those tags that do not have a name-attribute.
selector = ",".join("{}[name]".format(i) for i in
Expand All @@ -175,18 +177,17 @@ def _request(self, form, url=None, **kwargs):
# browsers use empty string for inputs with missing values
value = tag.get("value", "")

if tag.get("type", "").lower() == "file":
# read http://www.cs.tut.fi/~jkorpela/forms/file.html
# in browsers, file upload only happens if the form
# (or submit button) enctype attribute is set to
# "multipart/form-data". We don't care, simplify.
# If the enctype is not multipart, the filename is put in
# the form as a text input and the file is not sent.
if tag.get("type", "").lower() == "file" and multipart:
filename = value
if filename != "" and isinstance(filename, string_types):
content = open(filename, "rb")
else:
content = ""
# If value is the empty string, we still pass it for
# consistency with browsers (see #250).
# If value is the empty string, we still pass it
# for consistency with browsers (see
# https://github.com/MechanicalSoup/MechanicalSoup/issues/250).
files[name] = (filename, content)
else:
data.append((name, value))
Expand Down Expand Up @@ -223,6 +224,22 @@ def _request(self, form, url=None, **kwargs):
else:
kwargs["data"] = data

# The following part of the function is here to respect the
# enctype specified by the form, i.e. to force sending multipart
# content. Since Requests does not yet have a feature to choose the
# enctype, we have to use a trick to make it behave as we want.
# This code will be updated if Requests implements such a feature.
if multipart and not files:
# Requests will switch to "multipart/form-data" only if
# files pass the `if files:` test, so in this case we use
# a modified dict that passes the if test even if empty.
# dict subclass whose truth value is always True, even when it is empty.
class DictThatReturnsTrue(dict):
def __bool__(self):
return True
# Python 2 looks up __nonzero__ instead of __bool__ for truth testing.
__nonzero__ = __bool__

files = DictThatReturnsTrue()

return self.session.request(method, url, files=files, **kwargs)

def submit(self, form, url=None, **kwargs):
Expand Down
13 changes: 7 additions & 6 deletions tests/test_browser.py
Expand Up @@ -111,9 +111,8 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field):
""".format(httpbin.url, enctype, file_field)
form = BeautifulSoup(form_html, "lxml").form

# For now, assume that the encoding always allow sending file's
# content.
valid_enctype = True
valid_enctype = (enctype in valid_enctypes_file_submit and
valid_enctypes_file_submit[enctype])
expected_content = b"" # default
if submit_file and file_field:
# create a temporary file for testing file upload
Expand All @@ -133,10 +132,10 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field):
browser = mechanicalsoup.Browser()
response = browser._request(form)

if file_field:
expected_enctype = 'multipart/form-data'
if enctype not in valid_enctypes_file_submit:
expected_enctype = default_enctype
else:
expected_enctype = 'application/x-www-form-urlencoded'
expected_enctype = enctype
assert expected_enctype in response.request.headers["Content-Type"]

resp = response.json()
Expand All @@ -153,6 +152,8 @@ def test_enctype_and_file_submit(httpbin, enctype, submit_file, file_field):
assert key in ("files", "form")
found = True
found_in = key
if key == "files" and not valid_enctype:
assert not value

assert found == bool(file_field)
if file_field:
Expand Down

0 comments on commit 616a1d5

Please sign in to comment.