From fee8256cb25ff14bd1e232b6e8552fb48b4001ad Mon Sep 17 00:00:00 2001 From: Dan Hemberger Date: Thu, 17 Jan 2019 10:58:10 -0800 Subject: [PATCH 01/11] Fix tests for bs4 4.7.0+ CSS selectors in bs4 now return elements in page order, whereas they did not previously. This requires us to re-order some of our expected test output, and to perform an order-independent comparison if tested with a bs4 version before 4.7.0. Tested and passing with bs4 4.6.0 and 4.7.1. Closes #257. --- tests/test_form.py | 12 ++++++------ tests/test_stateful_browser.py | 10 +++++----- tests/utils.py | 9 ++++++++- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/test_form.py b/tests/test_form.py index 8db20429..32d6463f 100644 --- a/tests/test_form.py +++ b/tests/test_form.py @@ -74,30 +74,30 @@ def test_submit_set(httpbin): @pytest.mark.parametrize("expected_post", [ pytest.param( [ + ('text', 'Setting some text!'), ('comment', 'Testing preview page'), ('preview', 'Preview Page'), - ('text', 'Setting some text!') ], id='preview'), pytest.param( [ + ('text', '= Heading =\n\nNew page here!\n'), ('comment', 'Created new page'), ('save', 'Submit changes'), - ('text', '= Heading =\n\nNew page here!\n') ], id='save'), pytest.param( [ + ('text', '= Heading =\n\nNew page here!\n'), ('comment', 'Testing choosing cancel button'), ('cancel', 'Cancel'), - ('text', '= Heading =\n\nNew page here!\n') ], id='cancel'), ]) def test_choose_submit(expected_post): browser, url = setup_mock_browser(expected_post=expected_post) browser.open(url) form = browser.select_form('#choose-submit-form') - browser['text'] = expected_post[2][1] - browser['comment'] = expected_post[0][1] - form.choose_submit(expected_post[1][0]) + browser['text'] = dict(expected_post)['text'] + browser['comment'] = dict(expected_post)['comment'] + form.choose_submit(expected_post[2][0]) res = browser.submit_selected() assert(res.status_code == 200 and res.text == 'Success!') diff --git a/tests/test_stateful_browser.py b/tests/test_stateful_browser.py index 291bec2a..5c5876a1 100644 --- a/tests/test_stateful_browser.py +++ b/tests/test_stateful_browser.py @@ -125,15 +125,15 @@ def test_links(): @pytest.mark.parametrize("expected_post", [ pytest.param( [ + ('text', 'Setting some text!'), ('comment', 'Selecting an input submit'), ('diff', 'Review Changes'), - ('text', 'Setting some text!') ], id='input'), pytest.param( [ + ('text', '= Heading =\n\nNew page here!\n'), ('comment', 'Selecting a button submit'), ('cancel', 'Cancel'), - ('text', '= Heading =\n\nNew page here!\n') ], id='button'), ]) def test_submit_btnName(expected_post): @@ -141,9 +141,9 @@ def test_submit_btnName(expected_post): browser, url = setup_mock_browser(expected_post=expected_post) browser.open(url) browser.select_form('#choose-submit-form') - browser['text'] = expected_post[2][1] - browser['comment'] = expected_post[0][1] - res = browser.submit_selected(btnName=expected_post[1][0]) + browser['text'] = dict(expected_post)['text'] + browser['comment'] = dict(expected_post)['comment'] + res = browser.submit_selected(btnName=expected_post[2][0]) assert(res.status_code == 200 and res.text == 'Success!') diff --git a/tests/utils.py b/tests/utils.py index 25f8ea40..3d4a72b3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,7 @@ import mechanicalsoup import requests_mock +from distutils.version import StrictVersion +import bs4 try: from urllib.parse import parse_qsl except ImportError: @@ -62,7 +64,12 @@ def mock_post(mocked_adapter, url, expected, reply='Success!'): def text_callback(request, context): # Python 2's parse_qsl doesn't like None argument query = parse_qsl(request.text) if request.text else [] - assert (query == expected) + # In bs4 4.7.0+, CSS selectors return elements in page order, + # but did not in earlier versions. + if StrictVersion(bs4.__version__) >= StrictVersion('4.7.0'): + assert query == expected + else: + assert sorted(query) == sorted(expected) return reply mocked_adapter.register_uri('POST', url, text=text_callback) From eb636b004749b3562a9b1229022fc8abe178ea91 Mon Sep 17 00:00:00 2001 From: Dan Hemberger Date: Wed, 16 Jan 2019 16:29:28 -0800 Subject: [PATCH 02/11] Fix assert in test_form_noaction The `message` parameter to `pytest.raises` was deprecated in pytest version 4.1. > PytestDeprecationWarning: The 'message' parameter is deprecated. > (did you mean to use `match='some regex'` to check the exception message?) Yes! We did mean to use `match`, and now we do! Thanks pytest. --- tests/test_stateful_browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_stateful_browser.py b/tests/test_stateful_browser.py index 5c5876a1..71aa7fba 100644 --- a/tests/test_stateful_browser.py +++ b/tests/test_stateful_browser.py @@ -257,7 +257,7 @@ def test_form_noaction(): browser, url = setup_mock_browser() browser.open_fake_page(submit_form_noaction) browser.select_form('#choose-submit-form') - with pytest.raises(ValueError, message="no URL to submit to"): + with pytest.raises(ValueError, match="no URL to submit to"): browser.submit_selected() From 354934adc0ba514f393726c1271c0d17dca0489d Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Sat, 13 Oct 2018 18:35:18 +0200 Subject: [PATCH 03/11] browser: accept non-lowercase type="radio" and "checkbox" Fixes #245. --- mechanicalsoup/browser.py | 5 ++++- tests/test_browser.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py index 3fb66a6d..19f19a63 100644 --- a/mechanicalsoup/browser.py +++ b/mechanicalsoup/browser.py @@ -163,7 +163,10 @@ def _request(self, form, url=None, **kwargs): name = tag.get("name") # name-attribute of tag if tag.name == "input": - if tag.get("type") in ("radio", "checkbox"): + if ( + tag.get("type") is not None and + tag.get("type").lower() in ("radio", "checkbox") + ): if "checked" not in tag.attrs: continue value = tag.get("value", "on") diff --git a/tests/test_browser.py b/tests/test_browser.py index 978af417..fba12c77 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -45,14 +45,14 @@ def test__request(httpbin):
Pizza Size -

Small

-

Medium

+

Small

+

Medium

Large

Pizza Toppings -

Bacon

-

Extra Cheese

+

Bacon

+

Extra Cheese

Onion

Mushroom

From c0a3203b36c9456d82fcc1fe3e5fc8fe78bac17a Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Sat, 13 Oct 2018 18:51:09 +0200 Subject: [PATCH 04/11] form: case-insensitive search for radio and checkboxes --- mechanicalsoup/form.py | 12 +++++++++--- tests/test_form.py | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/mechanicalsoup/form.py b/mechanicalsoup/form.py index ab28e8cc..28221a24 100644 --- a/mechanicalsoup/form.py +++ b/mechanicalsoup/form.py @@ -111,8 +111,8 @@ def set_checkbox(self, data, uncheck_other_boxes=True): the HTML is served. """ for (name, value) in data.items(): - checkboxes = self.form.find_all("input", {"name": name}, - type="checkbox") + # Case-insensitive search for type=checkbox + checkboxes = self.find_by_type("input", "checkbox", {'name': name}) if not checkboxes: raise InvalidFormMethod("No input checkbox named " + name) @@ -155,7 +155,8 @@ def set_radio(self, data): Only one radio button in the family can be checked. """ for (name, value) in data.items(): - radios = self.form.find_all("input", {"name": name}, type="radio") + # Case-insensitive search for type=radio + radios = self.find_by_type("input", "radio", {'name': name}) if not radios: raise InvalidFormMethod("No input radio named " + name) @@ -373,3 +374,8 @@ def print_summary(self): if subtag.string: subtag.string = subtag.string.strip() print(input_copy) + + def find_by_type(self, tag_name, type_attr, attrs): + attrs_dict = attrs.copy() + attrs_dict['type'] = lambda x: x and x.lower() == type_attr + return self.form.find_all(tag_name, attrs=attrs_dict) diff --git a/tests/test_form.py b/tests/test_form.py index 32d6463f..f54718e9 100644 --- a/tests/test_form.py +++ b/tests/test_form.py @@ -313,6 +313,38 @@ def test_form_not_found(): form.set_select({'entree': ('no_multiple', 'no_multiple')}) +def test_form_set_radio_checkbox(capsys): + browser = mechanicalsoup.StatefulBrowser() + browser.open_fake_page(page_with_various_fields, + url="http://example.com/invalid/") + form = browser.select_form("form") + form.set_radio({"size": "small"}) + form.set_checkbox({"topping": "cheese"}) + browser.get_current_form().print_summary() + out, err = capsys.readouterr() + # Different versions of bs4 show either or + # . Normalize before comparing. + out = out.replace('>', '/>') + assert out == """ + + + + + + + + + + + +""" + assert err == "" + + page_with_radio = '''
@@ -351,14 +383,14 @@ def test_form_check_uncheck(): Pizza Toppings

-

-

Small

+

Small

Medium

Large

@@ -386,10 +418,10 @@ def test_form_print_summary(capsys): - + - + From 2f3dc51a8ab9015f21571fb2e0617bc7aadc35d3 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Sat, 13 Oct 2018 21:21:04 +0200 Subject: [PATCH 05/11] Accept type=submit, button, reset and file case-insensitively --- mechanicalsoup/browser.py | 7 +++++-- mechanicalsoup/form.py | 6 ++++-- tests/test_browser.py | 2 +- tests/test_form.py | 6 +++--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py index 19f19a63..4d6de3fb 100644 --- a/mechanicalsoup/browser.py +++ b/mechanicalsoup/browser.py @@ -174,7 +174,10 @@ def _request(self, form, url=None, **kwargs): # browsers use empty string for inputs with missing values value = tag.get("value", "") - if tag.get("type") == "file": + if ( + tag.get("type") is not None and + tag.get("type").lower() == "file" + ): # read http://www.cs.tut.fi/~jkorpela/forms/file.html # in browsers, file upload only happens if the form # (or submit button) enctype attribute is set to @@ -188,7 +191,7 @@ def _request(self, form, url=None, **kwargs): data.append((name, value)) elif tag.name == "button": - if tag.get("type", "") in ("button", "reset"): + if tag.get("type", "").lower() in ("button", "reset"): continue else: data.append((name, tag.get("value", ""))) diff --git a/mechanicalsoup/form.py b/mechanicalsoup/form.py index 28221a24..e11265be 100644 --- a/mechanicalsoup/form.py +++ b/mechanicalsoup/form.py @@ -333,8 +333,10 @@ def choose_submit(self, submit): raise Exception('Submit already chosen. Cannot change submit!') # All buttons NOT of type (button,reset) are valid submits - inps = [i for i in self.form.select('input[type="submit"], button') - if i.get('type', '') not in ('button', 'reset')] + inps = (self.find_by_type("input", "submit", dict()) + + self.form.find_all("button")) + inps = [i for i in inps + if i.get('type', '').lower() not in ('button', 'reset')] # If no submit specified, choose the first one if submit is None and inps: diff --git a/tests/test_browser.py b/tests/test_browser.py index fba12c77..049649ca 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -56,7 +56,7 @@ def test__request(httpbin):

Onion

Mushroom

- + + """ browser, url = setup_mock_browser(expected_post=expected_post, text=text) From 144aee4d850db34268cdf39375c90c0fc1442793 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Thu, 17 Jan 2019 21:49:03 +0100 Subject: [PATCH 06/11] FAQ: Update on alternatives FAQ: Update on alternatives mechanize is back on track, RoboBrowser seems clearly abandoned. --- docs/faq.rst | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index fc9737b7..01739c3d 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -77,18 +77,22 @@ There are other libraries with the same purpose as MechanicalSoup: * `Mechanize `__ is an ancestor of MechanicalSoup (getting its name from the Perl mechanize - module). It was a great tool, but doesn't support Python 3. It was - unmaintained for several years but got a new maintainer in 2017. - Note that Mechanize is a much bigger piece of code (around 20 times - more lines!) than MechanicalSoup, which is small because it - delegates most of its work to BeautifulSoup and requests. + module). It was a great tool, but became unmaintained for several + years and didn't support Python 3. Fortunately, Mechanize got a new + maintainer in 2017 and completed Python 3 support in 2019. Note that + Mechanize is a much bigger piece of code (around 20 times more + lines!) than MechanicalSoup, which is small because it delegates + most of its work to BeautifulSoup and requests. * `RoboBrowser `__ is very similar to MechanicalSoup. Both are small libraries built on top of requests and BeautifulSoup. Their APIs are very similar. Both have an automated testsuite. As of writing, MechanicalSoup is more actively - maintained (only 1 really active developer and no activity the last - two years for RoboBrowser). + maintained (only 1 really active developer and no activity since + 2015 on RoboBrowser). RoboBrowser is `broken on Python 3.7 + `__, and while + there is an easy workaround this is a sign that the lack of activity + is due to the project being abandoned more than to its maturity. * `Selenium `__ is a much heavier solution: it launches a real web browser (Firefox, From 3d9160515912b3b25c283268477b5f4679b97301 Mon Sep 17 00:00:00 2001 From: Dan Hemberger Date: Thu, 17 Jan 2019 13:32:44 -0800 Subject: [PATCH 07/11] browser.py: check tag type consistently The tag types were being checked in two ways: 1. tag.get("type", "").lower() == X 2. tag.get("type") is not None and tag.get("type").lower() == X Since these should be identical, change all to the first option, which is simpler and faster. This is a follow-up to #246. --- mechanicalsoup/browser.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py index 4d6de3fb..d81d3b1c 100644 --- a/mechanicalsoup/browser.py +++ b/mechanicalsoup/browser.py @@ -163,10 +163,7 @@ def _request(self, form, url=None, **kwargs): name = tag.get("name") # name-attribute of tag if tag.name == "input": - if ( - tag.get("type") is not None and - tag.get("type").lower() in ("radio", "checkbox") - ): + if tag.get("type", "").lower() in ("radio", "checkbox"): if "checked" not in tag.attrs: continue value = tag.get("value", "on") @@ -174,10 +171,7 @@ def _request(self, form, url=None, **kwargs): # browsers use empty string for inputs with missing values value = tag.get("value", "") - if ( - tag.get("type") is not None and - tag.get("type").lower() == "file" - ): + if tag.get("type", "").lower() == "file": # read http://www.cs.tut.fi/~jkorpela/forms/file.html # in browsers, file upload only happens if the form # (or submit button) enctype attribute is set to From 3906f0e20e46ddafdbf27f4e90126879c1aaa5aa Mon Sep 17 00:00:00 2001 From: Dan Hemberger Date: Thu, 17 Jan 2019 12:35:02 -0800 Subject: [PATCH 08/11] Improve `test_link_arg_*` tests The `test_link_arg_regex` test was accidentally passing. By adding another link to the test html, we can see that it was selecting the first link instead of the link that matched the supplied regex. This new version of the test fails, but it will be fixed by #256. Also added a case for no arguments and reduced code duplication by changing it to a parameterized test. --- tests/test_stateful_browser.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_stateful_browser.py b/tests/test_stateful_browser.py index 71aa7fba..316cfb26 100644 --- a/tests/test_stateful_browser.py +++ b/tests/test_stateful_browser.py @@ -421,18 +421,18 @@ def test_referer_submit_headers(httpbin): assert headers['X-Test-Header'] == 'x-test-value' -def test_link_arg_text(httpbin): - browser = mechanicalsoup.StatefulBrowser() - browser.open_fake_page('Link', httpbin.url) - browser.follow_link(link_text='Link') - assert browser.get_url() == httpbin + '/get' - - -def test_link_arg_regex(httpbin): +@pytest.mark.parametrize('expected, kwargs', [ + pytest.param('/foo', {}, id='none'), + pytest.param('/get', {'text': 'Link'}, id='text'), + pytest.param('/get', {'url_regex': 'get'}, id='regex', + marks=pytest.mark.xfail), +]) +def test_follow_link_arg(httpbin, expected, kwargs): browser = mechanicalsoup.StatefulBrowser() - browser.open_fake_page('Link', httpbin.url) - browser.follow_link(url_regex='.*') - assert browser.get_url() == httpbin + '/get' + html = 'BarLink' + browser.open_fake_page(html, httpbin.url) + browser.follow_link(**kwargs) + assert browser.get_url() == httpbin + expected def test_link_arg_multiregex(httpbin): From f70d4e7f35cef55389ca82d5c1613af35d41d0db Mon Sep 17 00:00:00 2001 From: Dan Hemberger Date: Sun, 27 Jan 2019 18:46:57 -0800 Subject: [PATCH 09/11] Do not submit disabled form elements Closes #248. MechanicalSoup was incorrectly ignoring `disabled` attributes in form elements. --- docs/ChangeLog.rst | 10 ++++++++++ mechanicalsoup/browser.py | 4 ++++ tests/test_browser.py | 12 ++++++++++++ 3 files changed, 26 insertions(+) diff --git a/docs/ChangeLog.rst b/docs/ChangeLog.rst index 7daff107..6dfaa34c 100644 --- a/docs/ChangeLog.rst +++ b/docs/ChangeLog.rst @@ -5,6 +5,16 @@ Release Notes Version 1.0 (in development) ============================ +Bug fixes +--------- + +* Form controls with the ``disabled`` attribute will no longer be submitted + to improve compliance with the HTML standard. If you were relying on this + bug to submit disabled elements, you can still achieve this by deleting the + ``disabled`` attribute from the element in the :class:`~mechanicalsoup.Form` + object directly. + [`#248 `__] + Version 0.11 ============ diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py index d81d3b1c..9accc264 100644 --- a/mechanicalsoup/browser.py +++ b/mechanicalsoup/browser.py @@ -162,6 +162,10 @@ def _request(self, form, url=None, **kwargs): for tag in form.select(selector): name = tag.get("name") # name-attribute of tag + # Skip disabled elements, since they should not be submitted. + if tag.has_attr('disabled'): + continue + if tag.name == "input": if tag.get("type", "").lower() in ("radio", "checkbox"): if "checked" not in tag.attrs: diff --git a/tests/test_browser.py b/tests/test_browser.py index 049649ca..2ed3f96d 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -129,6 +129,18 @@ def test__request_select_none(httpbin): assert response.json()['form'] == {'shape': 'round'} +def test__request_disabled_attr(httpbin): + """Make sure that disabled form controls are not submitted.""" + form_html = """ +
+ +
""".format(httpbin.url) + + browser = mechanicalsoup.Browser() + response = browser._request(BeautifulSoup(form_html, "lxml").form) + assert response.json()['form'] == {} + + def test_no_404(httpbin): browser = mechanicalsoup.Browser() resp = browser.get(httpbin + "/nosuchpage") From 6d6670a640ad5e5e5185fbd04c9ab694eb42cf43 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Wed, 13 Feb 2019 10:04:00 +0100 Subject: [PATCH 10/11] Mechanize {is -> was} incompatible with Python 3 Also, point to the bug in the new GitHub repository, the one we pointed to is obsolete. --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 9f2dc7d4..e8cb9597 100644 --- a/README.rst +++ b/README.rst @@ -18,8 +18,8 @@ JavaScript. MechanicalSoup was created by `M Hickford `__, who was a fond user of the `Mechanize `__ library. -Unfortunately, Mechanize is `incompatible with Python 3 -`__ and its development +Unfortunately, Mechanize was `incompatible with Python 3 until 2019 +`__ and its development stalled for several years. MechanicalSoup provides a similar API, built on Python giants `Requests `__ (for HTTP sessions) and `BeautifulSoup From d74818442cf0e84a7a15de8532534639b2f3e837 Mon Sep 17 00:00:00 2001 From: Thibault Roche Date: Wed, 16 Jan 2019 17:00:17 +0100 Subject: [PATCH 11/11] debug _find_link_internal and remove xfail on related test If you called the function with url_regex='/something' and not the link argument, it would set the url_regex to None, and thus the result would be impredictable. This is tested by the test added by #261 --- mechanicalsoup/stateful_browser.py | 2 +- tests/test_stateful_browser.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mechanicalsoup/stateful_browser.py b/mechanicalsoup/stateful_browser.py index bc18ab16..d215cbba 100644 --- a/mechanicalsoup/stateful_browser.py +++ b/mechanicalsoup/stateful_browser.py @@ -292,7 +292,7 @@ def _find_link_internal(self, link, args, kwargs): raise ValueError('link parameter cannot be treated as ' 'url_regex because url_regex is already ' 'present in keyword arguments') - else: + elif link: kwargs['url_regex'] = link try: diff --git a/tests/test_stateful_browser.py b/tests/test_stateful_browser.py index 316cfb26..e23d3cdd 100644 --- a/tests/test_stateful_browser.py +++ b/tests/test_stateful_browser.py @@ -424,8 +424,7 @@ def test_referer_submit_headers(httpbin): @pytest.mark.parametrize('expected, kwargs', [ pytest.param('/foo', {}, id='none'), pytest.param('/get', {'text': 'Link'}, id='text'), - pytest.param('/get', {'url_regex': 'get'}, id='regex', - marks=pytest.mark.xfail), + pytest.param('/get', {'url_regex': 'get'}, id='regex'), ]) def test_follow_link_arg(httpbin, expected, kwargs): browser = mechanicalsoup.StatefulBrowser()