Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

debug _find_link_internal #256

Closed
wants to merge 11 commits into from
4 changes: 2 additions & 2 deletions README.rst
Expand Up @@ -18,8 +18,8 @@ JavaScript.
MechanicalSoup was created by `M Hickford
<https://github.com/hickford/>`__, who was a fond user of the
`Mechanize <https://github.com/jjlee/mechanize>`__ library.
Unfortunately, Mechanize is `incompatible with Python 3
<https://github.com/jjlee/mechanize/issues/96>`__ and its development
Unfortunately, Mechanize was `incompatible with Python 3 until 2019
<https://github.com/python-mechanize/mechanize/issues/9>`__ and its development
stalled for several years. MechanicalSoup provides a similar API, built on Python
giants `Requests <http://docs.python-requests.org/en/latest/>`__ (for
HTTP sessions) and `BeautifulSoup
Expand Down
10 changes: 10 additions & 0 deletions docs/ChangeLog.rst
Expand Up @@ -5,6 +5,16 @@ Release Notes
Version 1.0 (in development)
============================

Bug fixes
---------

* Form controls with the ``disabled`` attribute will no longer be submitted,
to improve compliance with the HTML standard. If you were relying on this
bug to submit disabled elements, you can still achieve this by deleting the
``disabled`` attribute from the element in the :class:`~mechanicalsoup.Form`
object directly.
[`#248 <https://github.com/MechanicalSoup/MechanicalSoup/issues/248>`__]

Version 0.11
============

Expand Down
18 changes: 11 additions & 7 deletions docs/faq.rst
Expand Up @@ -77,18 +77,22 @@ There are other libraries with the same purpose as MechanicalSoup:

* `Mechanize <http://wwwsearch.sourceforge.net/mechanize/>`__ is an
ancestor of MechanicalSoup (getting its name from the Perl mechanize
module). It was a great tool, but doesn't support Python 3. It was
unmaintained for several years but got a new maintainer in 2017.
Note that Mechanize is a much bigger piece of code (around 20 times
more lines!) than MechanicalSoup, which is small because it
delegates most of its work to BeautifulSoup and requests.
module). It was a great tool, but became unmaintained for several
years and didn't support Python 3. Fortunately, Mechanize got a new
maintainer in 2017 and completed Python 3 support in 2019. Note that
Mechanize is a much bigger piece of code (around 20 times more
lines!) than MechanicalSoup, which is small because it delegates
most of its work to BeautifulSoup and requests.

* `RoboBrowser <https://github.com/jmcarp/robobrowser>`__ is very
similar to MechanicalSoup. Both are small libraries built on top of
requests and BeautifulSoup. Their APIs are very similar. Both have an
automated testsuite. As of writing, MechanicalSoup is more actively
maintained (only 1 really active developer and no activity the last
two years for RoboBrowser).
maintained (only 1 really active developer and no activity since
2015 on RoboBrowser). RoboBrowser is `broken on Python 3.7
<https://github.com/jmcarp/robobrowser/issues/87>`__, and while
there is an easy workaround, this is a sign that the lack of activity
is due to the project being abandoned rather than to its maturity.

* `Selenium <http://selenium-python.readthedocs.io/>`__ is a much
heavier solution: it launches a real web browser (Firefox,
Expand Down
10 changes: 7 additions & 3 deletions mechanicalsoup/browser.py
Expand Up @@ -162,16 +162,20 @@ def _request(self, form, url=None, **kwargs):
for tag in form.select(selector):
name = tag.get("name") # name-attribute of tag

# Skip disabled elements, since they should not be submitted.
if tag.has_attr('disabled'):
continue

if tag.name == "input":
if tag.get("type") in ("radio", "checkbox"):
if tag.get("type", "").lower() in ("radio", "checkbox"):
if "checked" not in tag.attrs:
continue
value = tag.get("value", "on")
else:
# browsers use empty string for inputs with missing values
value = tag.get("value", "")

if tag.get("type") == "file":
if tag.get("type", "").lower() == "file":
# read http://www.cs.tut.fi/~jkorpela/forms/file.html
# in browsers, file upload only happens if the form
# (or submit button) enctype attribute is set to
Expand All @@ -185,7 +189,7 @@ def _request(self, form, url=None, **kwargs):
data.append((name, value))

elif tag.name == "button":
if tag.get("type", "") in ("button", "reset"):
if tag.get("type", "").lower() in ("button", "reset"):
continue
else:
data.append((name, tag.get("value", "")))
Expand Down
18 changes: 13 additions & 5 deletions mechanicalsoup/form.py
Expand Up @@ -111,8 +111,8 @@ def set_checkbox(self, data, uncheck_other_boxes=True):
the HTML is served.
"""
for (name, value) in data.items():
checkboxes = self.form.find_all("input", {"name": name},
type="checkbox")
# Case-insensitive search for type=checkbox
checkboxes = self.find_by_type("input", "checkbox", {'name': name})
if not checkboxes:
raise InvalidFormMethod("No input checkbox named " + name)

Expand Down Expand Up @@ -155,7 +155,8 @@ def set_radio(self, data):
Only one radio button in the family can be checked.
"""
for (name, value) in data.items():
radios = self.form.find_all("input", {"name": name}, type="radio")
# Case-insensitive search for type=radio
radios = self.find_by_type("input", "radio", {'name': name})
if not radios:
raise InvalidFormMethod("No input radio named " + name)

Expand Down Expand Up @@ -332,8 +333,10 @@ def choose_submit(self, submit):
raise Exception('Submit already chosen. Cannot change submit!')

# All buttons NOT of type (button,reset) are valid submits
inps = [i for i in self.form.select('input[type="submit"], button')
if i.get('type', '') not in ('button', 'reset')]
inps = (self.find_by_type("input", "submit", dict()) +
self.form.find_all("button"))
inps = [i for i in inps
if i.get('type', '').lower() not in ('button', 'reset')]

# If no submit specified, choose the first one
if submit is None and inps:
Expand Down Expand Up @@ -373,3 +376,8 @@ def print_summary(self):
if subtag.string:
subtag.string = subtag.string.strip()
print(input_copy)

def find_by_type(self, tag_name, type_attr, attrs=None):
    """Find elements of the form by tag name and ``type`` attribute.

    The ``type`` attribute is matched case-insensitively, so
    ``type="Radio"`` and ``type="RADIO"`` in the page both match
    ``type_attr="radio"`` (and vice versa).

    :param tag_name: Tag name to search for, e.g. ``"input"``.
    :param type_attr: Wanted value of the ``type`` attribute; compared
        without regard to case.
    :param attrs: Optional dict of additional attribute filters, passed
        on to ``self.form.find_all``. It is copied, never modified.
    :return: Whatever ``self.form.find_all`` returns for these filters
        (presumably a list of matching bs4 tags -- confirm against the
        type of ``self.form``).
    """
    # Work on a copy so the caller's dict does not gain a 'type' key.
    attrs_dict = dict(attrs) if attrs else {}
    # Normalize the wanted type once so both sides of the comparison
    # are lowercase.
    wanted = type_attr.lower()
    # A missing ``type`` attribute reaches the filter as None; ``x and``
    # keeps such elements from matching (and avoids calling .lower() on it).
    attrs_dict['type'] = lambda x: x and x.lower() == wanted
    return self.form.find_all(tag_name, attrs=attrs_dict)
2 changes: 1 addition & 1 deletion mechanicalsoup/stateful_browser.py
Expand Up @@ -292,7 +292,7 @@ def _find_link_internal(self, link, args, kwargs):
raise ValueError('link parameter cannot be treated as '
'url_regex because url_regex is already '
'present in keyword arguments')
else:
elif link:
kwargs['url_regex'] = link

try:
Expand Down
22 changes: 17 additions & 5 deletions tests/test_browser.py
Expand Up @@ -45,18 +45,18 @@ def test__request(httpbin):
<textarea name="comments">freezer</textarea>
<fieldset>
<legend> Pizza Size </legend>
<p><input type=radio name=size value="small">Small</p>
<p><input type=radio name=size value="medium" checked>Medium</p>
<p><input type=RADIO name=size value="small">Small</p>
<p><input type=radiO name=size value="medium" checked>Medium</p>
<p><input type=radio name=size value="large">Large</p>
</fieldset>
<fieldset>
<legend> Pizza Toppings </legend>
<p><input type=checkbox name="topping" value="bacon" checked>Bacon</p>
<p><input type=checkbox name="topping" value="cheese">Extra Cheese</p>
<p><input type=CHECKBOX name="topping" value="bacon" checked>Bacon</p>
<p><input type=checkBox name="topping" value="cheese">Extra Cheese</p>
<p><input type=checkbox name="topping" value="onion" checked>Onion</p>
<p><input type=checkbox name="topping" value="mushroom">Mushroom</p>
</fieldset>
<input name="pic" type="file">
<input name="pic" type="FiLe">
<select name="shape">
<option value="round">Round</option>
<option value="square" selected>Square</option>
Expand Down Expand Up @@ -129,6 +129,18 @@ def test__request_select_none(httpbin):
assert response.json()['form'] == {'shape': 'round'}


def test__request_disabled_attr(httpbin):
"""Make sure that disabled form controls are not submitted."""
form_html = """
<form method="post" action="{}/post">
<input disabled name="nosubmit" value="1" />
</form>""".format(httpbin.url)

browser = mechanicalsoup.Browser()
response = browser._request(BeautifulSoup(form_html, "lxml").form)
assert response.json()['form'] == {}


def test_no_404(httpbin):
browser = mechanicalsoup.Browser()
resp = browser.get(httpbin + "/nosuchpage")
Expand Down
58 changes: 45 additions & 13 deletions tests/test_form.py
Expand Up @@ -74,30 +74,30 @@ def test_submit_set(httpbin):
@pytest.mark.parametrize("expected_post", [
pytest.param(
[
('text', 'Setting some text!'),
('comment', 'Testing preview page'),
('preview', 'Preview Page'),
('text', 'Setting some text!')
], id='preview'),
pytest.param(
[
('text', '= Heading =\n\nNew page here!\n'),
('comment', 'Created new page'),
('save', 'Submit changes'),
('text', '= Heading =\n\nNew page here!\n')
], id='save'),
pytest.param(
[
('text', '= Heading =\n\nNew page here!\n'),
('comment', 'Testing choosing cancel button'),
('cancel', 'Cancel'),
('text', '= Heading =\n\nNew page here!\n')
], id='cancel'),
])
def test_choose_submit(expected_post):
browser, url = setup_mock_browser(expected_post=expected_post)
browser.open(url)
form = browser.select_form('#choose-submit-form')
browser['text'] = expected_post[2][1]
browser['comment'] = expected_post[0][1]
form.choose_submit(expected_post[1][0])
browser['text'] = dict(expected_post)['text']
browser['comment'] = dict(expected_post)['comment']
form.choose_submit(expected_post[2][0])
res = browser.submit_selected()
assert(res.status_code == 200 and res.text == 'Success!')

Expand Down Expand Up @@ -313,6 +313,38 @@ def test_form_not_found():
form.set_select({'entree': ('no_multiple', 'no_multiple')})


def test_form_set_radio_checkbox(capsys):
browser = mechanicalsoup.StatefulBrowser()
browser.open_fake_page(page_with_various_fields,
url="http://example.com/invalid/")
form = browser.select_form("form")
form.set_radio({"size": "small"})
form.set_checkbox({"topping": "cheese"})
browser.get_current_form().print_summary()
out, err = capsys.readouterr()
# Different versions of bs4 show either <input></input> or
# <input/>. Normalize before comparing.
out = out.replace('></input>', '/>')
assert out == """<input name="foo"/>
<textarea name="bar"></textarea>
<select name="entree">
<option selected="selected" value="tofu">Tofu Stir Fry</option>
<option value="curry">Red Curry</option>
<option value="tempeh">Tempeh Tacos</option>
</select>
<input name="topping" type="checkbox" value="bacon"/>
<input checked="" name="topping" type="Checkbox" value="cheese"/>
<input name="topping" type="checkbox" value="onion"/>
<input name="topping" type="checkbox" value="mushroom"/>
<input checked="" name="size" type="Radio" value="small"/>
<input name="size" type="radio" value="medium"/>
<input name="size" type="radio" value="large"/>
<button name="action" value="cancel">Cancel</button>
<input type="submit" value="Select"/>
"""
assert err == ""


page_with_radio = '''
<html>
<form method="post">
Expand Down Expand Up @@ -351,14 +383,14 @@ def test_form_check_uncheck():
<legend> Pizza Toppings </legend>
<p><label> <input type=checkbox name="topping"
value="bacon"> Bacon </label></p>
<p><label> <input type=checkbox name="topping"
<p><label> <input type=Checkbox name="topping"
value="cheese" checked>Extra Cheese </label></p>
<p><label> <input type=checkbox name="topping"
value="onion" checked> Onion </label></p>
<p><label> <input type=checkbox name="topping"
value="mushroom"> Mushroom </label></p>
</fieldset>
<p><input name="size" type=radio value="small">Small</p>
<p><input name="size" type=Radio value="small">Small</p>
<p><input name="size" type=radio value="medium">Medium</p>
<p><input name="size" type=radio value="large">Large</p>
<button name="action" value="cancel">Cancel</button>
Expand Down Expand Up @@ -386,10 +418,10 @@ def test_form_print_summary(capsys):
<option value="tempeh">Tempeh Tacos</option>
</select>
<input name="topping" type="checkbox" value="bacon"/>
<input checked="" name="topping" type="checkbox" value="cheese"/>
<input checked="" name="topping" type="Checkbox" value="cheese"/>
<input checked="" name="topping" type="checkbox" value="onion"/>
<input name="topping" type="checkbox" value="mushroom"/>
<input name="size" type="radio" value="small"/>
<input name="size" type="Radio" value="small"/>
<input name="size" type="radio" value="medium"/>
<input name="size" type="radio" value="large"/>
<button name="action" value="cancel">Cancel</button>
Expand Down Expand Up @@ -444,11 +476,11 @@ def test_choose_submit_buttons(expected_post):
"""Buttons of type reset and button are not valid submits"""
text = """
<form method="post" action="mock://form.com/post">
<button type="button" name="sub1" value="val1">Val1</button>
<button type="submit" name="sub2" value="val2">Val2</button>
<button type="butTon" name="sub1" value="val1">Val1</button>
<button type="suBmit" name="sub2" value="val2">Val2</button>
<button type="reset" name="sub3" value="val3">Val3</button>
<button name="sub4" value="val4">Val4</button>
<input type="submit" name="sub5" value="val5">
<input type="subMit" name="sub5" value="val5">
</form>
"""
browser, url = setup_mock_browser(expected_post=expected_post, text=text)
Expand Down
33 changes: 16 additions & 17 deletions tests/test_stateful_browser.py
Expand Up @@ -125,25 +125,25 @@ def test_links():
@pytest.mark.parametrize("expected_post", [
pytest.param(
[
('text', 'Setting some text!'),
('comment', 'Selecting an input submit'),
('diff', 'Review Changes'),
('text', 'Setting some text!')
], id='input'),
pytest.param(
[
('text', '= Heading =\n\nNew page here!\n'),
('comment', 'Selecting a button submit'),
('cancel', 'Cancel'),
('text', '= Heading =\n\nNew page here!\n')
], id='button'),
])
def test_submit_btnName(expected_post):
'''Tests that the btnName argument chooses the submit button.'''
browser, url = setup_mock_browser(expected_post=expected_post)
browser.open(url)
browser.select_form('#choose-submit-form')
browser['text'] = expected_post[2][1]
browser['comment'] = expected_post[0][1]
res = browser.submit_selected(btnName=expected_post[1][0])
browser['text'] = dict(expected_post)['text']
browser['comment'] = dict(expected_post)['comment']
res = browser.submit_selected(btnName=expected_post[2][0])
assert(res.status_code == 200 and res.text == 'Success!')


Expand Down Expand Up @@ -257,7 +257,7 @@ def test_form_noaction():
browser, url = setup_mock_browser()
browser.open_fake_page(submit_form_noaction)
browser.select_form('#choose-submit-form')
with pytest.raises(ValueError, message="no URL to submit to"):
with pytest.raises(ValueError, match="no URL to submit to"):
browser.submit_selected()


Expand Down Expand Up @@ -421,18 +421,17 @@ def test_referer_submit_headers(httpbin):
assert headers['X-Test-Header'] == 'x-test-value'


def test_link_arg_text(httpbin):
browser = mechanicalsoup.StatefulBrowser()
browser.open_fake_page('<a href="/get">Link</a>', httpbin.url)
browser.follow_link(link_text='Link')
assert browser.get_url() == httpbin + '/get'


def test_link_arg_regex(httpbin):
@pytest.mark.parametrize('expected, kwargs', [
pytest.param('/foo', {}, id='none'),
pytest.param('/get', {'text': 'Link'}, id='text'),
pytest.param('/get', {'url_regex': 'get'}, id='regex'),
])
def test_follow_link_arg(httpbin, expected, kwargs):
browser = mechanicalsoup.StatefulBrowser()
browser.open_fake_page('<a href="/get">Link</a>', httpbin.url)
browser.follow_link(url_regex='.*')
assert browser.get_url() == httpbin + '/get'
html = '<a href="/foo">Bar</a><a href="/get">Link</a>'
browser.open_fake_page(html, httpbin.url)
browser.follow_link(**kwargs)
assert browser.get_url() == httpbin + expected


def test_link_arg_multiregex(httpbin):
Expand Down
9 changes: 8 additions & 1 deletion tests/utils.py
@@ -1,5 +1,7 @@
import mechanicalsoup
import requests_mock
from distutils.version import StrictVersion
import bs4
try:
from urllib.parse import parse_qsl
except ImportError:
Expand Down Expand Up @@ -62,7 +64,12 @@ def mock_post(mocked_adapter, url, expected, reply='Success!'):
def text_callback(request, context):
# Python 2's parse_qsl doesn't like None argument
query = parse_qsl(request.text) if request.text else []
assert (query == expected)
# In bs4 4.7.0+, CSS selectors return elements in page order,
# but did not in earlier versions.
if StrictVersion(bs4.__version__) >= StrictVersion('4.7.0'):
assert query == expected
else:
assert sorted(query) == sorted(expected)
return reply

mocked_adapter.register_uri('POST', url, text=text_callback)
Expand Down