forked from MechanicalSoup/MechanicalSoup
-
Notifications
You must be signed in to change notification settings - Fork 0
/
form.py
383 lines (320 loc) · 15.2 KB
/
form.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
from __future__ import print_function
import copy
import warnings
from .utils import LinkNotFoundError
from bs4 import BeautifulSoup
class InvalidFormMethod(LinkNotFoundError):
    """Signal that a :class:`Form` method was applied to the wrong element.

    Raised when a method of :class:`Form` is used on an HTML element of
    the wrong type (or on a malformed element). :func:`Form.set` catches
    it in order to perform element type deduction.

    It subclasses :class:`LinkNotFoundError` so that a single base class
    is enough to catch every exception specific to this module.
    """
class Form(object):
    """Build a fillable form.

    :param form: A bs4.element.Tag corresponding to an HTML form element.

    The Form class is responsible for preparing HTML forms for submission.
    It handles the following types of elements:
    input (text, checkbox, radio), select, and textarea.

    Each type is set by a method named after the type (e.g.
    :func:`~Form.set_select`), and then there are convenience methods (e.g.
    :func:`~Form.set`) that do type-deduction and set the value using the
    appropriate method.

    It also handles submit-type elements using :func:`~Form.choose_submit`.
    """

    def __init__(self, form):
        if form.name != 'form':
            # Name the offending tag in the message so the warning is
            # actionable (the placeholder used to be emitted verbatim).
            warnings.warn(
                "Constructed a Form from a '{}' instead of a 'form' element. "
                "This may be an error in a future version of MechanicalSoup."
                .format(form.name),
                PendingDeprecationWarning)

        self.form = form
        # Set to True by choose_submit(), which strips the names of the
        # submit elements that were not chosen.
        self._submit_chosen = False

        # Aliases for backwards compatibility
        # (Included specifically in __init__ to suppress them in Sphinx docs)
        self.attach = self.set_input
        self.input = self.set_input
        self.textarea = self.set_textarea

    def set_input(self, data):
        """Fill-in a set of fields in a form.

        Example: filling-in a login/password form

        .. code-block:: python

           form.set_input({"login": username, "password": password})

        This will find the input element named "login" and give it the
        value ``username``, and the input element named "password" and
        give it the value ``password``.

        :param data: Dict of ``{name: value, ...}``.
        :raises InvalidFormMethod: if the form has no input element whose
            *name*-attribute is ``name``.
        """
        for (name, value) in data.items():
            # Only the first input with this name is modified.
            i = self.form.find("input", {"name": name})
            if not i:
                raise InvalidFormMethod("No input field named " + name)
            i["value"] = value

    def uncheck_all(self, name):
        """Remove the *checked*-attribute of all input elements with
        a *name*-attribute given by ``name``.
        """
        for option in self.form.find_all("input", {"name": name}):
            if "checked" in option.attrs:
                del option.attrs["checked"]

    def check(self, data):
        """For backwards compatibility, this method handles checkboxes
        and radio buttons in a single call. It will not uncheck any
        checkboxes unless explicitly specified by ``data``, in contrast
        with the default behavior of :func:`~Form.set_checkbox`.

        :param data: Dict of ``{name: value, ...}``.
        :raises LinkNotFoundError: if ``name`` matches neither a checkbox
            nor a radio button, or if no element of the matching family
            has the requested value.
        """
        for (name, value) in data.items():
            # Try the checkbox interpretation first, then fall back to radio.
            try:
                self.set_checkbox({name: value}, uncheck_other_boxes=False)
                continue
            except InvalidFormMethod:
                pass
            try:
                self.set_radio({name: value})
                continue
            except InvalidFormMethod:
                pass
            raise LinkNotFoundError("No input checkbox/radio named " + name)

    def set_checkbox(self, data, uncheck_other_boxes=True):
        """Set the *checked*-attribute of input elements of type "checkbox"
        specified by ``data`` (i.e. check boxes).

        :param data: Dict of ``{name: value, ...}``.
            In the family of checkboxes whose *name*-attribute is ``name``,
            check the box whose *value*-attribute is ``value`` (a box
            without a *value*-attribute is treated as having value "on").
            If ``value`` is True (False), the first box of the family is
            checked (unchecked).
            To check multiple specific boxes, let ``value`` be a tuple or
            list.
        :param uncheck_other_boxes: If True (default), before checking any
            boxes specified by ``data``, uncheck the entire checkbox family.
            Consider setting to False if some boxes are checked by default
            when the HTML is served.
        :raises InvalidFormMethod: if there is no checkbox named ``name``.
        :raises LinkNotFoundError: if no checkbox in the family matches one
            of the requested values.
        """
        for (name, value) in data.items():
            # Case-insensitive search for type=checkbox
            checkboxes = self.find_by_type("input", "checkbox", {'name': name})
            if not checkboxes:
                raise InvalidFormMethod("No input checkbox named " + name)

            # uncheck if requested
            if uncheck_other_boxes:
                self.uncheck_all(name)

            # Wrap individual values (e.g. int, str) in a 1-element tuple.
            if not isinstance(value, (list, tuple)):
                value = (value,)

            # Check or uncheck one or more boxes
            for choice in value:
                choice_str = str(choice)  # Allow for example literal numbers
                for checkbox in checkboxes:
                    # A literal match on the value attribute takes
                    # precedence over the True/False shortcuts below.
                    if checkbox.attrs.get("value", "on") == choice_str:
                        checkbox["checked"] = ""
                        break
                    # Allow specifying True or False to check/uncheck
                    elif choice is True:
                        checkbox["checked"] = ""
                        break
                    elif choice is False:
                        if "checked" in checkbox.attrs:
                            del checkbox.attrs["checked"]
                        break
                else:
                    # The inner loop finished without a break: no match.
                    raise LinkNotFoundError(
                        "No input checkbox named %s with choice %s" %
                        (name, choice)
                    )

    def set_radio(self, data):
        """Set the *checked*-attribute of input elements of type "radio"
        specified by ``data`` (i.e. select radio buttons).

        :param data: Dict of ``{name: value, ...}``.
            In the family of radio buttons whose *name*-attribute is
            ``name``, check the radio button whose *value*-attribute is
            ``value`` (a button without a *value*-attribute is treated as
            having value "on").
            Only one radio button in the family can be checked.
        :raises InvalidFormMethod: if there is no radio button named
            ``name``.
        :raises LinkNotFoundError: if no radio button in the family has the
            requested value.
        """
        for (name, value) in data.items():
            # Case-insensitive search for type=radio
            radios = self.find_by_type("input", "radio", {'name': name})
            if not radios:
                raise InvalidFormMethod("No input radio named " + name)

            # only one radio button can be checked
            self.uncheck_all(name)

            # Check the appropriate radio button (value cannot be a list/tuple)
            for radio in radios:
                if radio.attrs.get("value", "on") == str(value):
                    radio["checked"] = ""
                    break
            else:
                raise LinkNotFoundError(
                    "No input radio named %s with choice %s" % (name, value)
                )

    def set_textarea(self, data):
        """Set the *string*-attribute of the first textarea element
        specified by ``data`` (i.e. set the text of a textarea).

        :param data: Dict of ``{name: value, ...}``.
            The textarea whose *name*-attribute is ``name`` will have
            its *string*-attribute set to ``value``.
        :raises InvalidFormMethod: if there is no textarea named ``name``.
        """
        for (name, value) in data.items():
            t = self.form.find("textarea", {"name": name})
            if not t:
                raise InvalidFormMethod("No textarea named " + name)
            t.string = value

    def set_select(self, data):
        """Set the *selected*-attribute of the first option element
        specified by ``data`` (i.e. select an option from a dropdown).

        :param data: Dict of ``{name: value, ...}``.
            Find the select element whose *name*-attribute is ``name``.
            Then select from among its children the option element whose
            *value*-attribute is ``value``. If no matching *value*-attribute
            is found, this will search for an option whose text matches
            ``value``. If the select element's *multiple*-attribute is set,
            then ``value`` can be a list or tuple to select multiple options.
        :raises InvalidFormMethod: if there is no select named ``name``.
        :raises LinkNotFoundError: if a list/tuple is given for a select
            without the *multiple*-attribute, or if a requested option is
            not found.
        """
        for (name, value) in data.items():
            select = self.form.find("select", {"name": name})
            if not select:
                raise InvalidFormMethod("No select named " + name)

            # Deselect all options first
            for option in select.find_all("option"):
                if "selected" in option.attrs:
                    del option.attrs["selected"]

            # Wrap individual values in a 1-element tuple.
            # If value is a list/tuple, select must be a <select multiple>.
            if not isinstance(value, (list, tuple)):
                value = (value,)
            elif "multiple" not in select.attrs:
                raise LinkNotFoundError("Cannot select multiple options!")

            for choice in value:
                option = select.find("option", {"value": choice})

                # try to find with text instead of value
                if not option:
                    option = select.find("option", string=choice)

                if not option:
                    raise LinkNotFoundError(
                        'Option %s not found for select %s' % (choice, name)
                    )

                option.attrs["selected"] = "selected"

    def __setitem__(self, name, value):
        """Forwards arguments to :func:`~Form.set`. For example,
        :code:`form["name"] = "value"` calls :code:`form.set("name", "value")`.
        """
        return self.set(name, value)

    def set(self, name, value, force=False):
        """Set a form element identified by ``name`` to a specified ``value``.
        The type of element (input, textarea, select, ...) does not
        need to be given; it is inferred by the following methods:
        :func:`~Form.set_checkbox`,
        :func:`~Form.set_radio`,
        :func:`~Form.set_input`,
        :func:`~Form.set_textarea`,
        :func:`~Form.set_select`.
        If none of these methods find a matching element, then if ``force``
        is True, a new element (``<input type="text" ...>``) will be
        added using :func:`~Form.new_control`.

        Example: filling-in a login/password form with EULA checkbox

        .. code-block:: python

           form.set("login", username)
           form.set("password", password)
           form.set("eula-checkbox", True)

        Example: uploading a file through a ``<input type="file"
        name="tagname">`` field (provide the path to the local file,
        and its content will be uploaded):

        .. code-block:: python

           form.set("tagname", path_to_local_file)

        :raises LinkNotFoundError: if no element named ``name`` is found and
            ``force`` is False.
        """
        # The setters are tried in this order; each raises InvalidFormMethod
        # when the form has no element of its kind with this name.
        for func in ("checkbox", "radio", "input", "textarea", "select"):
            try:
                getattr(self, "set_" + func)({name: value})
                return
            except InvalidFormMethod:
                pass
        if force:
            self.new_control('text', name, value=value)
            return
        raise LinkNotFoundError("No valid element named " + name)

    def new_control(self, type, name, value, **kwargs):
        """Add a new input element to the form.

        The arguments set the attributes of the new element. Any existing
        input or textarea element with the same ``name`` is removed from
        the form first.

        :param type: Value of the *type*-attribute of the new input.
        :param name: Value of the *name*-attribute of the new input.
        :param value: Value of the *value*-attribute of the new input.
        :param kwargs: Additional attributes to set on the new input.
        :return: The newly created input element.
        """
        old_input = self.form.find_all('input', {'name': name})
        for old in old_input:
            old.decompose()
        old_textarea = self.form.find_all('textarea', {'name': name})
        for old in old_textarea:
            old.decompose()
        # We don't have access to the original soup object (just the
        # Tag), so we instantiate a new BeautifulSoup() to call
        # new_tag(). We're only building the soup object, not parsing
        # anything, so the parser doesn't matter. Specify the one
        # included in Python to avoid having dependency issue.
        control = BeautifulSoup("", "html.parser").new_tag('input')
        control['type'] = type
        control['name'] = name
        control['value'] = value
        for k, v in kwargs.items():
            control[k] = v
        self.form.append(control)
        return control

    def choose_submit(self, submit):
        """Selects the input (or button) element to use for form submission.

        :param submit: The bs4.element.Tag (or just its *name*-attribute) that
            identifies the submit element to use. If ``None``, will choose the
            first valid submit element in the form, if one exists.

        To simulate a normal web browser, only one submit element must be
        sent. Therefore, this does not need to be called if there is only
        one submit element in the form.

        If the element is not found or if multiple elements match, raise a
        :class:`LinkNotFoundError` exception. If a submit element was already
        chosen and ``submit`` is not ``None``, raise an :class:`Exception`.

        Example: ::

            browser = mechanicalsoup.StatefulBrowser()
            browser.open(url)
            form = browser.select_form()
            form.choose_submit('form_name_attr')
            browser.submit_selected()
        """
        # Since choose_submit is destructive, it doesn't make sense to call
        # this method twice unless no submit is specified.
        if self._submit_chosen:
            if submit is None:
                return
            else:
                raise Exception('Submit already chosen. Cannot change submit!')

        # All buttons NOT of type (button,reset) are valid submits
        inps = (self.find_by_type("input", "submit", dict()) +
                self.form.find_all("button"))
        inps = [i for i in inps
                if i.get('type', '').lower() not in ('button', 'reset')]

        # If no submit specified, choose the first one
        if submit is None and inps:
            submit = inps[0]

        found = False
        for inp in inps:
            if inp == submit or (inp.has_attr('name') and
                                 inp['name'] == submit):
                if found:
                    raise LinkNotFoundError(
                        "Multiple submit elements match: {0}".format(submit)
                    )
                found = True
            else:
                # Delete any non-matching element's name so that it will be
                # omitted from the submitted form data.
                del inp['name']

        if not found and submit is not None:
            raise LinkNotFoundError(
                "Specified submit element not found: {0}".format(submit)
            )
        self._submit_chosen = True

    def print_summary(self):
        """Print a summary of the form.

        May help finding which fields need to be filled-in.
        """
        for field in self.form.find_all(
                ("input", "textarea", "select", "button")):
            # Work on a copy so that the form itself is left untouched.
            field_copy = copy.copy(field)
            # Text between the opening tag and the closing tag often
            # contains a lot of spaces that we don't want here.
            for subtag in field_copy.find_all() + [field_copy]:
                if subtag.string:
                    subtag.string = subtag.string.strip()
            print(field_copy)

    def find_by_type(self, tag_name, type_attr, attrs):
        """Find the elements of the form with a given *type*-attribute.

        :param tag_name: Name of the tags to search for (e.g. "input").
        :param type_attr: Lower-case value that the *type*-attribute must
            equal; the element's own attribute is lower-cased before the
            comparison, so the match is case-insensitive on that side.
        :param attrs: Dict of additional attribute filters. It is copied,
            so the caller's dict is not modified.
        :return: The result of ``find_all`` on the underlying form element.
        """
        attrs_dict = attrs.copy()
        # Elements without a type attribute (x is None) never match.
        attrs_dict['type'] = lambda x: x and x.lower() == type_attr
        return self.form.find_all(tag_name, attrs=attrs_dict)