# bettybossi.py
# mypy: disallow_untyped_defs=False
from typing import Dict, Optional, Tuple, Union
from requests import Session
from ._abstract import HEADERS, AbstractScraper
class BettyBossi(AbstractScraper):
    """Scraper for recipes published on bettybossi.ch.

    The website triggers a refresh after the page has been loaded for the
    first time, so the recipe content is only obtained on a second request.
    When no ``html`` is handed in, the constructor therefore issues two GET
    requests within a single session: the first one initialises the
    connection, the second one retrieves the actual page content.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "bettybossi.ch"

    def __init__(
        self,
        url: str,
        proxies: Optional[
            Dict[str, str]
        ] = None,  # optional proxy servers, applied to the session
        timeout: Optional[
            Union[float, Tuple[float, float], Tuple[float, None]]
        ] = None,  # optional timeout, forwarded to each GET request
        wild_mode: Optional[bool] = False,
        html: Union[str, bytes, None] = None,
    ) -> None:
        """Download the page (unless ``html`` is given) and set up the parent.

        Args:
            url: Address of the recipe page.
            proxies: Optional proxy mapping merged into the session's proxies.
            timeout: Optional timeout passed to both GET requests.
            wild_mode: Forwarded unchanged to the parent constructor.
            html: Pre-fetched page content; when not ``None`` no request is
                made here.
        """
        if html is None:
            with Session() as browser:
                browser.headers.update(HEADERS)
                browser.proxies.update(proxies or {})
                # First request only initialises the connection; its response
                # is intentionally discarded.
                browser.get(url, timeout=timeout)
                # Second request in the same session: the reloaded page.
                reloaded = browser.get(url, timeout=timeout)
                html = reloaded.content

        # As the html content is provided, the parent will not query the page
        super().__init__(url, proxies, timeout, wild_mode, html)

    def author(self):
        """Delegate to ``self.schema.author()``."""
        return self.schema.author()

    def title(self):
        """Delegate to ``self.schema.title()``."""
        return self.schema.title()

    def category(self):
        """Delegate to ``self.schema.category()``."""
        return self.schema.category()

    def total_time(self):
        """Delegate to ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Delegate to ``self.schema.yields()``."""
        return self.schema.yields()

    def image(self):
        """Delegate to ``self.schema.image()``."""
        return self.schema.image()

    def ingredients(self):
        """Delegate to ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Delegate to ``self.schema.instructions()``."""
        return self.schema.instructions()

    def ratings(self):
        """Delegate to ``self.schema.ratings()``."""
        return self.schema.ratings()

    def cuisine(self):
        """Delegate to ``self.schema.cuisine()``."""
        return self.schema.cuisine()

    def description(self):
        """Delegate to ``self.schema.description()``."""
        return self.schema.description()