/
goustojson.py
65 lines (50 loc) · 1.96 KB
/
goustojson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# mypy: disallow_untyped_defs=False
import requests
from ._abstract import AbstractScraper, HEADERS
from ._utils import get_minutes, get_yields, normalize_string, url_path_to_dict
class GoustoJson(AbstractScraper):
    """
    Ad-hoc solution to https://github.com/hhursev/recipe-scrapers/issues/376
    Let's see if it stands the test of time and reevaluate.

    Every field is answered from the JSON document that ``__init__`` fetches
    from Gousto's ``cmsreadbroker`` recipe endpoint.
    """

    def __init__(self, url, proxies=None, timeout=None, *args, **kwargs):
        """
        Fetch the JSON record for the recipe that ``url`` points at.

        :param url: recipe page URL; its last non-empty path segment is used
            as the recipe slug.
        :param proxies: handed to ``requests.get`` for the API call.
        :param timeout: handed to ``requests.get`` for the API call.
        """
        # NOTE(review): proxies/timeout are only used for the API request
        # below and are not forwarded to the base class -- confirm intended.
        super().__init__(url=url, *args, **kwargs)

        path = url_path_to_dict(url).get("path")
        # Strip trailing slashes first so ".../recipes/<slug>/" still yields
        # "<slug>" rather than an empty string (which would build a URL
        # without any slug in it).
        recipe_slug = path.rstrip("/").split("/")[-1]

        data_url = (
            f"https://production-api.gousto.co.uk/cmsreadbroker/v1/recipe/{recipe_slug}"
        )
        response = requests.get(
            data_url,
            headers=HEADERS,
            proxies=proxies,
            timeout=timeout,
        )
        recipe_json = response.json()

        # The recipe itself sits under data -> entry in the response body.
        self.page_data = recipe_json.get("data")
        self.data = self.page_data.get("entry")

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "gousto.co.uk"

    def title(self):
        """Return the recipe's ``title`` field."""
        return self.data.get("title")

    def total_time(self):
        """Return the largest ``prep_times`` value, passed through ``get_minutes``."""
        # Ignore missing (None) entries: mixing None with numbers would make
        # the sort raise TypeError even though a usable time is present.
        known_times = sorted(
            value
            for value in self.data.get("prep_times").values()
            if value is not None
        )
        return get_minutes(known_times[-1])

    def yields(self):
        """Return the last ``prep_times`` key (sorted), passed through ``get_yields``."""
        # NOTE(review): keys are ordered as plain strings, so a label holding
        # a multi-digit number would sort before a single-digit one -- confirm
        # the API never returns such keys.
        portion_labels = sorted(self.data.get("prep_times").keys())
        return get_yields(portion_labels[-1])

    def image(self):
        """Return the ``open_graph_image`` value from the ``seo`` section."""
        seo = self.data.get("seo")
        return seo.get("open_graph_image")

    def ingredients(self):
        """Return the normalized ``label`` of every ingredient entry that has one."""
        labels = []
        for entry in self.data.get("ingredients"):
            # Skip anything that is not a dict carrying a "label" key.
            if isinstance(entry, dict) and "label" in entry:
                labels.append(normalize_string(entry.get("label")))
        return labels

    def instructions(self):
        """Return all cooking instructions, normalized and joined by newlines."""
        return "\n".join(
            normalize_string(step.get("instruction"))
            for step in self.data.get("cooking_instructions")
            if isinstance(step, dict) and "instruction" in step
        )

    def ratings(self):
        """Return the ``average`` value from the ``rating`` section."""
        rating = self.data.get("rating")
        return rating.get("average")