/
woolworths.py
78 lines (57 loc) · 1.86 KB
/
woolworths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# mypy: disallow_untyped_defs=False
import requests
from ._abstract import HEADERS, AbstractScraper
from ._schemaorg import SchemaOrg
from ._utils import url_path_to_dict
class Woolworths(AbstractScraper):
    """Scraper for recipes hosted on woolworths.com.au.

    The recipe data is not taken from the page HTML here. The constructor
    requests a ``.model.json`` document from the Woolworths "foodhub"
    content endpoint, pulls the ``recipe_seo_data`` node out of it and
    wraps that node in ``SchemaOrg`` (raw mode). Most accessors simply
    delegate to that wrapper.
    """

    # Key path, starting at the JSON document root, to the recipe payload.
    _RECIPE_DATA_PATH = (":items", "root", ":items", "recipe_seo_data")

    def __init__(self, url, proxies=None, timeout=None, *args, **kwargs):
        """Initialise the scraper and fetch the recipe's JSON model.

        Args:
            url: Recipe page URL; its last path segment identifies the recipe.
            proxies: Passed to ``requests.get`` for the JSON model request.
            timeout: Passed to ``requests.get`` for the JSON model request.
            *args: Forwarded unchanged to the base class initialiser.
            **kwargs: Forwarded unchanged to the base class initialiser.
        """
        # NOTE(review): ``proxies`` and ``timeout`` are consumed here and not
        # forwarded to the base initialiser; the base signature is not visible
        # from this file, so the original call is kept as-is -- confirm.
        super().__init__(url=url, *args, **kwargs)

        # The last path segment of the recipe URL names the JSON model.
        target = url_path_to_dict(url)["path"].split("/")[-1]
        data_url = (
            "https://foodhub.woolworths.com.au/content/woolworths-foodhub/en/"
            f"{target}.model.json"
        )
        response = requests.get(
            data_url,
            headers=HEADERS,
            proxies=proxies,
            timeout=timeout,
        )

        # ``None`` when the document does not contain the expected path.
        self.page_data = self._extract_recipe_data(response.json())
        self.schema = SchemaOrg(self.page_data, raw=True)

    @classmethod
    def _extract_recipe_data(cls, payload):
        """Return the node at ``_RECIPE_DATA_PATH`` inside *payload*, or None.

        A missing key or a non-dict intermediate node yields ``None`` at any
        depth, instead of failing with an ``AttributeError`` part-way through
        a chain of ``.get()`` calls.
        """
        node = payload
        for key in cls._RECIPE_DATA_PATH:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    @classmethod
    def host(cls):
        """Return the host name handled by this scraper."""
        return "woolworths.com.au"

    def canonical_url(self):
        """Return ``self.url`` unchanged."""
        return self.url

    def title(self):
        """Return the title reported by the ``SchemaOrg`` wrapper."""
        return self.schema.title()

    def category(self):
        """Return the category reported by the ``SchemaOrg`` wrapper."""
        return self.schema.category()

    def total_time(self):
        """Return the total time reported by the ``SchemaOrg`` wrapper."""
        return self.schema.total_time()

    def cook_time(self):
        """Return the cook time reported by the ``SchemaOrg`` wrapper."""
        return self.schema.cook_time()

    def prep_time(self):
        """Return the prep time reported by the ``SchemaOrg`` wrapper."""
        return self.schema.prep_time()

    def yields(self):
        """Return the yields reported by the ``SchemaOrg`` wrapper."""
        return self.schema.yields()

    def image(self):
        """Return the image reported by the ``SchemaOrg`` wrapper."""
        return self.schema.image()

    def nutrients(self):
        """Return the nutrients reported by the ``SchemaOrg`` wrapper."""
        return self.schema.nutrients()

    def language(self):
        """Return the fixed language tag ``en-AU``."""
        return "en-AU"

    def ingredients(self):
        """Return the ingredients reported by the ``SchemaOrg`` wrapper."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions reported by the ``SchemaOrg`` wrapper."""
        return self.schema.instructions()

    def author(self):
        """Return the author reported by the ``SchemaOrg`` wrapper."""
        return self.schema.author()

    def cuisine(self):
        """Return the cuisine reported by the ``SchemaOrg`` wrapper."""
        return self.schema.cuisine()

    def site_name(self):
        """Return the fixed site name string."""
        return "Woolworths | Fresh Ideas For You"