From df6fef49fd305489deba8234e1f1e9c390560a11 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Wed, 12 Oct 2022 23:36:24 +0200 Subject: [PATCH 01/16] Initial Checkin Weightwatchers --- README.rst | 1 + recipe_scrapers/__init__.py | 2 + tests/test_data/weightwatchers.testhtml | 191 ++++++++++++++++++++++++ 3 files changed, 194 insertions(+) create mode 100644 tests/test_data/weightwatchers.testhtml diff --git a/README.rst b/README.rst index 38cdc9069..9b804c367 100644 --- a/README.rst +++ b/README.rst @@ -289,6 +289,7 @@ Scrapers available for: - `https://vegolosi.it/ `_ - `https://vegrecipesofindia.com/ `_ - `https://watchwhatueat.com/ `_ +- `https://www.weightwatchers.com/ `_ - `https://whatsgabycooking.com/ `_ - `https://www.wholefoodsmarket.com/ `_ - `https://www.wholefoodsmarket.co.uk/ `_ diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index d36ddda2d..2f5fd2f35 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -206,6 +206,7 @@ from .vegolosi import Vegolosi from .vegrecipesofindia import VegRecipesOfIndia from .watchwhatueat import WatchWhatUEat +from .weightwatchers import Weightwatchers from .whatsgabycooking import WhatsGabyCooking from .wholefoods import WholeFoods from .wikicookbook import WikiCookbook @@ -431,6 +432,7 @@ VegRecipesOfIndia.host(): VegRecipesOfIndia, Vegolosi.host(): Vegolosi, WatchWhatUEat.host(): WatchWhatUEat, + Weightwatchers.host(): Weightwatchers, WhatsGabyCooking.host(): WhatsGabyCooking, WholeFoods.host(): WholeFoods, WholeFoods.host(domain="co.uk"): WholeFoods, diff --git a/tests/test_data/weightwatchers.testhtml b/tests/test_data/weightwatchers.testhtml new file mode 100644 index 000000000..1549d1506 --- /dev/null +++ b/tests/test_data/weightwatchers.testhtml @@ -0,0 +1,191 @@ + + +Details | Tagebuch

Würstchengulasch mit Nudeln

earn 12 personal points
You'll earn
+2 Punkte von 2 Portion(en) Gemüse
0:25 minutes Total Time
0:25 minutes Preparation Time
- minutes Cook Time
Serves 2 people
Difficulty Level: Leicht
18 Uhr und alle haben Hunger? Dann koche rasch das Würstchengulasch und alle sind happy.
Portion(en)
Tageszeit
Datum

Zutaten

Geflügelwürstchen
2Stück
8 personal points
Zwiebel/n
1Stück, klein
0 personal points
Champignons, frisch
200g, braun
earn 0 personal points
Nudeln, trocken, jede Sorte
120g, Spiralnudeln
13 personal points
Salz/Jodsalz
1Prise(n)
0 personal points
Pflanzenöl, Rapsöl/Sonnenblumenöl
2TL
3 personal points
Tomaten, passiert
400g
earn 0 personal points
Pfeffer
1Prise(n)
0 personal points
Paprikapulver
12TL
0 personal points

Anleitung

  1. Würstchen in Scheiben schneiden. Zwiebel schälen und würfeln. Champignons trocken abreiben und vierteln. Nudeln nach Packungsanweisung in Salzwasser garen.
  2. Öl in einem Topf erhitzen und Zwiebelwürfel darin andünsten. Würstchenscheiben und Champignonviertel zufügen und ca. 3 Minuten anbraten. Mit Tomaten ablöschen, aufkochen und ca. 5 Minuten köcheln lassen. Würstchengulasch mit Salz, Pfeffer und Paprikapulver würzen. Nudeln abgießen, untermischen und in einer Frischhaltedose transportieren. Würstchengulasch erwärmen und servieren.
+ + + +
+ + + + + + +
\ No newline at end of file From e3f53e7b28c020dc5a8a99720afbce1370ef3a2c Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Thu, 13 Oct 2022 02:21:12 +0200 Subject: [PATCH 02/16] Scraping first parts --- recipe_scrapers/weightwatchers.py | 67 +++++++++++++++++++++++++++++++ tests/test_weightwatchers.py | 54 +++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 recipe_scrapers/weightwatchers.py create mode 100644 tests/test_weightwatchers.py diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py new file mode 100644 index 000000000..39c7c9016 --- /dev/null +++ b/recipe_scrapers/weightwatchers.py @@ -0,0 +1,67 @@ +# mypy: allow-untyped-defs + +import re + +from ._abstract import AbstractScraper +from ._utils import get_minutes, get_yields + + +class Weightwatchers(AbstractScraper): + @classmethod + def host(cls): + return "weightwatchers.de" + + def author(self): + return "WeightWatchers" + + def title(self): + return self.soup.find("h1").get_text().strip() + + def category(self): + return "WeightWatchers" + + def total_time(self): + return get_minutes( + self.soup.find("div", {"class": "styles_container__3N3E8"}) + .find("div", string=re.compile(r"minutes Total Time")) + .previous_sibling + ) + + def cook_time(self): + return get_minutes( + self.soup.find("div", {"class": "styles_container__3N3E8"}) + .find("div", string=re.compile(r"minutes Preparation Time")) + .previous_sibling + ) + + def prep_time(self): + return get_minutes( + self.soup.find("div", {"class": "styles_container__3N3E8"}) + .find("div", string=re.compile(r"minutes Cook Time")) + .previous_sibling + ) + + def yields(self): + return get_yields( + self.soup.find("div", {"class": "styles_container__3N3E8"}) + .find("div", string=re.compile(r"Serves *{0..9} people")) + .previous_sibling + ) + + def image(self): + return self.schema.image() + + def ingredients(self): + return self.schema.ingredients() + + 
def instructions(self): + return self.schema.instructions() + + def ratings(self): + return self.schema.ratings() + + def cuisine(self): + return self.schema.cuisine() + + def description(self): + return self.soup.find("div", {"class": "copy-1"}).get_text().strip() diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py new file mode 100644 index 000000000..7861991f8 --- /dev/null +++ b/tests/test_weightwatchers.py @@ -0,0 +1,54 @@ +# mypy: allow-untyped-defs + +from recipe_scrapers.weightwatchers import Weightwatchers +from tests import ScraperTest + + +class TestWeightwatchersScraper(ScraperTest): + + scraper_class = Weightwatchers + + def test_host(self): + self.assertEqual("weightwatchers.de", self.harvester_class.host()) + + def test_author(self): + self.assertEqual("WeightWatchers", self.harvester_class.author()) + + def test_title(self): + self.assertEqual(self.harvester_class.title(), "Würstchengulasch mit Nudeln") + + def test_category(self): + self.assertEqual("WeightWatchers", self.harvester_class.category()) + + def test_total_time(self): + self.assertEqual(25, self.harvester_class.total_time()) + + def test_cook_time(self): + self.assertEqual(0, self.harvester_class.cook_time()) + + def test_prep_time(self): + self.assertEqual(25, self.harvester_class.prep_time()) + + def test_yields(self): + self.assertEqual("2 servings", self.harvester_class.yields()) + + # def test_image(self): + # self.assertEqual(None, self.harvester_class.image()) + + # def test_ingredients(self): + # self.assertEqual(None, self.harvester_class.ingredients()) + + # def test_instructions(self): + # self.assertEqual(None, self.harvester_class.instructions()) + + # def test_ratings(self): + # self.assertEqual(None, self.harvester_class.ratings()) + + # def test_cuisine(self): + # self.assertEqual(None, self.harvester_class.cuisine()) + + def test_description(self): + self.assertEqual( + "18 Uhr und alle haben Hunger? 
Dann koche rasch das Würstchengulasch und alle sind happy.", + self.harvester_class.description(), + ) From e9a838c09dae633895a4e9ee5cb1a9935a162303 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sat, 15 Oct 2022 00:21:25 +0200 Subject: [PATCH 03/16] scrape times and servings --- recipe_scrapers/weightwatchers.py | 15 +++++++++++---- tests/test_weightwatchers.py | 3 +++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 39c7c9016..dbe08b503 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -30,22 +30,29 @@ def total_time(self): def cook_time(self): return get_minutes( self.soup.find("div", {"class": "styles_container__3N3E8"}) - .find("div", string=re.compile(r"minutes Preparation Time")) + .find("div", string=re.compile(r"minutes Cook Time")) .previous_sibling ) def prep_time(self): return get_minutes( self.soup.find("div", {"class": "styles_container__3N3E8"}) - .find("div", string=re.compile(r"minutes Cook Time")) + .find("div", string=re.compile(r"minutes Preparation Time")) .previous_sibling ) def yields(self): return get_yields( + self.soup.find("div", {"class": "styles_container__3N3E8"}).find( + "div", string=re.compile(r"Serves \d+ people") + ) + ) + + def difficulty(self): + return ( self.soup.find("div", {"class": "styles_container__3N3E8"}) - .find("div", string=re.compile(r"Serves *{0..9} people")) - .previous_sibling + .find("div", string=re.compile(r"Difficulty Level:")) + .previous_sibling.get_text() ) def image(self): diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 7861991f8..8301b6e15 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -52,3 +52,6 @@ def test_description(self): "18 Uhr und alle haben Hunger? 
Dann koche rasch das Würstchengulasch und alle sind happy.", self.harvester_class.description(), ) + + def test_difficulty(self): + self.assertEqual("Leicht", self.harvester_class.difficulty()) From 7153f2b119063b9364a04d665eaa840a326a9117 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sat, 15 Oct 2022 00:23:10 +0200 Subject: [PATCH 04/16] added formats like 1:20 (80min) to get_minutes --- recipe_scrapers/_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index e35511923..b5da34e61 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -1,10 +1,11 @@ # mypy: disallow_untyped_defs=False import html -import isodate import math import re +import isodate + from ._exceptions import ElementNotFoundInHtml FRACTIONS = { @@ -19,7 +20,7 @@ } TIME_REGEX = re.compile( - r"(\D*(?P[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra))?(\D*(?P\d+)\s*(minutes|mins|min|m|perc))?", + r"(\D*(?P[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra|:))?(\D*(?P\d+)\s*(minutes|mins|min|m|perc)?)?", re.IGNORECASE, ) From e47c8c7ad112ca91ece713607ac0b1d251745b77 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sat, 15 Oct 2022 03:00:26 +0200 Subject: [PATCH 05/16] scrape image url --- recipe_scrapers/_utils.py | 2 +- recipe_scrapers/weightwatchers.py | 16 +++++++++------- tests/test_weightwatchers.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index b5da34e61..18bf4ca11 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -20,7 +20,7 @@ } TIME_REGEX = re.compile( - r"(\D*(?P[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra|:))?(\D*(?P\d+)\s*(minutes|mins|min|m|perc)?)?", + r"(\D*(?P[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra|:))?(\D*(?P\d+)\s*(minutes|mins|min|m|perc|$))?", re.IGNORECASE, ) diff --git 
a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index dbe08b503..e68fd358a 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -56,7 +56,15 @@ def difficulty(self): ) def image(self): - return self.schema.image() + backgroundImgStyle = self.soup.find("div", {"class": "styles_image__2dnNm"})[ + "style" + ] + return ( + re.compile(r'url\("(?P\S*)"\);') + .search(backgroundImgStyle) + .groupdict() + .get("imgurl") + ) def ingredients(self): return self.schema.ingredients() @@ -64,11 +72,5 @@ def ingredients(self): def instructions(self): return self.schema.instructions() - def ratings(self): - return self.schema.ratings() - - def cuisine(self): - return self.schema.cuisine() - def description(self): return self.soup.find("div", {"class": "copy-1"}).get_text().strip() diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 8301b6e15..8b1c0325c 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -6,6 +6,9 @@ class TestWeightwatchersScraper(ScraperTest): + # Test-Url: + # https://cmx.weightwatchers.de/details/WWRECIPE:5667ab72a29713e4335bb342 + scraper_class = Weightwatchers def test_host(self): @@ -32,8 +35,11 @@ def test_prep_time(self): def test_yields(self): self.assertEqual("2 servings", self.harvester_class.yields()) - # def test_image(self): - # self.assertEqual(None, self.harvester_class.image()) + def test_image(self): + self.assertEqual( + "https://cmx.weightwatchers.com/assets-proxy/weight-watchers/image/upload/t_WINE_EXTRALARGE/i34cskr1hxegmxqukawd.jpg", + self.harvester_class.image(), + ) # def test_ingredients(self): # self.assertEqual(None, self.harvester_class.ingredients()) @@ -41,12 +47,6 @@ def test_yields(self): # def test_instructions(self): # self.assertEqual(None, self.harvester_class.instructions()) - # def test_ratings(self): - # self.assertEqual(None, self.harvester_class.ratings()) - - # def test_cuisine(self): - # 
self.assertEqual(None, self.harvester_class.cuisine()) - def test_description(self): self.assertEqual( "18 Uhr und alle haben Hunger? Dann koche rasch das Würstchengulasch und alle sind happy.", From 08df5d21b44b9df211d774b09b29ae6dd23e8296 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sun, 16 Oct 2022 20:19:14 +0200 Subject: [PATCH 06/16] scrape instructions --- recipe_scrapers/weightwatchers.py | 12 ++++++++++-- tests/test_weightwatchers.py | 24 ++++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index e68fd358a..12847c600 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -3,7 +3,7 @@ import re from ._abstract import AbstractScraper -from ._utils import get_minutes, get_yields +from ._utils import get_minutes, get_yields, normalize_string class Weightwatchers(AbstractScraper): @@ -70,7 +70,15 @@ def ingredients(self): return self.schema.ingredients() def instructions(self): - return self.schema.instructions() + instructions = self.soup.find( + "h3", {"id": "food-detail-recipe-instruction-header"} + ).parent.find("ol") + return "\n".join( + [ + normalize_string(instruction.get_text()) + for instruction in instructions.find_all("div", {"class": "copy-1"}) + ] + ) def description(self): return self.soup.find("div", {"class": "copy-1"}).get_text().strip() diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 8b1c0325c..9ac803a3d 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -41,11 +41,27 @@ def test_image(self): self.harvester_class.image(), ) - # def test_ingredients(self): - # self.assertEqual(None, self.harvester_class.ingredients()) + def test_ingredients(self): + self.assertEqual( + [ + "2Stück Geflügelwürstchen", + "1Stück, klein Zwiebel", + "Champignons, frisch 200g, braun", + "Nudeln, trocken, jede Sorte 120g, 
Spiralnudeln", + "Salz/Jodsalz 1Prise(n)", + "Pflanzenöl, Rapsöl/Sonnenblumenöl 2TL", + "Tomaten, passiert 400g", + "Pfeffer 1Prise(n)", + "Paprikapulver 1⁄2TL", + ], + self.harvester_class.ingredients(), + ) - # def test_instructions(self): - # self.assertEqual(None, self.harvester_class.instructions()) + def test_instructions(self): + self.assertEqual( + "Würstchen in Scheiben schneiden. Zwiebel schälen und würfeln. Champignons trocken abreiben und vierteln. Nudeln nach Packungsanweisung in Salzwasser garen.\nÖl in einem Topf erhitzen und Zwiebelwürfel darin andünsten. Würstchenscheiben und Champignonviertel zufügen und ca. 3 Minuten anbraten. Mit Tomaten ablöschen, aufkochen und ca. 5 Minuten köcheln lassen. Würstchengulasch mit Salz, Pfeffer und Paprikapulver würzen. Nudeln abgießen, untermischen und in einer Frischhaltedose transportieren. Würstchengulasch erwärmen und servieren.", + self.harvester_class.instructions(), + ) def test_description(self): self.assertEqual( From 3af43407220e6cd0a8b70fae179fd698891de13a Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sun, 16 Oct 2022 23:45:20 +0200 Subject: [PATCH 07/16] scrape ingridients --- README.rst | 4 ++-- recipe_scrapers/weightwatchers.py | 30 +++++++++++++++++++++++++++++- tests/test_weightwatchers.py | 18 +++++++++--------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index c2cb3d748..9570f8f09 100644 --- a/README.rst +++ b/README.rst @@ -291,7 +291,7 @@ Scrapers available for: - `https://vegolosi.it/ `_ - `https://vegrecipesofindia.com/ `_ - `https://watchwhatueat.com/ `_ -- `https://www.weightwatchers.com/ `_ +- `https://www.weightwatchers.com/ `_(*) - `https://whatsgabycooking.com/ `_ - `https://www.wholefoodsmarket.com/ `_ - `https://www.wholefoodsmarket.co.uk/ `_ @@ -303,7 +303,7 @@ Scrapers available for: - `https://zeit.de/ (wochenmarkt) `_ - `https://zenbelly.com/ `_ - +(*) offline saved files only. 
Page requirtes login Contribute ---------- diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 12847c600..d0f8dc5e6 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -66,8 +66,36 @@ def image(self): .get("imgurl") ) + def __parseIngridient(self, ingridient): + ingridientName = normalize_string( + ingridient.find("div", {"class": "styles_ingredientName__1Vffd"}) + .find("div") + .get_text() + ) + portionParts = ingridient.find( + "div", {"class": "styles_portion__2NQyq"} + ).find_all("span") + amount = ( + normalize_string(portionParts[0].get_text()) + + " " + + normalize_string(portionParts[1].get_text()) + ) + + if portionParts[2].get_text(): + ingridientName += "; " + normalize_string( + portionParts[2].get_text() + ).replace(", ", "") + + return amount + " " + ingridientName + def ingredients(self): - return self.schema.ingredients() + result = [] + ingridients = self.soup.find( + "h3", {"id": "food-detail-recipe-ingredients-header"} + ).parent.find_all("div", {"class": "styles_name__1OYVU"}) + for ingridient in ingridients: + result.append(self.__parseIngridient(ingridient)) + return result def instructions(self): instructions = self.soup.find( diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 9ac803a3d..c2c043cbb 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -44,15 +44,15 @@ def test_image(self): def test_ingredients(self): self.assertEqual( [ - "2Stück Geflügelwürstchen", - "1Stück, klein Zwiebel", - "Champignons, frisch 200g, braun", - "Nudeln, trocken, jede Sorte 120g, Spiralnudeln", - "Salz/Jodsalz 1Prise(n)", - "Pflanzenöl, Rapsöl/Sonnenblumenöl 2TL", - "Tomaten, passiert 400g", - "Pfeffer 1Prise(n)", - "Paprikapulver 1⁄2TL", + "2 Stück Geflügelwürstchen", + "1 Stück, klein Zwiebel/n", + "200 g Champignons, frisch; braun", + "120 g Nudeln, trocken, jede Sorte; Spiralnudeln", + "1 Prise(n) Salz/Jodsalz", + "2 TL 
Pflanzenöl, Rapsöl/Sonnenblumenöl", + "400 g Tomaten, passiert", + "1 Prise(n) Pfeffer", + "1⁄2 TL Paprikapulver", ], self.harvester_class.ingredients(), ) From 5c474d4ced69676322c0252681e7f7006562ae88 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Mon, 17 Oct 2022 00:24:28 +0200 Subject: [PATCH 08/16] scrape nutrients --- recipe_scrapers/weightwatchers.py | 19 +++++++++++++++++++ tests/test_weightwatchers.py | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index d0f8dc5e6..0727399fb 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -110,3 +110,22 @@ def instructions(self): def description(self): return self.soup.find("div", {"class": "copy-1"}).get_text().strip() + + def nutrients(self): + result = ( + self.soup.find( + "img", {"class": "styles_positivePointsIcon__2XYyV"} + ).next_sibling.get_text() + + " personal points" + ) + veggiepoints = self.soup.find( + "div", {"class": "styles_vegetableServings__2YSPy"} + ) + if veggiepoints: + result += "\n" + normalize_string( + veggiepoints.find( + "div", {"class": "styles_container__2p-YG"} + ).next_sibling.get_text() + ) + + return result diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index c2c043cbb..747b9ee7a 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -71,3 +71,9 @@ def test_description(self): def test_difficulty(self): self.assertEqual("Leicht", self.harvester_class.difficulty()) + + def test_nutrients(self): + self.assertEqual( + "12 personal points\n+2 Punkte von 2 Portion(en) Gemüse", + self.harvester_class.nutrients(), + ) From f8299b01b4a3b8384d33bebe31272d6e3408542b Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Wed, 19 Oct 2022 01:16:38 +0200 Subject: [PATCH 09/16] added scraper for public weightwatchers --- README.rst | 3 +- 
recipe_scrapers/__init__.py | 2 + recipe_scrapers/weightwatchers.py | 15 ++-- recipe_scrapers/weightwatcherspublic.py | 41 +++++++++ tests/test_data/weightwatcherspublic.testhtml | 29 +++++++ tests/test_weightwatchers.py | 2 +- tests/test_weightwatcherspublic.py | 86 +++++++++++++++++++ 7 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 recipe_scrapers/weightwatcherspublic.py create mode 100644 tests/test_data/weightwatcherspublic.testhtml create mode 100644 tests/test_weightwatcherspublic.py diff --git a/README.rst b/README.rst index 9570f8f09..c977e5473 100644 --- a/README.rst +++ b/README.rst @@ -303,7 +303,8 @@ Scrapers available for: - `https://zeit.de/ (wochenmarkt) `_ - `https://zenbelly.com/ `_ -(*) offline saved files only. Page requirtes login +(*) offline saved files only. Page requires login + Contribute ---------- diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index 0e143a508..22bedcb06 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -209,6 +209,7 @@ from .vegrecipesofindia import VegRecipesOfIndia from .watchwhatueat import WatchWhatUEat from .weightwatchers import Weightwatchers +from .weightwatcherspublic import weightwatchersPublic from .whatsgabycooking import WhatsGabyCooking from .wholefoods import WholeFoods from .wikicookbook import WikiCookbook @@ -447,6 +448,7 @@ Yummly.host(): Yummly, ZeitWochenmarkt.host(): ZeitWochenmarkt, ZenBelly.host(): ZenBelly, + weightwatchersPublic.host(): weightwatchersPublic, } diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 0727399fb..8d95b9bc8 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -9,7 +9,7 @@ class Weightwatchers(AbstractScraper): @classmethod def host(cls): - return "weightwatchers.de" + return "www.weightwatchers.com" def author(self): return "WeightWatchers" @@ -17,40 +17,43 @@ def author(self): def title(self): return 
self.soup.find("h1").get_text().strip() + def __findDataContainer(self): + return self.soup.find("div", {"class": "styles_container__3N3E8"}) + def category(self): return "WeightWatchers" def total_time(self): return get_minutes( - self.soup.find("div", {"class": "styles_container__3N3E8"}) + self.__findDataContainer() .find("div", string=re.compile(r"minutes Total Time")) .previous_sibling ) def cook_time(self): return get_minutes( - self.soup.find("div", {"class": "styles_container__3N3E8"}) + self.__findDataContainer() .find("div", string=re.compile(r"minutes Cook Time")) .previous_sibling ) def prep_time(self): return get_minutes( - self.soup.find("div", {"class": "styles_container__3N3E8"}) + self.__findDataContainer() .find("div", string=re.compile(r"minutes Preparation Time")) .previous_sibling ) def yields(self): return get_yields( - self.soup.find("div", {"class": "styles_container__3N3E8"}).find( + self.__findDataContainer().find( "div", string=re.compile(r"Serves \d+ people") ) ) def difficulty(self): return ( - self.soup.find("div", {"class": "styles_container__3N3E8"}) + self.__findDataContainer() .find("div", string=re.compile(r"Difficulty Level:")) .previous_sibling.get_text() ) diff --git a/recipe_scrapers/weightwatcherspublic.py b/recipe_scrapers/weightwatcherspublic.py new file mode 100644 index 000000000..e9ed3c6ad --- /dev/null +++ b/recipe_scrapers/weightwatcherspublic.py @@ -0,0 +1,41 @@ +# mypy: allow-untyped-defs + +from ._utils import get_minutes, get_yields +from .weightwatchers import Weightwatchers + + +class weightwatchersPublic(Weightwatchers): + @classmethod + def host(cls): + return "www.weightwatchers.com" + + def __findDataContainer(self): + return self.soup.find("div", {"class": "HorizontalList_list__GESs0"}) + + def __extractItemField(self, item): + return item.find("div", {"data-e2e-name": "attribute_item_value"}) + + def total_time(self): + return get_minutes( + self.__extractItemField(self.__findDataContainer().contents[0]) + 
) + + def cook_time(self): + return get_minutes( + self.__extractItemField(self.__findDataContainer().contents[2]) + ) + + def prep_time(self): + return get_minutes( + self.__extractItemField(self.__findDataContainer().contents[1]) + ) + + def yields(self): + return get_yields( + self.__extractItemField(self.__findDataContainer().contents[3]) + ) + + def difficulty(self): + return self.__extractItemField( + self.__findDataContainer().contents[4] + ).get_text() diff --git a/tests/test_data/weightwatcherspublic.testhtml b/tests/test_data/weightwatcherspublic.testhtml new file mode 100644 index 000000000..bb6265d50 --- /dev/null +++ b/tests/test_data/weightwatcherspublic.testhtml @@ -0,0 +1,29 @@ + + + Kartoffelgulasch Rezept | WW Deutschland + + + + + + +
Foto Kartoffelgulasch von WW

Kartoffelgulasch

10 - 13
PersonalPoints™ pro Portion
Gesamtzeit
40 min
Zubereitungsdauer
40 min
Garzeit
0 min
Portion(en)
4
Schwierigkeitsgrad
Leicht
Das Rezept zaubert ein saftiges, würziges Gericht auf den Tisch und schmeckt garantiert.

Zutaten

Kartoffeln

800 g, vorwiegend festkochend

Zwiebel/n

2 Stück, mittelgroß

Paprika

2 Stück, rot

Paprika

2 Stück, grün

Petersilie

1 EL, gehackt

Paprika

2 Stück, gelb

Tomaten, frisch

250 g

Wiener Würstchen

4 Stück

Pflanzenöl, Rapsöl/Sonnenblumenöl

2 TL

Tomatenmark

2 EL

Gemüsebouillon/Gemüsebrühe, zubereitet

250 ml, (1 TL Instantpulver)

Oregano

1 TL, gehackt

Paprikapulver

1 TL

Salz/Jodsalz

1 Prise(n)

Pfeffer

1 Prise(n)

Schmand, 24 % Fett

2 EL

Anleitung

  1. Kartoffeln und Zwiebeln schälen. Kartoffeln würfeln und Zwiebeln in Streifen schneiden. Paprika waschen, entkernen und in Stücke schneiden. Tomaten waschen und in Spalten schneiden. Würstchen in Scheiben schneiden.
  2. Öl in einem Topf erhitzen, Kartoffelwürfel und Zwiebelstreifen darin ca. 5 Minuten anbraten. Paprikastücke zufügen, Tomatenmark einrühren und kurz mitbraten. Mit Brühe ablöschen und zugedeckt ca. 15 Minuten garen.
  3. Kartoffelgulasch mit Oregano, Paprikapulver, Salz und Pfeffer würzen. Würstchenscheiben und Tomatenspalten zufügen und weitere ca. 5 Minuten garen. Kartoffelgulasch mit Salz und Pfeffer abschmecken. Mit einem Klecks Schmand und Petersilie bestreut servieren.
\ No newline at end of file diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 747b9ee7a..a356275d4 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -12,7 +12,7 @@ class TestWeightwatchersScraper(ScraperTest): scraper_class = Weightwatchers def test_host(self): - self.assertEqual("weightwatchers.de", self.harvester_class.host()) + self.assertEqual("www.weightwatchers.com", self.harvester_class.host()) def test_author(self): self.assertEqual("WeightWatchers", self.harvester_class.author()) diff --git a/tests/test_weightwatcherspublic.py b/tests/test_weightwatcherspublic.py new file mode 100644 index 000000000..f5e75ab93 --- /dev/null +++ b/tests/test_weightwatcherspublic.py @@ -0,0 +1,86 @@ +# mypy: allow-untyped-defs + +from recipe_scrapers.weightwatcherspublic import weightwatchersPublic +from tests import ScraperTest + + +class TestweightwatchersPublicScraper(ScraperTest): + + scraper_class = weightwatchersPublic + + # Test-Url: + # https://www.weightwatchers.com/de/rezept/kartoffelgulasch/562a9b02873e1afb2a3c4c13 + + def test_host(self): + self.assertEqual("www.weightwatchers.com", self.harvester_class.host()) + + def test_author(self): + self.assertEqual("WeightWatchers", self.harvester_class.author()) + + def test_title(self): + self.assertEqual(self.harvester_class.title(), "Kartoffelgulasch") + + def test_category(self): + self.assertEqual("WeightWatchers", self.harvester_class.category()) + + def test_total_time(self): + self.assertEqual(40, self.harvester_class.total_time()) + + def test_cook_time(self): + self.assertEqual(0, self.harvester_class.cook_time()) + + def test_prep_time(self): + self.assertEqual(40, self.harvester_class.prep_time()) + + def test_yields(self): + self.assertEqual("4 servings", self.harvester_class.yields()) + + def test_image(self): + self.assertEqual( + "https://cmx.weightwatchers.com/assets-proxy/weight-watchers/image/upload/q_auto/h7wo0hbnwcleucj30sbw.jpg?auto=webp", + 
self.harvester_class.image(), + ) + + def test_ingredients(self): + self.assertEqual( + [ + "800 g Kartoffeln; vorwiegend festkochend", + "2 Stück, mittelgroß Zwiebel/n", + "2 Stück Paprika; rot", + "2 Stück Paprika; grün", + "1 EL Petersilie; gehackt", + "2 Stück Paprika; gelb", + "250 g Tomaten, frisch", + "4 Stück Wiener Würstchen", + "2 TL Pflanzenöl, Rapsöl/Sonnenblumenöl", + "2 EL Tomatenmark", + "250 ml Gemüsebouillon/Gemüsebrühe, zubereitet; (1 TL Instantpulver)", + "1 TL Oregano; gehackt", + "1 TL Paprikapulver", + "1 Prise(n) Salz/Jodsalz", + "1 Prise(n) Pfeffer", + "2 EL Schmand, 24 % Fett", + ], + self.harvester_class.ingredients(), + ) + + def test_instructions(self): + self.assertEqual( + "Kartoffeln und Zwiebeln schälen. Kartoffeln würfeln und Zwiebeln in Streifen schneiden. Paprika waschen, entkernen und in Stücke schneiden. Tomaten waschen und in Spalten schneiden. Würstchen in Scheiben schneiden.\nÖl in einem Topf erhitzen, Kartoffelwürfel und Zwiebelstreifen darin ca. 5 Minuten anbraten. Paprikastücke zufügen, Tomatenmark einrühren und kurz mitbraten. Mit Brühe ablöschen und zugedeckt ca. 15 Minuten garen.\nKartoffelgulasch mit Oregano, Paprikapulver, Salz und Pfeffer würzen. Würstchenscheiben und Tomatenspalten zufügen und weitere ca. 5 Minuten garen. Kartoffelgulasch mit Salz und Pfeffer abschmecken. 
Mit einem Klecks Schmand und Petersilie bestreut servieren.", + self.harvester_class.instructions(), + ) + + def test_description(self): + self.assertEqual( + "Das Rezept zaubert ein saftiges, würziges Gericht auf den Tisch und schmeckt garantiert.", + self.harvester_class.description(), + ) + + def test_difficulty(self): + self.assertEqual("Leicht", self.harvester_class.difficulty()) + + def test_nutrients(self): + self.assertEqual( + "10 - 13 PersonalPoints™ pro Portion", + self.harvester_class.nutrients(), + ) From af36c483e743bfe493ca0db3567a5eebe3fc5adb Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Wed, 19 Oct 2022 19:36:21 +0200 Subject: [PATCH 10/16] splitted data scraping between classes --- recipe_scrapers/weightwatchers.py | 65 +++++++++++++------------ recipe_scrapers/weightwatcherspublic.py | 37 +++++--------- tests/test_weightwatcherspublic.py | 2 +- 3 files changed, 49 insertions(+), 55 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 8d95b9bc8..106435d48 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -17,57 +17,62 @@ def author(self): def title(self): return self.soup.find("h1").get_text().strip() - def __findDataContainer(self): - return self.soup.find("div", {"class": "styles_container__3N3E8"}) - def category(self): return "WeightWatchers" + # cooking times, yield, difficulty are in a common div in public and non-public recipes + # but class of that block and sub elements are different + # so finding the block and extracting a value will be overridden in class for public recipes, + # but picking the data item based on order is don in this base class (total_time(), cook_time() and so on) + def _findDataContainer(self): + return self.soup.find("div", {"class": "styles_container__3N3E8"}) + + def _extractItemField(self, item): + return item.contents[1] + def total_time(self): return get_minutes( - 
self.__findDataContainer() - .find("div", string=re.compile(r"minutes Total Time")) - .previous_sibling + self._extractItemField(self._findDataContainer().contents[0]) ) - def cook_time(self): + def prep_time(self): return get_minutes( - self.__findDataContainer() - .find("div", string=re.compile(r"minutes Cook Time")) - .previous_sibling + self._extractItemField(self._findDataContainer().contents[1]) ) - def prep_time(self): + def cook_time(self): return get_minutes( - self.__findDataContainer() - .find("div", string=re.compile(r"minutes Preparation Time")) - .previous_sibling + self._extractItemField(self._findDataContainer().contents[2]) ) def yields(self): - return get_yields( - self.__findDataContainer().find( - "div", string=re.compile(r"Serves \d+ people") - ) - ) + return get_yields(self._extractItemField(self._findDataContainer().contents[3])) def difficulty(self): - return ( - self.__findDataContainer() - .find("div", string=re.compile(r"Difficulty Level:")) - .previous_sibling.get_text() - ) + return self._extractItemField(self._findDataContainer().contents[4]).get_text() + + # Alternative way to extract data based on description instead of position + # def total_time(self): + # return get_minutes( + # self.__findDataContainer() + # .find("div", string=re.compile(r"minutes Total Time")) + # .previous_sibling + # ) def image(self): backgroundImgStyle = self.soup.find("div", {"class": "styles_image__2dnNm"})[ "style" ] - return ( - re.compile(r'url\("(?P<imgurl>\S*)"\);') - .search(backgroundImgStyle) - .groupdict() - .get("imgurl") - ) + + if backgroundImgStyle: + return ( + re.compile(r'url\("(?P<imgurl>\S*)"\);') + .search(backgroundImgStyle) + .groupdict() + .get("imgurl") + ) + + return None def __parseIngridient(self, ingridient): ingridientName = normalize_string( diff --git a/recipe_scrapers/weightwatcherspublic.py b/recipe_scrapers/weightwatcherspublic.py index e9ed3c6ad..9d14c2126 100644 --- a/recipe_scrapers/weightwatcherspublic.py +++ 
b/recipe_scrapers/weightwatcherspublic.py @@ -1,41 +1,30 @@ # mypy: allow-untyped-defs -from ._utils import get_minutes, get_yields +from ._utils import normalize_string from .weightwatchers import Weightwatchers +# collect the differences between public and non-public weightwatcher recipes in this class class weightwatchersPublic(Weightwatchers): @classmethod def host(cls): return "www.weightwatchers.com" - def __findDataContainer(self): + def _findDataContainer(self): return self.soup.find("div", {"class": "HorizontalList_list__GESs0"}) - def __extractItemField(self, item): + def _extractItemField(self, item): return item.find("div", {"data-e2e-name": "attribute_item_value"}) - def total_time(self): - return get_minutes( - self.__extractItemField(self.__findDataContainer().contents[0]) - ) + def image(self): + return self.soup.find("img", {"class": "FoodMasthead_heroImage__BjVdZ"})["src"] - def cook_time(self): - return get_minutes( - self.__extractItemField(self.__findDataContainer().contents[2]) - ) + def nutrients(self): + return self.soup.find("div", {"class": "Coin_text__3UOb0"})["aria-label"] - def prep_time(self): - return get_minutes( - self.__extractItemField(self.__findDataContainer().contents[1]) + def description(self): + return normalize_string( + self.soup.find("div", {"data-e2e-name": "food_masthead_detail_description"}) + .find("div", {"class": "ReadMoreLess_collapsed__IAzxP"}) + .get_text() ) - - def yields(self): - return get_yields( - self.__extractItemField(self.__findDataContainer().contents[3]) - ) - - def difficulty(self): - return self.__extractItemField( - self.__findDataContainer().contents[4] - ).get_text() diff --git a/tests/test_weightwatcherspublic.py b/tests/test_weightwatcherspublic.py index f5e75ab93..16a2c3b54 100644 --- a/tests/test_weightwatcherspublic.py +++ b/tests/test_weightwatcherspublic.py @@ -81,6 +81,6 @@ def test_difficulty(self): def test_nutrients(self): self.assertEqual( - "10 - 13 PersonalPoints™ pro Portion", + 
"10 bis 13 PersonalPoints", self.harvester_class.nutrients(), ) From 008cdeaefaf9239f626b1af56ff5b0561325d0c3 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Wed, 19 Oct 2022 21:00:51 +0200 Subject: [PATCH 11/16] scrape instructions in public recipes --- recipe_scrapers/weightwatchers.py | 11 ++++++++--- recipe_scrapers/weightwatcherspublic.py | 5 +++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 106435d48..1b2054284 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -105,17 +105,22 @@ def ingredients(self): result.append(self.__parseIngridient(ingridient)) return result - def instructions(self): + def _getInstructions(self, headertag, headerattribute, headervalue, instructiontag): instructions = self.soup.find( - "h3", {"id": "food-detail-recipe-instruction-header"} + headertag, {headerattribute: headervalue} ).parent.find("ol") return "\n".join( [ normalize_string(instruction.get_text()) - for instruction in instructions.find_all("div", {"class": "copy-1"}) + for instruction in instructions.find_all(instructiontag) ] ) + def instructions(self): + return self._getInstructions( + "h3", "id", "food-detail-recipe-instruction-header", "div" + ) + def description(self): return self.soup.find("div", {"class": "copy-1"}).get_text().strip() diff --git a/recipe_scrapers/weightwatcherspublic.py b/recipe_scrapers/weightwatcherspublic.py index 9d14c2126..0f141156f 100644 --- a/recipe_scrapers/weightwatcherspublic.py +++ b/recipe_scrapers/weightwatcherspublic.py @@ -28,3 +28,8 @@ def description(self): .find("div", {"class": "ReadMoreLess_collapsed__IAzxP"}) .get_text() ) + + def instructions(self): + return self._getInstructions( + "h2", "class", "InstructionsFood_headline__vw7cn", "span" + ) From d40dcc6a31b84826077eed20f56821c818c12bc5 Mon Sep 17 00:00:00 2001 From: bickerdyke 
<48774580+bickerdyke@users.noreply.github.com> Date: Wed, 19 Oct 2022 23:49:07 +0200 Subject: [PATCH 12/16] scrape ingridieints public and nonpublic --- recipe_scrapers/weightwatchers.py | 45 +++++++++++++++---------- recipe_scrapers/weightwatcherspublic.py | 29 ++++++++++++++++ tests/test_weightwatchers.py | 3 ++ tests/test_weightwatcherspublic.py | 5 ++- 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 1b2054284..0e74e5bd0 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -74,34 +74,43 @@ def image(self): return None - def __parseIngridient(self, ingridient): - ingridientName = normalize_string( + def _findIngridientTags(self): + return self.soup.find( + "h3", {"id": "food-detail-recipe-ingredients-header"} + ).parent.find_all("div", {"class": "styles_name__1OYVU"}) + + def _extractIngridientName(self, ingridient): + return normalize_string( ingridient.find("div", {"class": "styles_ingredientName__1Vffd"}) .find("div") .get_text() ) - portionParts = ingridient.find( - "div", {"class": "styles_portion__2NQyq"} - ).find_all("span") - amount = ( - normalize_string(portionParts[0].get_text()) - + " " - + normalize_string(portionParts[1].get_text()) + + def _extractPortionParts(self, ingridient): + tags = ingridient.find("div", {"class": "styles_portion__2NQyq"}).find_all( + "span" ) - if portionParts[2].get_text(): - ingridientName += "; " + normalize_string( - portionParts[2].get_text() - ).replace(", ", "") + return ( + normalize_string(tags[0].get_text()), + normalize_string(tags[1].get_text()), + normalize_string(tags[2].get_text().replace(", ", "")) if tags[2] else None, + ) - return amount + " " + ingridientName + def __parseIngridient(self, ingridient): + ingridientName = self._extractIngridientName(ingridient) + amount, unit, comment = self._extractPortionParts(ingridient) + + if comment: + return amount + " " + unit + " " + 
ingridientName + "; " + comment + else: + return amount + " " + unit + " " + ingridientName def ingredients(self): result = [] - ingridients = self.soup.find( - "h3", {"id": "food-detail-recipe-ingredients-header"} - ).parent.find_all("div", {"class": "styles_name__1OYVU"}) - for ingridient in ingridients: + ingridientTags = self._findIngridientTags() + + for ingridient in ingridientTags: result.append(self.__parseIngridient(ingridient)) return result diff --git a/recipe_scrapers/weightwatcherspublic.py b/recipe_scrapers/weightwatcherspublic.py index 0f141156f..a539ac678 100644 --- a/recipe_scrapers/weightwatcherspublic.py +++ b/recipe_scrapers/weightwatcherspublic.py @@ -33,3 +33,32 @@ def instructions(self): return self._getInstructions( "h2", "class", "InstructionsFood_headline__vw7cn", "span" ) + + def _findIngridientTags(self): + return ( + self.soup.find("div", {"class": "IngredientsCard_card__VSY4x"}) + .find("div", {"data-e2e-name": "vertical_list_items"}) + .find_all("div", recursive=False) + ) + + def _extractIngridientName(self, ingridient): + return normalize_string( + ingridient.find("p", {"data-e2e-name": "ingredient_name"}).get_text() + ) + + def _extractPortionParts(self, ingridient): + tags = ingridient.find( + "p", {"data-e2e-name": "ingredient_description"} + ).find_all("span") + + comment = None + unit = None + if len(tags) > 2: + comment = normalize_string(tags[2].get_text().replace(", ", "", 1)) + unit = normalize_string(tags[1].get_text()) + else: + descriptionParts = normalize_string(tags[1].get_text()).split(", ", 1) + unit = descriptionParts[0] + comment = descriptionParts[1] if len(descriptionParts) > 1 else None + + return (normalize_string(tags[0].get_text()), unit, comment) diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index a356275d4..79e1acbd7 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -57,6 +57,9 @@ def test_ingredients(self): self.harvester_class.ingredients(), ) + def 
test_ingredientsCount(self): + self.assertEqual(9, len(self.harvester_class.ingredients())) + def test_instructions(self): self.assertEqual( "Würstchen in Scheiben schneiden. Zwiebel schälen und würfeln. Champignons trocken abreiben und vierteln. Nudeln nach Packungsanweisung in Salzwasser garen.\nÖl in einem Topf erhitzen und Zwiebelwürfel darin andünsten. Würstchenscheiben und Champignonviertel zufügen und ca. 3 Minuten anbraten. Mit Tomaten ablöschen, aufkochen und ca. 5 Minuten köcheln lassen. Würstchengulasch mit Salz, Pfeffer und Paprikapulver würzen. Nudeln abgießen, untermischen und in einer Frischhaltedose transportieren. Würstchengulasch erwärmen und servieren.", diff --git a/tests/test_weightwatcherspublic.py b/tests/test_weightwatcherspublic.py index 16a2c3b54..62de13f86 100644 --- a/tests/test_weightwatcherspublic.py +++ b/tests/test_weightwatcherspublic.py @@ -45,7 +45,7 @@ def test_ingredients(self): self.assertEqual( [ "800 g Kartoffeln; vorwiegend festkochend", - "2 Stück, mittelgroß Zwiebel/n", + "2 Stück Zwiebel/n; mittelgroß", "2 Stück Paprika; rot", "2 Stück Paprika; grün", "1 EL Petersilie; gehackt", @@ -64,6 +64,9 @@ def test_ingredients(self): self.harvester_class.ingredients(), ) + def test_ingredientsCount(self): + self.assertEqual(16, len(self.harvester_class.ingredients())) + def test_instructions(self): self.assertEqual( "Kartoffeln und Zwiebeln schälen. Kartoffeln würfeln und Zwiebeln in Streifen schneiden. Paprika waschen, entkernen und in Stücke schneiden. Tomaten waschen und in Spalten schneiden. Würstchen in Scheiben schneiden.\nÖl in einem Topf erhitzen, Kartoffelwürfel und Zwiebelstreifen darin ca. 5 Minuten anbraten. Paprikastücke zufügen, Tomatenmark einrühren und kurz mitbraten. Mit Brühe ablöschen und zugedeckt ca. 15 Minuten garen.\nKartoffelgulasch mit Oregano, Paprikapulver, Salz und Pfeffer würzen. Würstchenscheiben und Tomatenspalten zufügen und weitere ca. 5 Minuten garen. 
Kartoffelgulasch mit Salz und Pfeffer abschmecken. Mit einem Klecks Schmand und Petersilie bestreut servieren.", From ec1d2faede4e51a93256674bce345f305e7b8069 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:21:12 +0200 Subject: [PATCH 13/16] added additional test file --- recipe_scrapers/weightwatchers.py | 7 +- tests/test_data/weightwatchers_2.testhtml | 191 ++++++++++++++++++++++ tests/test_weightwatchers.py | 2 +- tests/test_weightwatchers_2.py | 93 +++++++++++ 4 files changed, 288 insertions(+), 5 deletions(-) create mode 100644 tests/test_data/weightwatchers_2.testhtml create mode 100644 tests/test_weightwatchers_2.py diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 0e74e5bd0..952b5c5a2 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -135,10 +135,9 @@ def description(self): def nutrients(self): result = ( - self.soup.find( - "img", {"class": "styles_positivePointsIcon__2XYyV"} - ).next_sibling.get_text() - + " personal points" + self.soup.find("div", {"class": "styles_points__2gv9n"}) + .find("div", {"class": "styles_container__2p-YG"}) + .get_text() ) veggiepoints = self.soup.find( "div", {"class": "styles_vegetableServings__2YSPy"} diff --git a/tests/test_data/weightwatchers_2.testhtml b/tests/test_data/weightwatchers_2.testhtml new file mode 100644 index 000000000..c95ba0fcb --- /dev/null +++ b/tests/test_data/weightwatchers_2.testhtml @@ -0,0 +1,191 @@ + + +Details | Tagebuch

Hackbraten Stroganoff

10 personal points
1:05 minutes Total Time
0:40 minutes Preparation Time
0:25 minutes Cook Time
Serves 4 people
Difficulty Level: Mittel
Wenn das mal keine mutige Abwandlung des beliebten Klassiker ist. Schmeckt übrigens ganz vorzüglich!
Portion(en)
Tageszeit
Datum

Zutaten

Champignons, frisch
125g
earn 0 personal points
Cornichons
5Stück
1 personal points
Frühlingszwiebeln/Lauchzwiebeln
1Bund
earn 0 personal points
Tatar, roh
600g
13 personal points
Paniermehl/Semmelbrösel
2EL
2 personal points
Eier, Hühnereier
1Stück, Gewichtsklasse M
0 personal points
Senf, klassisch
4TL
0 personal points
Salz/Jodsalz
1Prise(n)
0 personal points
Pfeffer
1Prise(n)
0 personal points
Kartoffeln
600g, festkochend
10 personal points
Halbfettmargarine, 39 % Fett
2TL
2 personal points
Weizenmehl
1EL
1 personal points
Gemüsebouillon/Gemüsebrühe, zubereitet
300ml(2 TL Instantpulver)
0 personal points
Crème légère
3EL
4 personal points
Petersilie
2EL, gehackt
0 personal points
Rote Bete (Konserve)
430g, entspricht 1 Glas
7 personal points

Anleitung

  1. Backofen auf 200° C (Gas: Stufe 3, Umluft: 180° C) vorheizen. Champignons trocken abreiben und mit Cornichons in feine Würfel schneiden. Frühlingszwiebeln waschen und in Ringe schneiden. Champignon-, Cornichonwürfel und Frühlingszwiebelringe mit Tatar, Paniermehl, Ei, 1 Teelöffel Senf, Salz und Pfeffer verkneten und zu einem Braten formen.
  2. Hackbraten in eine Kastenform (Länge 30 cm) geben und im Backofen auf unterer Schiene ca. 45 Minuten garen. Kartoffeln schälen, halbieren und in Salzwasser ca. 20 Minuten garen. Für die Sauce Margarine in einem Topf schmelzen, Mehl darin hellgelb anschwitzen, unter Rühren mit Brühe ablöschen, ca. 4 Minuten köcheln lassen. Mit Créme légère verfeinern und mit Salz, Pfeffer, restlichem Senf und Petersilie würzen. Kartoffeln abgießen und Rote Bete abtropfen lassen. Hackbraten mit Senfsauce, Salzkartoffeln und Rote Bete servieren.
+ + + +
+ + + + + + +
\ No newline at end of file diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index 79e1acbd7..dcf716997 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -77,6 +77,6 @@ def test_difficulty(self): def test_nutrients(self): self.assertEqual( - "12 personal points\n+2 Punkte von 2 Portion(en) Gemüse", + "earn 12 personal points\n+2 Punkte von 2 Portion(en) Gemüse", self.harvester_class.nutrients(), ) diff --git a/tests/test_weightwatchers_2.py b/tests/test_weightwatchers_2.py new file mode 100644 index 000000000..c411f79af --- /dev/null +++ b/tests/test_weightwatchers_2.py @@ -0,0 +1,93 @@ +# mypy: allow-untyped-defs + +from recipe_scrapers.weightwatchers import Weightwatchers +from tests import ScraperTest + + +class TestWeightwatchersScraper(ScraperTest): + + # Test-Url: + # https://cmx.weightwatchers.de/details/WWRECIPE:562a9bc8a43e6bde2cf369df + + scraper_class = Weightwatchers + + @property + def test_file_name(self): + return "{}_2".format(self.scraper_class.__name__.lower()) + + def test_host(self): + self.assertEqual("www.weightwatchers.com", self.harvester_class.host()) + + def test_author(self): + self.assertEqual("WeightWatchers", self.harvester_class.author()) + + def test_title(self): + self.assertEqual(self.harvester_class.title(), "Hackbraten Stroganoff") + + def test_category(self): + self.assertEqual("WeightWatchers", self.harvester_class.category()) + + def test_total_time(self): + self.assertEqual(65, self.harvester_class.total_time()) + + def test_cook_time(self): + self.assertEqual(25, self.harvester_class.cook_time()) + + def test_prep_time(self): + self.assertEqual(40, self.harvester_class.prep_time()) + + def test_yields(self): + self.assertEqual("4 servings", self.harvester_class.yields()) + + def test_image(self): + self.assertEqual( + "https://cmx.weightwatchers.com/assets-proxy/weight-watchers/image/upload/t_WINE_EXTRALARGE/vziguy9ale25vtx7spgl.jpg", + self.harvester_class.image(), + ) + 
+ def test_ingredients(self): + self.assertEqual( + [ + "125 g Champignons, frisch", + "5 Stück Cornichons", + "1 Bund Frühlingszwiebeln/Lauchzwiebeln", + "600 g Tatar, roh", + "2 EL Paniermehl/Semmelbrösel", + "1 Stück, Gewichtsklasse M Eier, Hühnereier", + "4 TL Senf, klassisch", + "1 Prise(n) Salz/Jodsalz", + "1 Prise(n) Pfeffer", + "600 g Kartoffeln; festkochend", + "2 TL Halbfettmargarine, 39 % Fett", + "1 EL Weizenmehl", + "300 ml Gemüsebouillon/Gemüsebrühe, zubereitet; (2 TL Instantpulver)", + "3 EL Crème légère", + "2 EL, gehackt Petersilie", + "430 g Rote Bete (Konserve); entspricht 1 Glas", + ], + self.harvester_class.ingredients(), + ) + + def test_ingredientsCount(self): + self.assertEqual(16, len(self.harvester_class.ingredients())) + + def test_instructions(self): + self.assertEqual( + "Backofen auf 200° C (Gas: Stufe 3, Umluft: 180° C) vorheizen. Champignons trocken abreiben und mit Cornichons in feine Würfel schneiden. Frühlingszwiebeln waschen und in Ringe schneiden. Champignon-, Cornichonwürfel und Frühlingszwiebelringe mit Tatar, Paniermehl, Ei, 1 Teelöffel Senf, Salz und Pfeffer verkneten und zu einem Braten formen.\nHackbraten in eine Kastenform (Länge 30 cm) geben und im Backofen auf unterer Schiene ca. 45 Minuten garen. Kartoffeln schälen, halbieren und in Salzwasser ca. 20 Minuten garen. Für die Sauce Margarine in einem Topf schmelzen, Mehl darin hellgelb anschwitzen, unter Rühren mit Brühe ablöschen, ca. 4 Minuten köcheln lassen. Mit Créme légère verfeinern und mit Salz, Pfeffer, restlichem Senf und Petersilie würzen. Kartoffeln abgießen und Rote Bete abtropfen lassen. Hackbraten mit Senfsauce, Salzkartoffeln und Rote Bete servieren.", + self.harvester_class.instructions(), + ) + + def test_description(self): + self.assertEqual( + "Wenn das mal keine mutige Abwandlung des beliebten Klassiker ist. 
Schmeckt übrigens ganz vorzüglich!", + self.harvester_class.description(), + ) + + def test_difficulty(self): + self.assertEqual("Mittel", self.harvester_class.difficulty()) + + def test_nutrients(self): + self.assertEqual( + "10 personal points", + self.harvester_class.nutrients(), + ) From 674a4ba56f4b443fa158f3b3edc501075f753610 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sun, 23 Oct 2022 18:34:27 +0200 Subject: [PATCH 14/16] fix review comments --- recipe_scrapers/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index aed7fdf35..4e69355ba 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -210,8 +210,8 @@ from .vegolosi import Vegolosi from .vegrecipesofindia import VegRecipesOfIndia from .watchwhatueat import WatchWhatUEat -from .weightwatchers import Weightwatchers -from .weightwatcherspublic import weightwatchersPublic +from .weightwatchers import WeightWatchers +from .weightwatcherspublic import WeightWatchersPublic from .whatsgabycooking import WhatsGabyCooking from .wholefoods import WholeFoods from .wikicookbook import WikiCookbook @@ -441,7 +441,8 @@ VegRecipesOfIndia.host(): VegRecipesOfIndia, Vegolosi.host(): Vegolosi, WatchWhatUEat.host(): WatchWhatUEat, - Weightwatchers.host(): Weightwatchers, + WeightWatchers.host(): WeightWatchers, + WeightWatchersPublic.host(): WeightWatchersPublic, WhatsGabyCooking.host(): WhatsGabyCooking, WholeFoods.host(): WholeFoods, WholeFoods.host(domain="co.uk"): WholeFoods, @@ -452,7 +453,6 @@ Yummly.host(): Yummly, ZeitWochenmarkt.host(): ZeitWochenmarkt, ZenBelly.host(): ZenBelly, - weightwatchersPublic.host(): weightwatchersPublic, } From 21f7c294281151366edd1226d25285c45c84d7a6 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Sun, 23 Oct 2022 18:36:30 +0200 Subject: [PATCH 15/16] fix review comments 
--- recipe_scrapers/weightwatchers.py | 26 ++++++++++++------------- recipe_scrapers/weightwatcherspublic.py | 8 +++++--- tests/test_weightwatchers.py | 13 +++++++------ tests/test_weightwatchers_2.py | 10 ++++------ tests/test_weightwatcherspublic.py | 12 ++++++------ 5 files changed, 35 insertions(+), 34 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index 952b5c5a2..a3a2c180a 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -6,7 +6,7 @@ from ._utils import get_minutes, get_yields, normalize_string -class Weightwatchers(AbstractScraper): +class WeightWatchers(AbstractScraper): @classmethod def host(cls): return "www.weightwatchers.com" @@ -66,8 +66,7 @@ def image(self): if backgroundImgStyle: return ( - re.compile(r'url\("(?P<imgurl>\S*)"\);') - .search(backgroundImgStyle) + re.search(r'url\("(?P<imgurl>\S*)"\);', backgroundImgStyle) .groupdict() .get("imgurl") ) @@ -102,17 +101,15 @@ def __parseIngridient(self, ingridient): amount, unit, comment = self._extractPortionParts(ingridient) if comment: - return amount + " " + unit + " " + ingridientName + "; " + comment + return f"{amount} {unit} {ingridientName}; {comment}" else: - return amount + " " + unit + " " + ingridientName + return f"{amount} {unit} {ingridientName}" def ingredients(self): - result = [] - ingridientTags = self._findIngridientTags() - - for ingridient in ingridientTags: - result.append(self.__parseIngridient(ingridient)) - return result + return [ + self.__parseIngridient(ingridient) + for ingridient in self._findIngridientTags() + ] def _getInstructions(self, headertag, headerattribute, headervalue, instructiontag): instructions = self.soup.find( @@ -134,16 +131,19 @@ def description(self): return self.soup.find("div", {"class": "copy-1"}).get_text().strip() def nutrients(self): - result = ( + result = {} + + result["personal points"] = ( self.soup.find("div", {"class": "styles_points__2gv9n"}) .find("div", {"class": 
"styles_container__2p-YG"}) .get_text() ) + veggiepoints = self.soup.find( "div", {"class": "styles_vegetableServings__2YSPy"} ) if veggiepoints: - result += "\n" + normalize_string( + result["positive points"] = normalize_string( veggiepoints.find( "div", {"class": "styles_container__2p-YG"} ).next_sibling.get_text() diff --git a/recipe_scrapers/weightwatcherspublic.py b/recipe_scrapers/weightwatcherspublic.py index a539ac678..4f40e7e06 100644 --- a/recipe_scrapers/weightwatcherspublic.py +++ b/recipe_scrapers/weightwatcherspublic.py @@ -1,11 +1,11 @@ # mypy: allow-untyped-defs from ._utils import normalize_string -from .weightwatchers import Weightwatchers +from .weightwatchers import WeightWatchers # collect the differences between public and non-public weightwatcher recipes in this class -class weightwatchersPublic(Weightwatchers): +class WeightWatchersPublic(WeightWatchers): @classmethod def host(cls): return "www.weightwatchers.com" @@ -20,7 +20,9 @@ def image(self): return self.soup.find("img", {"class": "FoodMasthead_heroImage__BjVdZ"})["src"] def nutrients(self): - return self.soup.find("div", {"class": "Coin_text__3UOb0"})["aria-label"] + return { + "points": self.soup.find("div", {"class": "Coin_text__3UOb0"})["aria-label"] + } def description(self): return normalize_string( diff --git a/tests/test_weightwatchers.py b/tests/test_weightwatchers.py index dcf716997..5c741cc9f 100644 --- a/tests/test_weightwatchers.py +++ b/tests/test_weightwatchers.py @@ -1,6 +1,6 @@ # mypy: allow-untyped-defs -from recipe_scrapers.weightwatchers import Weightwatchers +from recipe_scrapers.weightwatchers import WeightWatchers from tests import ScraperTest @@ -9,7 +9,7 @@ class TestWeightwatchersScraper(ScraperTest): # Test-Url: # https://cmx.weightwatchers.de/details/WWRECIPE:5667ab72a29713e4335bb342 - scraper_class = Weightwatchers + scraper_class = WeightWatchers def test_host(self): self.assertEqual("www.weightwatchers.com", self.harvester_class.host()) @@ -76,7 +76,8 @@ 
def test_difficulty(self): self.assertEqual("Leicht", self.harvester_class.difficulty()) def test_nutrients(self): - self.assertEqual( - "earn 12 personal points\n+2 Punkte von 2 Portion(en) Gemüse", - self.harvester_class.nutrients(), - ) + expected_nutrients = { + "personal points": "earn 12 personal points", + "positive points": "+2 Punkte von 2 Portion(en) Gemüse", + } + self.assertEqual(self.harvester_class.nutrients(), expected_nutrients) diff --git a/tests/test_weightwatchers_2.py b/tests/test_weightwatchers_2.py index c411f79af..71109096f 100644 --- a/tests/test_weightwatchers_2.py +++ b/tests/test_weightwatchers_2.py @@ -1,6 +1,6 @@ # mypy: allow-untyped-defs -from recipe_scrapers.weightwatchers import Weightwatchers +from recipe_scrapers.weightwatchers import WeightWatchers from tests import ScraperTest @@ -9,7 +9,7 @@ class TestWeightwatchersScraper(ScraperTest): # Test-Url: # https://cmx.weightwatchers.de/details/WWRECIPE:562a9bc8a43e6bde2cf369df - scraper_class = Weightwatchers + scraper_class = WeightWatchers @property def test_file_name(self): @@ -87,7 +87,5 @@ def test_difficulty(self): self.assertEqual("Mittel", self.harvester_class.difficulty()) def test_nutrients(self): - self.assertEqual( - "10 personal points", - self.harvester_class.nutrients(), - ) + expected_nutrients = {"personal points": "10 personal points"} + self.assertEqual(self.harvester_class.nutrients(), expected_nutrients) diff --git a/tests/test_weightwatcherspublic.py b/tests/test_weightwatcherspublic.py index 62de13f86..5d1120a23 100644 --- a/tests/test_weightwatcherspublic.py +++ b/tests/test_weightwatcherspublic.py @@ -1,12 +1,12 @@ # mypy: allow-untyped-defs -from recipe_scrapers.weightwatcherspublic import weightwatchersPublic +from recipe_scrapers.weightwatcherspublic import WeightWatchersPublic from tests import ScraperTest class TestweightwatchersPublicScraper(ScraperTest): - scraper_class = weightwatchersPublic + scraper_class = WeightWatchersPublic # Test-Url: # 
https://www.weightwatchers.com/de/rezept/kartoffelgulasch/562a9b02873e1afb2a3c4c13 @@ -83,7 +83,7 @@ def test_difficulty(self): self.assertEqual("Leicht", self.harvester_class.difficulty()) def test_nutrients(self): - self.assertEqual( - "10 bis 13 PersonalPoints", - self.harvester_class.nutrients(), - ) + expected_nutrients = { + "points": "10 bis 13 PersonalPoints", + } + self.assertEqual(self.harvester_class.nutrients(), expected_nutrients) From 02b37cebb1a06ca06987c17ac66a7f064f4be265 Mon Sep 17 00:00:00 2001 From: bickerdyke <48774580+bickerdyke@users.noreply.github.com> Date: Wed, 26 Oct 2022 00:24:33 +0200 Subject: [PATCH 16/16] Fix review comment and unit test error --- recipe_scrapers/weightwatchers.py | 20 ++++++++++++++------ tests/test_weightwatchers_2.py | 5 +---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/recipe_scrapers/weightwatchers.py b/recipe_scrapers/weightwatchers.py index a3a2c180a..5b0c64514 100644 --- a/recipe_scrapers/weightwatchers.py +++ b/recipe_scrapers/weightwatchers.py @@ -89,12 +89,20 @@ def _extractPortionParts(self, ingridient): tags = ingridient.find("div", {"class": "styles_portion__2NQyq"}).find_all( "span" ) - - return ( - normalize_string(tags[0].get_text()), - normalize_string(tags[1].get_text()), - normalize_string(tags[2].get_text().replace(", ", "")) if tags[2] else None, - ) + try: + return ( + normalize_string(tags[0].get_text()), + normalize_string(tags[1].get_text()), + normalize_string(tags[2].get_text().replace(", ", "")) + if tags[2] + else None, + ) + except IndexError: + return ( + normalize_string(tags[0].get_text()), + normalize_string(tags[1].get_text()), + None, + ) def __parseIngridient(self, ingridient): ingridientName = self._extractIngridientName(ingridient) diff --git a/tests/test_weightwatchers_2.py b/tests/test_weightwatchers_2.py index 71109096f..424cec068 100644 --- a/tests/test_weightwatchers_2.py +++ b/tests/test_weightwatchers_2.py @@ -10,10 +10,7 @@ class 
TestWeightwatchersScraper(ScraperTest): # https://cmx.weightwatchers.de/details/WWRECIPE:562a9bc8a43e6bde2cf369df scraper_class = WeightWatchers - - @property - def test_file_name(self): - return "{}_2".format(self.scraper_class.__name__.lower()) + test_file_name = "weightwatchers_2" def test_host(self): self.assertEqual("www.weightwatchers.com", self.harvester_class.host())