Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue 646/scraper weightwatchers (#657)
- Loading branch information
1 parent
2e7d0e1
commit 954c5c6
Showing
11 changed files
with
904 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# mypy: allow-untyped-defs | ||
|
||
import re | ||
|
||
from ._abstract import AbstractScraper | ||
from ._utils import get_minutes, get_yields, normalize_string | ||
|
||
|
||
class WeightWatchers(AbstractScraper):
    """Scraper for non-public WeightWatchers recipe pages.

    Page elements are located through CSS class names and element ids.
    The lookups that differ on public recipe pages live in the small
    ``_find*`` / ``_extract*`` helpers so that a subclass can override them
    while the public scraper methods stay shared.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "www.weightwatchers.com"

    def author(self):
        """Return the fixed author name used for every recipe."""
        return "WeightWatchers"

    def title(self):
        """Return the stripped text of the first ``<h1>`` element."""
        return self.soup.find("h1").get_text().strip()

    def category(self):
        """Return the fixed category used for every recipe."""
        return "WeightWatchers"

    # Cooking times, yield and difficulty are in a common div in public and
    # non-public recipes, but the class of that block and of its sub elements
    # differs. Finding the block and extracting a value is therefore
    # overridden in the class for public recipes, while picking the data item
    # based on its position is done in this base class (total_time(),
    # cook_time() and so on).
    def _findDataContainer(self):
        """Return the div that holds times, yield and difficulty."""
        return self.soup.find("div", {"class": "styles_container__3N3E8"})

    def _extractItemField(self, item):
        """Return the value node (second child) of one data container item."""
        return item.contents[1]

    def total_time(self):
        """Return the total time, read from the first data container item."""
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[0])
        )

    def prep_time(self):
        """Return the prep time, read from the second data container item."""
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[1])
        )

    def cook_time(self):
        """Return the cook time, read from the third data container item."""
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[2])
        )

    def yields(self):
        """Return the yield, read from the fourth data container item."""
        return get_yields(self._extractItemField(self._findDataContainer().contents[3]))

    def difficulty(self):
        """Return the text of the fifth data container item."""
        return self._extractItemField(self._findDataContainer().contents[4]).get_text()

    # Alternative way to extract data based on description instead of position
    # def total_time(self):
    #     return get_minutes(
    #         self._findDataContainer()
    #         .find("div", string=re.compile(r"minutes Total Time"))
    #         .previous_sibling
    #     )

    def image(self):
        """Return the image URL taken from the image div's inline style.

        Returns None when the div, its ``style`` attribute, or a
        ``url("...");`` declaration inside that style cannot be found.
        """
        imageDiv = self.soup.find("div", {"class": "styles_image__2dnNm"})
        if imageDiv is None:
            return None

        # .get() instead of ["style"] so a missing attribute yields None
        # rather than raising before the fallback below can apply.
        backgroundImgStyle = imageDiv.get("style")
        if not backgroundImgStyle:
            return None

        match = re.search(r'url\("(?P<imgurl>\S*)"\);', backgroundImgStyle)
        return match.group("imgurl") if match else None

    def _findIngridientTags(self):
        """Return the ingredient divs that share a parent with the ingredients header."""
        return self.soup.find(
            "h3", {"id": "food-detail-recipe-ingredients-header"}
        ).parent.find_all("div", {"class": "styles_name__1OYVU"})

    def _extractIngridientName(self, ingridient):
        """Return the normalized ingredient name of one ingredient tag."""
        return normalize_string(
            ingridient.find("div", {"class": "styles_ingredientName__1Vffd"})
            .find("div")
            .get_text()
        )

    def _extractPortionParts(self, ingridient):
        """Return ``(amount, unit, comment)`` for one ingredient tag.

        The values come from the ``<span>`` elements of the portion div, in
        that order. ``comment`` is None when there is no third span.
        """
        tags = ingridient.find("div", {"class": "styles_portion__2NQyq"}).find_all(
            "span"
        )
        amount = normalize_string(tags[0].get_text())
        unit = normalize_string(tags[1].get_text())

        comment = None
        if len(tags) > 2:
            # The comment span carries ", " separators, which are removed.
            comment = normalize_string(tags[2].get_text().replace(", ", ""))

        return (amount, unit, comment)

    # The double leading underscore makes this name class-private (name
    # mangling); subclasses customise the _extract* helpers instead.
    def __parseIngridient(self, ingridient):
        """Format one ingredient tag as ``"amount unit name[; comment]"``."""
        ingridientName = self._extractIngridientName(ingridient)
        amount, unit, comment = self._extractPortionParts(ingridient)

        if comment:
            return f"{amount} {unit} {ingridientName}; {comment}"
        return f"{amount} {unit} {ingridientName}"

    def ingredients(self):
        """Return the list of formatted ingredient strings."""
        return [
            self.__parseIngridient(ingridient)
            for ingridient in self._findIngridientTags()
        ]

    def _getInstructions(self, headertag, headerattribute, headervalue, instructiontag):
        """Return the instruction steps joined by newlines.

        The header element is located by tag name and one attribute; the
        steps are the ``instructiontag`` elements inside the first ``<ol>``
        found under the header's parent.
        """
        instructions = self.soup.find(
            headertag, {headerattribute: headervalue}
        ).parent.find("ol")
        return "\n".join(
            normalize_string(instruction.get_text())
            for instruction in instructions.find_all(instructiontag)
        )

    def instructions(self):
        """Return the instructions found below the instruction header."""
        return self._getInstructions(
            "h3", "id", "food-detail-recipe-instruction-header", "div"
        )

    def description(self):
        """Return the stripped text of the first ``copy-1`` div."""
        return self.soup.find("div", {"class": "copy-1"}).get_text().strip()

    def nutrients(self):
        """Return the points information as a dict.

        ``"personal points"`` is always set; ``"positive points"`` is added
        only when the vegetable servings block is present on the page.
        """
        result = {}

        result["personal points"] = (
            self.soup.find("div", {"class": "styles_points__2gv9n"})
            .find("div", {"class": "styles_container__2p-YG"})
            .get_text()
        )

        veggiepoints = self.soup.find(
            "div", {"class": "styles_vegetableServings__2YSPy"}
        )
        if veggiepoints:
            result["positive points"] = normalize_string(
                veggiepoints.find(
                    "div", {"class": "styles_container__2p-YG"}
                ).next_sibling.get_text()
            )

        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# mypy: allow-untyped-defs | ||
|
||
from ._utils import normalize_string | ||
from .weightwatchers import WeightWatchers | ||
|
||
|
||
# collect the differences between public and non-public weightwatcher recipes in this class | ||
class WeightWatchersPublic(WeightWatchers):
    """WeightWatchers scraper variant for public recipe pages.

    Only the lookups that differ from the non-public page layout are
    overridden here; everything else is inherited from ``WeightWatchers``.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "www.weightwatchers.com"

    def _findDataContainer(self):
        """Return the div that holds times, yield and difficulty."""
        selector = {"class": "HorizontalList_list__GESs0"}
        return self.soup.find("div", selector)

    def _extractItemField(self, item):
        """Return the value div of one data container item."""
        selector = {"data-e2e-name": "attribute_item_value"}
        return item.find("div", selector)

    def image(self):
        """Return the ``src`` attribute of the masthead image."""
        heroImage = self.soup.find("img", {"class": "FoodMasthead_heroImage__BjVdZ"})
        return heroImage["src"]

    def nutrients(self):
        """Return a dict with the ``aria-label`` of the points coin."""
        coin = self.soup.find("div", {"class": "Coin_text__3UOb0"})
        return {"points": coin["aria-label"]}

    def description(self):
        """Return the normalized text of the collapsed masthead description."""
        masthead = self.soup.find(
            "div", {"data-e2e-name": "food_masthead_detail_description"}
        )
        collapsed = masthead.find("div", {"class": "ReadMoreLess_collapsed__IAzxP"})
        return normalize_string(collapsed.get_text())

    def instructions(self):
        """Return the instruction spans found below the instructions headline."""
        return self._getInstructions(
            headertag="h2",
            headerattribute="class",
            headervalue="InstructionsFood_headline__vw7cn",
            instructiontag="span",
        )

    def _findIngridientTags(self):
        """Return the direct child divs of the ingredient list."""
        card = self.soup.find("div", {"class": "IngredientsCard_card__VSY4x"})
        itemList = card.find("div", {"data-e2e-name": "vertical_list_items"})
        return itemList.find_all("div", recursive=False)

    def _extractIngridientName(self, ingridient):
        """Return the normalized ingredient name of one ingredient tag."""
        nameTag = ingridient.find("p", {"data-e2e-name": "ingredient_name"})
        return normalize_string(nameTag.get_text())

    def _extractPortionParts(self, ingridient):
        """Return ``(amount, unit, comment)`` for one ingredient tag.

        With more than two spans the third span is the comment. Otherwise
        the second span is split at its first ", " into unit and comment,
        and the comment is None when there is no ", ".
        """
        descriptionTag = ingridient.find(
            "p", {"data-e2e-name": "ingredient_description"}
        )
        spans = descriptionTag.find_all("span")

        if len(spans) > 2:
            comment = normalize_string(spans[2].get_text().replace(", ", "", 1))
            unit = normalize_string(spans[1].get_text())
        else:
            unitText = normalize_string(spans[1].get_text())
            unit, separator, remainder = unitText.partition(", ")
            comment = remainder if separator else None

        amount = normalize_string(spans[0].get_text())
        return (amount, unit, comment)
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# mypy: allow-untyped-defs | ||
|
||
from recipe_scrapers.weightwatchers import WeightWatchers | ||
from tests import ScraperTest | ||
|
||
|
||
class TestWeightwatchersScraper(ScraperTest):
    """Checks the WeightWatchers scraper against a stored recipe page.

    Every assertion passes the expected value first and the scraped value
    second.
    """

    # Test-Url:
    # https://cmx.weightwatchers.de/details/WWRECIPE:5667ab72a29713e4335bb342

    scraper_class = WeightWatchers

    def test_host(self):
        self.assertEqual("www.weightwatchers.com", self.harvester_class.host())

    def test_author(self):
        self.assertEqual("WeightWatchers", self.harvester_class.author())

    def test_title(self):
        self.assertEqual("Würstchengulasch mit Nudeln", self.harvester_class.title())

    def test_category(self):
        self.assertEqual("WeightWatchers", self.harvester_class.category())

    def test_total_time(self):
        self.assertEqual(25, self.harvester_class.total_time())

    def test_cook_time(self):
        self.assertEqual(0, self.harvester_class.cook_time())

    def test_prep_time(self):
        self.assertEqual(25, self.harvester_class.prep_time())

    def test_yields(self):
        self.assertEqual("2 servings", self.harvester_class.yields())

    def test_image(self):
        self.assertEqual(
            "https://cmx.weightwatchers.com/assets-proxy/weight-watchers/image/upload/t_WINE_EXTRALARGE/i34cskr1hxegmxqukawd.jpg",
            self.harvester_class.image(),
        )

    def test_ingredients(self):
        self.assertEqual(
            [
                "2 Stück Geflügelwürstchen",
                "1 Stück, klein Zwiebel/n",
                "200 g Champignons, frisch; braun",
                "120 g Nudeln, trocken, jede Sorte; Spiralnudeln",
                "1 Prise(n) Salz/Jodsalz",
                "2 TL Pflanzenöl, Rapsöl/Sonnenblumenöl",
                "400 g Tomaten, passiert",
                "1 Prise(n) Pfeffer",
                "1⁄2 TL Paprikapulver",
            ],
            self.harvester_class.ingredients(),
        )

    def test_ingredientsCount(self):
        self.assertEqual(9, len(self.harvester_class.ingredients()))

    def test_instructions(self):
        self.assertEqual(
            "Würstchen in Scheiben schneiden. Zwiebel schälen und würfeln. Champignons trocken abreiben und vierteln. Nudeln nach Packungsanweisung in Salzwasser garen.\nÖl in einem Topf erhitzen und Zwiebelwürfel darin andünsten. Würstchenscheiben und Champignonviertel zufügen und ca. 3 Minuten anbraten. Mit Tomaten ablöschen, aufkochen und ca. 5 Minuten köcheln lassen. Würstchengulasch mit Salz, Pfeffer und Paprikapulver würzen. Nudeln abgießen, untermischen und in einer Frischhaltedose transportieren. Würstchengulasch erwärmen und servieren.",
            self.harvester_class.instructions(),
        )

    def test_description(self):
        self.assertEqual(
            "18 Uhr und alle haben Hunger? Dann koche rasch das Würstchengulasch und alle sind happy.",
            self.harvester_class.description(),
        )

    def test_difficulty(self):
        self.assertEqual("Leicht", self.harvester_class.difficulty())

    def test_nutrients(self):
        expected_nutrients = {
            "personal points": "earn 12 personal points",
            "positive points": "+2 Punkte von 2 Portion(en) Gemüse",
        }
        self.assertEqual(expected_nutrients, self.harvester_class.nutrients())
Oops, something went wrong.