Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 646/scraper weightwatchers #657

Merged
merged 21 commits into from Oct 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.rst
Expand Up @@ -293,6 +293,7 @@ Scrapers available for:
- `https://vegolosi.it/ <https://vegolosi.it>`_
- `https://vegrecipesofindia.com/ <https://www.vegrecipesofindia.com/>`_
- `https://watchwhatueat.com/ <https://watchwhatueat.com/>`_
- `https://www.weightwatchers.com/ <https://www.weightwatchers.com/>`_ (*)
- `https://whatsgabycooking.com/ <https://whatsgabycooking.com>`_
- `https://www.wholefoodsmarket.com/ <https://www.wholefoodsmarket.com/>`_
- `https://www.wholefoodsmarket.co.uk/ <https://www.wholefoodsmarket.co.uk/>`_
Expand All @@ -304,6 +305,7 @@ Scrapers available for:
- `https://zeit.de/ (wochenmarkt) <https://www.zeit.de/zeit-magazin/wochenmarkt/index>`_
- `https://zenbelly.com/ <https://zenbelly.com>`_

(*) Works on offline-saved files only; the live page requires a login.

Contribute
----------
Expand Down
4 changes: 4 additions & 0 deletions recipe_scrapers/__init__.py
Expand Up @@ -210,6 +210,8 @@
from .vegolosi import Vegolosi
from .vegrecipesofindia import VegRecipesOfIndia
from .watchwhatueat import WatchWhatUEat
from .weightwatchers import WeightWatchers
from .weightwatcherspublic import WeightWatchersPublic
from .whatsgabycooking import WhatsGabyCooking
from .wholefoods import WholeFoods
from .wikicookbook import WikiCookbook
Expand Down Expand Up @@ -439,6 +441,8 @@
VegRecipesOfIndia.host(): VegRecipesOfIndia,
Vegolosi.host(): Vegolosi,
WatchWhatUEat.host(): WatchWhatUEat,
WeightWatchers.host(): WeightWatchers,
WeightWatchersPublic.host(): WeightWatchersPublic,
WhatsGabyCooking.host(): WhatsGabyCooking,
WholeFoods.host(): WholeFoods,
WholeFoods.host(domain="co.uk"): WholeFoods,
Expand Down
2 changes: 1 addition & 1 deletion recipe_scrapers/_utils.py
Expand Up @@ -20,7 +20,7 @@
}

TIME_REGEX = re.compile(
r"(\D*(?P<hours>[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra))?(\D*(?P<minutes>\d+)\s*(minutes|mins|min|m|perc))?",
r"(\D*(?P<hours>[\d.\s/?¼½¾⅓⅔⅕⅖⅗]+)\s*(hours|hrs|hr|h|óra|:))?(\D*(?P<minutes>\d+)\s*(minutes|mins|min|m|perc|$))?",
re.IGNORECASE,
)

Expand Down
160 changes: 160 additions & 0 deletions recipe_scrapers/weightwatchers.py
@@ -0,0 +1,160 @@
# mypy: allow-untyped-defs

import re

from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class WeightWatchers(AbstractScraper):
    """Scraper for weightwatchers.com recipes (logged-in member layout).

    Works on offline-saved pages only, since the live recipe pages require a
    login.  ``WeightWatchersPublic`` subclasses this for the public layout and
    overrides the ``_find*`` / ``_extract*`` hooks where the markup differs.
    """

    @classmethod
    def host(cls):
        return "www.weightwatchers.com"

    def author(self):
        # Recipes are not credited to individual authors on the site.
        return "WeightWatchers"

    def title(self):
        return self.soup.find("h1").get_text().strip()

    def category(self):
        return "WeightWatchers"

    # Cooking times, yield and difficulty are in a common div in public and
    # non-public recipes, but the class of that block and its sub-elements
    # are different.  So finding the block and extracting a value is
    # overridden in the class for public recipes, but picking the data item
    # based on order is done in this base class (total_time(), cook_time()
    # and so on).
    def _findDataContainer(self):
        # Container div holding the ordered time/yield/difficulty items.
        return self.soup.find("div", {"class": "styles_container__3N3E8"})

    def _extractItemField(self, item):
        # The value node is the second child of each data item.
        return item.contents[1]

    def total_time(self):
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[0])
        )

    def prep_time(self):
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[1])
        )

    def cook_time(self):
        return get_minutes(
            self._extractItemField(self._findDataContainer().contents[2])
        )

    def yields(self):
        return get_yields(self._extractItemField(self._findDataContainer().contents[3]))

    def difficulty(self):
        return self._extractItemField(self._findDataContainer().contents[4]).get_text()

    def image(self):
        """Return the recipe image URL, or ``None`` if none can be found.

        The image is embedded as a CSS ``background-image`` on a div rather
        than as an ``<img>`` element, so the URL is parsed out of the inline
        ``style`` attribute.
        """
        imageDiv = self.soup.find("div", {"class": "styles_image__2dnNm"})
        if imageDiv is None:
            # No image block on the page at all.
            return None

        backgroundImgStyle = imageDiv.get("style")
        if backgroundImgStyle:
            match = re.search(r'url\("(?P<imgurl>\S*)"\);', backgroundImgStyle)
            if match:
                return match.group("imgurl")

        return None

    def _findIngridientTags(self):
        # All ingredient rows below the ingredients header.
        return self.soup.find(
            "h3", {"id": "food-detail-recipe-ingredients-header"}
        ).parent.find_all("div", {"class": "styles_name__1OYVU"})

    def _extractIngridientName(self, ingridient):
        return normalize_string(
            ingridient.find("div", {"class": "styles_ingredientName__1Vffd"})
            .find("div")
            .get_text()
        )

    def _extractPortionParts(self, ingridient):
        """Return ``(amount, unit, comment)`` for one ingredient row.

        The comment span is optional; ``None`` takes its place when the row
        only contains amount and unit.
        """
        tags = ingridient.find("div", {"class": "styles_portion__2NQyq"}).find_all(
            "span"
        )
        amount = normalize_string(tags[0].get_text())
        unit = normalize_string(tags[1].get_text())
        if len(tags) > 2 and tags[2]:
            # Strip the ", " separator that prefixes the comment text.
            comment = normalize_string(tags[2].get_text().replace(", ", ""))
            return (amount, unit, comment)
        return (amount, unit, None)

    def __parseIngridient(self, ingridient):
        # Render one ingredient row as "<amount> <unit> <name>[; <comment>]".
        ingridientName = self._extractIngridientName(ingridient)
        amount, unit, comment = self._extractPortionParts(ingridient)

        if comment:
            return f"{amount} {unit} {ingridientName}; {comment}"
        return f"{amount} {unit} {ingridientName}"

    def ingredients(self):
        return [
            self.__parseIngridient(ingridient)
            for ingridient in self._findIngridientTags()
        ]

    def _getInstructions(self, headertag, headerattribute, headervalue, instructiontag):
        # Locate the <ol> next to the instructions header and join the steps
        # with newlines; the step tag differs between page layouts.
        instructions = self.soup.find(
            headertag, {headerattribute: headervalue}
        ).parent.find("ol")
        return "\n".join(
            normalize_string(instruction.get_text())
            for instruction in instructions.find_all(instructiontag)
        )

    def instructions(self):
        return self._getInstructions(
            "h3", "id", "food-detail-recipe-instruction-header", "div"
        )

    def description(self):
        return self.soup.find("div", {"class": "copy-1"}).get_text().strip()

    def nutrients(self):
        """Return WeightWatchers points data (no classic nutrition values)."""
        result = {}

        result["personal points"] = (
            self.soup.find("div", {"class": "styles_points__2gv9n"})
            .find("div", {"class": "styles_container__2p-YG"})
            .get_text()
        )

        # The vegetable-servings bonus block is optional.
        veggiepoints = self.soup.find(
            "div", {"class": "styles_vegetableServings__2YSPy"}
        )
        if veggiepoints:
            result["positive points"] = normalize_string(
                veggiepoints.find(
                    "div", {"class": "styles_container__2p-YG"}
                ).next_sibling.get_text()
            )

        return result
66 changes: 66 additions & 0 deletions recipe_scrapers/weightwatcherspublic.py
@@ -0,0 +1,66 @@
# mypy: allow-untyped-defs

from ._utils import normalize_string
from .weightwatchers import WeightWatchers


class WeightWatchersPublic(WeightWatchers):
    """Collects the differences between public and non-public WeightWatchers
    recipe pages; everything else is inherited from ``WeightWatchers``."""

    @classmethod
    def host(cls):
        # NOTE(review): identical to WeightWatchers.host() — both classes map
        # to the same host key; verify intended registry behavior.
        return "www.weightwatchers.com"

    def _findDataContainer(self):
        return self.soup.find("div", {"class": "HorizontalList_list__GESs0"})

    def _extractItemField(self, item):
        return item.find("div", {"data-e2e-name": "attribute_item_value"})

    def image(self):
        # Public pages use a plain <img> element for the hero image.
        hero = self.soup.find("img", {"class": "FoodMasthead_heroImage__BjVdZ"})
        return hero["src"]

    def nutrients(self):
        coin = self.soup.find("div", {"class": "Coin_text__3UOb0"})
        return {"points": coin["aria-label"]}

    def description(self):
        detail = self.soup.find(
            "div", {"data-e2e-name": "food_masthead_detail_description"}
        )
        collapsed = detail.find("div", {"class": "ReadMoreLess_collapsed__IAzxP"})
        return normalize_string(collapsed.get_text())

    def instructions(self):
        return self._getInstructions(
            "h2", "class", "InstructionsFood_headline__vw7cn", "span"
        )

    def _findIngridientTags(self):
        card = self.soup.find("div", {"class": "IngredientsCard_card__VSY4x"})
        itemList = card.find("div", {"data-e2e-name": "vertical_list_items"})
        return itemList.find_all("div", recursive=False)

    def _extractIngridientName(self, ingridient):
        nameTag = ingridient.find("p", {"data-e2e-name": "ingredient_name"})
        return normalize_string(nameTag.get_text())

    def _extractPortionParts(self, ingridient):
        # Returns (amount, unit, comment); comment may be None.
        descriptionTag = ingridient.find(
            "p", {"data-e2e-name": "ingredient_description"}
        )
        tags = descriptionTag.find_all("span")

        amount = normalize_string(tags[0].get_text())
        if len(tags) > 2:
            # Amount, unit and comment arrive as three separate spans.
            unit = normalize_string(tags[1].get_text())
            comment = normalize_string(tags[2].get_text().replace(", ", "", 1))
        else:
            # Unit and optional comment share one span, separated by ", ".
            unit, _, rest = normalize_string(tags[1].get_text()).partition(", ")
            comment = rest or None

        return (amount, unit, comment)
191 changes: 191 additions & 0 deletions tests/test_data/weightwatchers.testhtml

Large diffs are not rendered by default.

191 changes: 191 additions & 0 deletions tests/test_data/weightwatchers_2.testhtml

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions tests/test_data/weightwatcherspublic.testhtml

Large diffs are not rendered by default.

83 changes: 83 additions & 0 deletions tests/test_weightwatchers.py
@@ -0,0 +1,83 @@
# mypy: allow-untyped-defs

from recipe_scrapers.weightwatchers import WeightWatchers
from tests import ScraperTest


class TestWeightwatchersScraper(ScraperTest):
    """Exercises the WeightWatchers scraper against a saved member page.

    Source URL:
    https://cmx.weightwatchers.de/details/WWRECIPE:5667ab72a29713e4335bb342
    """

    scraper_class = WeightWatchers

    def test_host(self):
        self.assertEqual("www.weightwatchers.com", self.harvester_class.host())

    def test_author(self):
        self.assertEqual("WeightWatchers", self.harvester_class.author())

    def test_title(self):
        self.assertEqual("Würstchengulasch mit Nudeln", self.harvester_class.title())

    def test_category(self):
        self.assertEqual("WeightWatchers", self.harvester_class.category())

    def test_total_time(self):
        self.assertEqual(25, self.harvester_class.total_time())

    def test_cook_time(self):
        self.assertEqual(0, self.harvester_class.cook_time())

    def test_prep_time(self):
        self.assertEqual(25, self.harvester_class.prep_time())

    def test_yields(self):
        self.assertEqual("2 servings", self.harvester_class.yields())

    def test_image(self):
        expected_url = "https://cmx.weightwatchers.com/assets-proxy/weight-watchers/image/upload/t_WINE_EXTRALARGE/i34cskr1hxegmxqukawd.jpg"
        self.assertEqual(expected_url, self.harvester_class.image())

    def test_ingredients(self):
        expected_ingredients = [
            "2 Stück Geflügelwürstchen",
            "1 Stück, klein Zwiebel/n",
            "200 g Champignons, frisch; braun",
            "120 g Nudeln, trocken, jede Sorte; Spiralnudeln",
            "1 Prise(n) Salz/Jodsalz",
            "2 TL Pflanzenöl, Rapsöl/Sonnenblumenöl",
            "400 g Tomaten, passiert",
            "1 Prise(n) Pfeffer",
            "1⁄2 TL Paprikapulver",
        ]
        self.assertEqual(expected_ingredients, self.harvester_class.ingredients())

    def test_ingredientsCount(self):
        self.assertEqual(9, len(self.harvester_class.ingredients()))

    def test_instructions(self):
        expected_instructions = "Würstchen in Scheiben schneiden. Zwiebel schälen und würfeln. Champignons trocken abreiben und vierteln. Nudeln nach Packungsanweisung in Salzwasser garen.\nÖl in einem Topf erhitzen und Zwiebelwürfel darin andünsten. Würstchenscheiben und Champignonviertel zufügen und ca. 3 Minuten anbraten. Mit Tomaten ablöschen, aufkochen und ca. 5 Minuten köcheln lassen. Würstchengulasch mit Salz, Pfeffer und Paprikapulver würzen. Nudeln abgießen, untermischen und in einer Frischhaltedose transportieren. Würstchengulasch erwärmen und servieren."
        self.assertEqual(expected_instructions, self.harvester_class.instructions())

    def test_description(self):
        expected_description = "18 Uhr und alle haben Hunger? Dann koche rasch das Würstchengulasch und alle sind happy."
        self.assertEqual(expected_description, self.harvester_class.description())

    def test_difficulty(self):
        self.assertEqual("Leicht", self.harvester_class.difficulty())

    def test_nutrients(self):
        expected_nutrients = {
            "personal points": "earn 12 personal points",
            "positive points": "+2 Punkte von 2 Portion(en) Gemüse",
        }
        self.assertEqual(expected_nutrients, self.harvester_class.nutrients())