Ploetzblog and test configurability

hhursev · Apr 27, 2024 · 9790ad7 · 9790ad7
1 parent 4a11ec8
commit 9790ad7
Show file tree

Hide file tree

Showing 6 changed files with 3,227 additions and 7 deletions.
diff --git a/README.rst b/README.rst
@@ -300,6 +300,7 @@ Scrapers available for:
 - `https://www.pingodoce.pt/ <https://www.pingodoce.pt>`_
 - `https://pinkowlkitchen.com/ <https://pinkowlkitchen.com/>`_
 - `https://www.platingpixels.com/ <https://www.platingpixels.com/>`_
+- `https://www.ploetzblog.de/ <https://www.ploetzblog.de/>`_
 - `https://plowingthroughlife.com/ <https://plowingthroughlife.com/>`_
 - `https://popsugar.com/ <https://popsugar.com>`_
 - `https://practicalselfreliance.com/ <https://practicalselfreliance.com>`_

diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py
@@ -209,6 +209,7 @@
 from .pingodoce import PingoDoce
 from .pinkowlkitchen import PinkOwlKitchen
 from .platingpixels import PlatingPixels
+from .ploetzblog import Ploetzblog
 from .plowingthroughlife import PlowingThroughLife
 from .popsugar import PopSugar
 from .practicalselfreliance import PracticalSelfReliance
@@ -388,6 +389,7 @@
     MyJewishLearning.host(): MyJewishLearning,
     NutritionFacts.host(): NutritionFacts,
     PinchOfYum.host(): PinchOfYum,
+    Ploetzblog.host(): Ploetzblog,
     Recept.host(): Recept,
     RicettePerBimby.host(): RicettePerBimby,
     StrongrFastr.host(): StrongrFastr,

diff --git a/recipe_scrapers/ploetzblog.py b/recipe_scrapers/ploetzblog.py
@@ -0,0 +1,142 @@
+# mypy: allow-untyped-defs
+
+import re
+from typing import List
+
+from ._abstract import AbstractScraper
+from ._grouping_utils import IngredientGroup
+from ._utils import normalize_string
+
+
+class Ploetzblog(AbstractScraper):
+    """Scraper for recipe pages on ploetzblog.de.
+
+    Values come from three places: the schema data exposed through
+    ``self.schema``, the parsed page in ``self.soup``, and an inline
+    ``<script>`` element whose text holds raw ``"field": value`` pairs.
+    """
+
+    @classmethod
+    def host(cls):
+        """Return the host name this scraper handles."""
+        return "ploetzblog.de"
+
+    def author(self):
+        """Return the ``authorName`` value from the inline script, or None."""
+        return self._get_script_string_field("authorName")
+
+    def title(self):
+        """Return the text of the first ``<h1>`` element on the page."""
+        return self.soup.find("h1").text
+
+    def category(self):
+        """Return the category reported by the schema data."""
+        return self.schema.category()
+
+    def total_time(self):
+        """Return the ``preparationTime`` value from the inline script, or None."""
+        # The time could also be scraped from the visible page text, but its
+        # units are written in German, which get_minutes does not work for.
+        return self._get_script_number_field("preparationTime")
+
+    def yields(self):
+        """Return the yield formatted as ``"<count> <unit>"``.
+
+        The count is the ``value`` attribute of the ``recipePieceCount`` input;
+        the unit is the normalized text of the ``<td>`` that follows the
+        input's parent element.
+        """
+        count_input = self.soup.find("input", {"id": "recipePieceCount"})
+        count = count_input.get("value")
+
+        unit_cell = count_input.parent.find_next_sibling("td")
+        unit = normalize_string(unit_cell.text)
+
+        return f"{count} {unit}"
+
+    def image(self):
+        """Return the image reported by the schema data."""
+        return self.schema.image()
+
+    def ingredients(self):
+        """Return the ingredient lines from the recipe's ingredients container."""
+        ingredients_div = self.soup.find(
+            "div", {"class": "we2p-pb-recipe__ingredients"}
+        )
+        # Only the second table inside the container is read.
+        # NOTE(review): presumably the first table is not an ingredient list
+        # -- confirm against the page markup.
+        ingredients_table = ingredients_div.find_all("table")[1]
+        return self._get_ingredients_from_table(ingredients_table)
+
+    def ingredient_groups(self) -> List[IngredientGroup]:
+        """Return one IngredientGroup per matching section of the page.
+
+        Each section's ``<h4>`` text becomes the group's purpose and its first
+        ``<table>`` supplies the group's ingredients.
+        """
+        group_divs = self.soup.find_all(
+            "div", {"class": "module-mb-4 vg-wort-text module-break-inside-avoid"}
+        )
+
+        ingredient_groups = []
+        for group_div in group_divs:
+            purpose = normalize_string(group_div.find("h4").text)
+            ingredients = self._get_ingredients_from_table(group_div.find("table"))
+            ingredient_groups.append(IngredientGroup(ingredients, purpose=purpose))
+
+        return ingredient_groups
+
+    def instructions(self):
+        """Return the instruction paragraphs joined by newlines."""
+        instruction_ps = self.soup.find_all(
+            "p", {"class": "module-float-left module-my-auto we2p-autolinker"}
+        )
+        instructions = [
+            normalize_string(instruction.text) for instruction in instruction_ps
+        ]
+        # NOTE(review): only the first two matching paragraphs are returned --
+        # confirm that this limit is intentional.
+        return "\n".join(instructions[:2])
+
+    def ratings(self):
+        """Return the ratings reported by the schema data."""
+        return self.schema.ratings()
+
+    def cuisine(self):
+        """Return the cuisine reported by the schema data."""
+        return self.schema.cuisine()
+
+    def description(self):
+        """Return the description paragraphs joined by newlines."""
+        description_div = self.soup.find(
+            "div", {"class": "we2p-pb-recipe__description"}
+        )
+        return "\n".join(
+            normalize_string(p.text) for p in description_div.find_all("p")
+        )
+
+    def site_name(self):
+        """Return the display name of the site."""
+        return "Plötzblog"
+
+    def _get_ingredients_from_table(self, ingredients_table):
+        """Return one ingredient string per ``<tr>`` of ``ingredients_table``.
+
+        For every row, the non-empty normalized texts of all ``<span>``
+        elements found in its first two cells are joined with single spaces.
+        A row without any such text contributes an empty string.
+        """
+        ingredients = []
+        for row in ingredients_table.find_all("tr"):
+            texts = (
+                normalize_string(span.text)
+                for cell in row.find_all("td", limit=2)
+                for span in cell.find_all("span")
+            )
+            ingredients.append(" ".join(text for text in texts if text))
+
+        return ingredients
+
+    def _get_script(self):
+        """Return the recipe ``<script>`` inside ``<main id="main-content">``.
+
+        The script is the one whose text contains the
+        ``"types":["ForumPost","Recipe"]`` marker.
+        """
+        main = self.soup.find("main", {"id": "main-content"})
+        return main.find(
+            "script", string=re.compile(r'"types":\["ForumPost","Recipe"\]')
+        )
+
+    def _get_field_name_pattern(self, field_name):
+        """Return a regex fragment matching ``"<field_name>"`` and its colon.
+
+        Whitespace on either side of the colon is tolerated. The name is
+        escaped so that it is always matched literally.
+        """
+        return rf'"{re.escape(field_name)}"\s*:\s*'
+
+    def _get_script_string_field(self, field_name):
+        """Return the quoted value of ``field_name`` in the script, or None.
+
+        None is returned when the field is absent or its value is empty.
+        """
+        script = self._get_script()
+
+        # NOTE(review): the captured text is returned exactly as written in the
+        # script, so JSON escape sequences (e.g. \u00df) are not decoded --
+        # confirm whether callers expect decoded text.
+        result = re.search(
+            self._get_field_name_pattern(field_name) + r'"([^"]+)', script.string
+        )
+        return result.group(1) if result else None
+
+    def _get_script_number_field(self, field_name):
+        """Return the integer value of ``field_name`` in the script, or None.
+
+        Only an optionally signed run of digits directly after the field name
+        is captured, so the value is parsed the same way whether it is followed
+        by a comma, a closing brace or whitespace. None is returned when the
+        field is absent or its value does not start with an integer.
+        """
+        script = self._get_script()
+
+        result = re.search(
+            self._get_field_name_pattern(field_name) + r"(-?\d+)", script.string
+        )
+        return int(result.group(1)) if result else None
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,6 +1,7 @@
 import json
 import pathlib
 import unittest
+from enum import Enum
 from typing import Callable
 
 from recipe_scrapers import scrape_html
@@ -35,6 +36,33 @@
     "equipment",
 ]
 
+# Key of the optional node in a test's expected data that holds per-test option overrides.
+OPTIONS_KEY = "_options"
+
+
+class TestOptions(Enum):
+    """Switches that an individual test case may override.
+
+    Every member is declared as ``(key, default)``. ``key`` becomes the
+    member's ``value`` and is the name looked up in the test's options node;
+    ``default`` is stored on the member and applies when the option is not set.
+    """
+
+    CONSISTENT_INGREDIENT_GROUPS = ("consistent_ingredient_groups", True)
+    """
+    Controls if the consistent ingredient groups test is run.
+    Disable if ingredient groups contain sub-quantities of the same ingredient (as the test will fail).
+    """
+
+    def __new__(cls, value: str, default: bool):
+        # Only the key is used as the enum value; the default is attached to
+        # the member separately in __init__.
+        obj = object.__new__(cls)
+        obj._value_ = value
+        return obj
+
+    def __init__(self, value: str, default: bool) -> None:
+        self.default = default
+
+
+def get_options(expect):
+    """Return the effective value of every TestOptions member for one test.
+
+    ``expect`` is the mapping of expected values for the test. Overrides are
+    read from its ``OPTIONS_KEY`` node. An option falls back to its default
+    when the node is missing, the node is null, or the option is not listed.
+    """
+    # Look the node up once; ``or {}`` also covers a node explicitly set to null.
+    configured = expect.get(OPTIONS_KEY) or {}
+    return {
+        option: configured.get(option.value, option.default) for option in TestOptions
+    }
+
 
 class RecipeTestCase(unittest.TestCase):
     maxDiff = None
@@ -80,6 +108,8 @@ def test_func(self):
             ]
         actual = scrape_html(testhtml.read_text(encoding="utf-8"), host)
 
+        options = get_options(expect)
+
         # Mandatory tests
         # If the key isn't present, check an assertion is raised
         for key in MANDATORY_TESTS:
@@ -110,14 +140,15 @@ def test_func(self):
                         msg=f"The actual value for .{key}() did not match the expected value.",
                     )
 
-        # Assert that the ingredients returned by the ingredient_groups() function
-        # are the same as the ingredients return by the ingredients() function.
-        grouped = []
-        for group in actual.ingredient_groups():
-            grouped.extend(group.ingredients)
+        if options.get(TestOptions.CONSISTENT_INGREDIENT_GROUPS):
+            # Assert that the ingredients returned by the ingredient_groups() function
+            # are the same as the ingredients returned by the ingredients() function.
+            grouped = []
+            for group in actual.ingredient_groups():
+                grouped.extend(group.ingredients)
 
-        with self.subTest("ingredient_groups"):
-            self.assertEqual(sorted(actual.ingredients()), sorted(grouped))
+            with self.subTest("ingredient_groups"):
+                self.assertEqual(sorted(actual.ingredients()), sorted(grouped))
 
     return test_func
 

diff --git a/tests/test_data/ploetzblog.de/ploetzblog.json b/tests/test_data/ploetzblog.de/ploetzblog.json
@@ -0,0 +1,45 @@
+{
+  "author": "Lutz Gei\\u00dfler",
+  "canonical_url": "ploetzblog.de",
+  "host": "ploetzblog.de",
+  "description": "Mein bislang bestes Weizensauerteigbrot, ganz ohne Backhefe.\nGrobe bis mittlere, unregelmäßige Porung, wattige Krume und kaum spürbare, milde Säure. Der Teigling bekommt eine lange kalte Stückgare und entwickelt auch deshalb seinen wilden Trieb im Gusseisentopf.\nFür etwas mehr Charakter kann der Sauerteig mit Vollkornmehl angesetzt werden.\nWichtig ist das triebstarke und aktive Anstellgut, das 2 – 3 Mal vor dem Ansetzen des Sauerteiges bei 27 – 28 °C aufgefrischt werden sollte.\nHinweis: Wahlweise kann das Brot auch im auf 250 °C aufgeheizten Gusseisentopf 50 Minuten fallend auf 220 °C gebacken werden. Dann den Deckel nach 40 Minuten abnehmen.",
+  "image": "https://webimages.we2p.de/2/ploetzblog/entity/gallery/619f68b528ae7154616ab768/Mildes_Weizensauerteigbrot_20160506.jpg",
+  "ingredients": [
+    "558 g Weizenmehl 550",
+    "389 g Wasser",
+    "90 g Weizenanstellgut TA 200 (weich)",
+    "13 g Salz"
+  ],
+  "ingredient_groups": [
+    {
+      "ingredients": [
+        "90 g Wasser",
+        "90 g Weizenmehl 550",
+        "90 g Weizenanstellgut TA 200 (weich)"
+      ],
+      "purpose": "Weizensauerteig"
+    },
+    {
+      "ingredients": [
+        "13 g Salz",
+        "298 g Wasser",
+        "467 g Weizenmehl 550",
+        "gesamter Weizensauerteig"
+      ],
+      "purpose": "Hauptteig"
+    }
+  ],
+  "instructions": "Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.\nMischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C).",
+  "instructions_list": [
+    "Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.",
+    "Mischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C)."
+  ],
+  "language": "de",
+  "site_name": "Plötzblog",
+  "title": "Mildes Weizensauerteigbrot",
+  "total_time": 982,
+  "yields": "1 Stück zu (je) ca. 1050 g",
+  "_options": {
+    "consistent_ingredient_groups": false
+  }
+}