hhursev · mlduff · Apr 27, 2024 · May 4, 2024 · May 5, 2024
diff --git a/README.rst b/README.rst
@@ -300,6 +300,7 @@ Scrapers available for:
 - `https://www.pingodoce.pt/ <https://www.pingodoce.pt>`_
 - `https://pinkowlkitchen.com/ <https://pinkowlkitchen.com/>`_
 - `https://www.platingpixels.com/ <https://www.platingpixels.com/>`_
+- `https://www.ploetzblog.de/ <https://www.ploetzblog.de/>`_
 - `https://plowingthroughlife.com/ <https://plowingthroughlife.com/>`_
 - `https://popsugar.com/ <https://popsugar.com>`_
 - `https://practicalselfreliance.com/ <https://practicalselfreliance.com>`_

diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py
@@ -209,6 +209,7 @@
 from .pingodoce import PingoDoce
 from .pinkowlkitchen import PinkOwlKitchen
 from .platingpixels import PlatingPixels
+from .ploetzblog import Ploetzblog
 from .plowingthroughlife import PlowingThroughLife
 from .popsugar import PopSugar
 from .practicalselfreliance import PracticalSelfReliance
@@ -388,6 +389,7 @@
     MyJewishLearning.host(): MyJewishLearning,
     NutritionFacts.host(): NutritionFacts,
     PinchOfYum.host(): PinchOfYum,
+    Ploetzblog.host(): Ploetzblog,
     Recept.host(): Recept,
     RicettePerBimby.host(): RicettePerBimby,
     StrongrFastr.host(): StrongrFastr,

diff --git a/recipe_scrapers/ploetzblog.py b/recipe_scrapers/ploetzblog.py
@@ -0,0 +1,168 @@
+# mypy: allow-untyped-defs
+
+import re
+from typing import List
+
+from ._abstract import AbstractScraper
+from ._grouping_utils import IngredientGroup
+from ._utils import normalize_string
+
+
+class Ploetzblog(AbstractScraper):
+    """Scraper for recipes published on ploetzblog.de.
+
+    Most fields are read from the page markup; the author and the total time
+    come from an inline ``<script>`` data blob, and a few fields are
+    delegated to ``self.schema``.
+    """
+
+    @classmethod
+    def host(cls):
+        return "ploetzblog.de"
+
+    def author(self):
+        # NOTE: the value is the raw text between the quotes in the script
+        # blob; escape sequences it may contain are not decoded.
+        return self._get_script_string_field("authorName")
+
+    def title(self):
+        return self.soup.find("h1").text
+
+    def category(self):
+        return self.schema.category()
+
+    def total_time(self):
+        # The time could also be scraped from the page text, but its units
+        # are written in German, which get_minutes does not handle.
+        return self._get_script_number_field("preparationTime")
+
+    def yields(self):
+        """Return the piece count followed by the unit text next to it."""
+        count_input = self.soup.find("input", {"id": "recipePieceCount"})
+        count = count_input.get("value")
+
+        # The unit text lives in the next <td> sibling of the input's parent.
+        unit_td = count_input.parent.find_next_sibling("td")
+        unit = normalize_string(unit_td.text)
+
+        return f"{count} {unit}"
+
+    def image(self):
+        return self.schema.image()
+
+    def ingredients(self):
+        """Return the lines of the second table in the ingredients container."""
+        ingredients_div = self.soup.find(
+            "div", {"class": "we2p-pb-recipe__ingredients"}
+        )
+        # NOTE(review): index 1 skips the first table; presumably that one
+        # holds the yield input -- confirm against the page markup.
+        ingredients_table = ingredients_div.find_all("table")[1]
+        return self._get_ingredients_from_table(ingredients_table)
+
+    def ingredient_groups(self) -> List[IngredientGroup]:
+        """Return one ``IngredientGroup`` per matching section ``<div>``."""
+        ingredient_groups = []
+
+        group_divs = self.soup.find_all(
+            "div", {"class": "module-mb-4 vg-wort-text module-break-inside-avoid"}
+        )
+        for group_div in group_divs:
+            # The section's <h4> heading becomes the group's purpose.
+            h4 = group_div.find("h4")
+            purpose = normalize_string(h4.text)
+
+            ingredients_table = group_div.find("table")
+            ingredients = self._get_ingredients_from_table(ingredients_table)
+
+            ingredient_groups.append(IngredientGroup(ingredients, purpose=purpose))
+
+        return ingredient_groups
+
+    def instructions(self):
+        """Return the first two instruction paragraphs, one per line."""
+        instruction_ps = self.soup.find_all(
+            "p", {"class": "module-float-left module-my-auto we2p-autolinker"}
+        )
+        instructions = [
+            normalize_string(instruction.text) for instruction in instruction_ps
+        ]
+        # NOTE(review): only the first two paragraphs are kept -- confirm that
+        # dropping the remaining steps is intended.
+        return "\n".join(instructions[:2])
+
+    def ratings(self):
+        return self.schema.ratings()
+
+    def cuisine(self):
+        return self.schema.cuisine()
+
+    def description(self):
+        """Return the description paragraphs joined with newlines."""
+        description_div = self.soup.find(
+            "div", {"class": "we2p-pb-recipe__description"}
+        )
+        paragraphs = description_div.find_all("p")
+        return "\n".join(normalize_string(p.text) for p in paragraphs)
+
+    def site_name(self):
+        return "Plötzblog"
+
+    def _get_ingredients_from_table(self, ingredients_table):
+        """Return one string per table row that contains any text.
+
+        Only the ``<span>`` texts found in the first two ``<td>`` elements of
+        each row are used, joined with single spaces.
+        """
+        ingredients = []
+
+        for tr in ingredients_table.find_all("tr"):
+            parts = []
+            for td in tr.find_all("td", limit=2):
+                for span in td.find_all("span"):
+                    text = normalize_string(span.text)
+                    if text:
+                        parts.append(text)
+            # Rows without any text would otherwise be reported as
+            # empty-string ingredients.
+            if parts:
+                ingredients.append(" ".join(parts))
+
+        return ingredients
+
+    def _get_script(self):
+        """Return the ``<script>`` declaring ForumPost/Recipe types, if any."""
+        main = self.soup.find("main", {"id": "main-content"})
+        if main is None:
+            return None
+        return main.find(
+            "script", string=re.compile(r'"types":\["ForumPost","Recipe"\]')
+        )
+
+    def _get_field_name_pattern(self, field_name):
+        """Return a regex fragment matching the quoted field name and colon."""
+        # Escape the name so that regex metacharacters in it match literally.
+        return f'"{re.escape(field_name)}"\\s*:\\s*'
+
+    def _get_script_string_field(self, field_name):
+        """Return the raw string value of ``field_name``, or ``None``."""
+        script = self._get_script()
+        if script is None:
+            return None
+
+        result = re.search(
+            self._get_field_name_pattern(field_name) + '"([^"]+)', script.string
+        )
+        return result.group(1) if result else None
+
+    def _get_script_number_field(self, field_name):
+        """Return the integer value of ``field_name``, or ``None``."""
+        script = self._get_script()
+        if script is None:
+            return None
+
+        # Capture only the digits: the value may be followed by "," or "}".
+        result = re.search(
+            self._get_field_name_pattern(field_name) + r"(-?\d+)", script.string
+        )
+        return int(result.group(1)) if result else None
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,39 +1,8 @@
-import json
 import pathlib
 import unittest
 from typing import Callable
 
-from recipe_scrapers import scrape_html
-from recipe_scrapers._grouping_utils import IngredientGroup
-
-MANDATORY_TESTS = [
-    "author",
-    "canonical_url",
-    "host",
-    "description",
-    "image",
-    "ingredients",
-    "ingredient_groups",
-    "instructions",
-    "instructions_list",
-    "language",
-    "site_name",
-    "title",
-    "total_time",
-    "yields",
-]
-
-OPTIONAL_TESTS = [
-    "category",
-    "cook_time",
-    "cuisine",
-    "nutrients",
-    "prep_time",
-    "cooking_method",
-    "ratings",
-    "reviews",
-    "equipment",
-]
+from .data_utils import load_test, run_mandatory_tests, run_optional_test
 
 
 class RecipeTestCase(unittest.TestCase):
@@ -72,43 +41,10 @@ def test_func_factory(
     """
 
     def test_func(self):
-        with open(testjson, encoding="utf-8") as f:
-            expect = json.load(f)
-            expect["ingredient_groups"] = [
-                IngredientGroup(**group)
-                for group in expect.get("ingredient_groups", [])
-            ]
-        actual = scrape_html(testhtml.read_text(encoding="utf-8"), host)
-
-        # Mandatory tests
-        # If the key isn't present, check an assertion is raised
-        for key in MANDATORY_TESTS:
-            with self.subTest(key):
-                scraper_func = getattr(actual, key)
-                if key in expect.keys():
-                    self.assertEqual(
-                        expect[key],
-                        scraper_func(),
-                        msg=f"The actual value for .{key}() did not match the expected value.",
-                    )
-                else:
-                    with self.assertRaises(
-                        Exception,
-                        msg=f".{key}() was expected to raise an exception but it did not.",
-                    ):
-                        scraper_func()
-
-        # Optional tests
-        # If the key isn't present, skip
-        for key in OPTIONAL_TESTS:
-            with self.subTest(key):
-                scraper_func = getattr(actual, key)
-                if key in expect.keys():
-                    self.assertEqual(
-                        expect[key],
-                        scraper_func(),
-                        msg=f"The actual value for .{key}() did not match the expected value.",
-                    )
+        expect, actual = load_test(host, testhtml, testjson)
+
+        run_mandatory_tests(self, expect, actual)
+        run_optional_test(self, expect, actual)
 
         # Assert that the ingredients returned by the ingredient_groups() function
         # are the same as the ingredients return by the ingredients() function.
@@ -176,6 +112,9 @@ def load_tests(
     tests = loader.loadTestsFromTestCase(RecipeTestCase)
     suite.addTest(tests)
 
+    data_driven_tests = loader.discover("tests/data_driven")
+    suite.addTests(data_driven_tests)
+
     # Add library tests to test suite
     library_tests = loader.discover("tests/library")
     suite.addTests(library_tests)

diff --git a/tests/data_driven/__init__.py b/tests/data_driven/__init__.py
diff --git a/tests/data_driven/test_data/ploetzblog.de/ploetzblog.json b/tests/data_driven/test_data/ploetzblog.de/ploetzblog.json
@@ -0,0 +1,42 @@
+{
+  "author": "Lutz Gei\\u00dfler",
+  "canonical_url": "ploetzblog.de",
+  "host": "ploetzblog.de",
+  "description": "Mein bislang bestes Weizensauerteigbrot, ganz ohne Backhefe.\nGrobe bis mittlere, unregelmäßige Porung, wattige Krume und kaum spürbare, milde Säure. Der Teigling bekommt eine lange kalte Stückgare und entwickelt auch deshalb seinen wilden Trieb im Gusseisentopf.\nFür etwas mehr Charakter kann der Sauerteig mit Vollkornmehl angesetzt werden.\nWichtig ist das triebstarke und aktive Anstellgut, das 2 – 3 Mal vor dem Ansetzen des Sauerteiges bei 27 – 28 °C aufgefrischt werden sollte.\nHinweis: Wahlweise kann das Brot auch im auf 250 °C aufgeheizten Gusseisentopf 50 Minuten fallend auf 220 °C gebacken werden. Dann den Deckel nach 40 Minuten abnehmen.",
+  "image": "https://webimages.we2p.de/2/ploetzblog/entity/gallery/619f68b528ae7154616ab768/Mildes_Weizensauerteigbrot_20160506.jpg",
+  "ingredients": [
+    "558 g Weizenmehl 550",
+    "389 g Wasser",
+    "90 g Weizenanstellgut TA 200 (weich)",
+    "13 g Salz"
+  ],
+  "ingredient_groups": [
+    {
+      "ingredients": [
+        "90 g Wasser",
+        "90 g Weizenmehl 550",
+        "90 g Weizenanstellgut TA 200 (weich)"
+      ],
+      "purpose": "Weizensauerteig"
+    },
+    {
+      "ingredients": [
+        "13 g Salz",
+        "298 g Wasser",
+        "467 g Weizenmehl 550",
+        "gesamter Weizensauerteig"
+      ],
+      "purpose": "Hauptteig"
+    }
+  ],
+  "instructions": "Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.\nMischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C).",
+  "instructions_list": [
+    "Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.",
+    "Mischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C)."
+  ],
+  "language": "de",
+  "site_name": "Plötzblog",
+  "title": "Mildes Weizensauerteigbrot",
+  "total_time": 982,
+  "yields": "1 Stück zu (je) ca. 1050 g"
+}