-
Notifications
You must be signed in to change notification settings - Fork 500
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 6 changed files with 3,227 additions and 7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
# mypy: allow-untyped-defs | ||
|
||
import re | ||
from typing import List | ||
|
||
from ._abstract import AbstractScraper | ||
from ._grouping_utils import IngredientGroup | ||
from ._utils import normalize_string | ||
|
||
|
||
class Ploetzblog(AbstractScraper):
    """Scraper for recipes published on ploetzblog.de.

    Title, yields, ingredients, instructions and description are read from the
    page markup. Author and total time are extracted with regular expressions
    from an inline ``<script>`` payload. Category, image, ratings and cuisine
    are delegated to ``self.schema``.
    """

    @classmethod
    def host(cls):
        """Return the host name handled by this scraper."""
        return "ploetzblog.de"

    def author(self):
        """Return the ``authorName`` value from the inline script, or ``None``.

        The value is returned verbatim as it appears in the script text; no
        unescaping is applied to it.
        """
        return self._get_script_string_field("authorName")

    def title(self):
        """Return the text of the first ``<h1>`` on the page."""
        return self.soup.find("h1").text

    def category(self):
        """Return the category reported by the schema data."""
        return self.schema.category()

    def total_time(self):
        """Return the ``preparationTime`` integer from the inline script, or ``None``.

        The time could also be scraped from the page text, but the units there
        are in German, which ``get_minutes`` does not work for.
        """
        return self._get_script_number_field("preparationTime")

    def yields(self):
        """Return the piece count and its unit as ``"<count> <unit>"``."""
        count_input = self.soup.find("input", {"id": "recipePieceCount"})
        count = count_input.get("value")

        # The unit text sits in the table cell following the one that holds
        # the count input.
        unit_cell = count_input.parent.find_next_sibling("td")
        unit = normalize_string(unit_cell.text)

        return f"{count} {unit}"

    def image(self):
        """Return the image URL reported by the schema data."""
        return self.schema.image()

    def ingredients(self):
        """Return the ingredient lines of the overall ingredients table."""
        container = self.soup.find("div", {"class": "we2p-pb-recipe__ingredients"})
        # The second table inside the ingredients container is the one parsed.
        overview_table = container.find_all("table")[1]
        return self._get_ingredients_from_table(overview_table)

    def ingredient_groups(self) -> List[IngredientGroup]:
        """Return one ``IngredientGroup`` per recipe section.

        Each matching section ``<div>`` contributes its ``<h4>`` heading as the
        group's purpose and the rows of its first table as the ingredients.
        """
        sections = self.soup.find_all(
            "div", {"class": "module-mb-4 vg-wort-text module-break-inside-avoid"}
        )

        groups = []
        for section in sections:
            purpose = normalize_string(section.find("h4").text)
            ingredients = self._get_ingredients_from_table(section.find("table"))
            groups.append(IngredientGroup(ingredients, purpose=purpose))

        return groups

    def instructions(self):
        """Return the instruction paragraphs joined by newlines.

        NOTE(review): only the first two matching paragraphs are returned;
        confirm whether this limit is intentional.
        """
        paragraphs = self.soup.find_all(
            "p", {"class": "module-float-left module-my-auto we2p-autolinker"}
        )
        steps = [normalize_string(paragraph.text) for paragraph in paragraphs]
        return "\n".join(steps[:2])

    def ratings(self):
        """Return the ratings reported by the schema data."""
        return self.schema.ratings()

    def cuisine(self):
        """Return the cuisine reported by the schema data."""
        return self.schema.cuisine()

    def description(self):
        """Return the description paragraphs joined by newlines."""
        container = self.soup.find("div", {"class": "we2p-pb-recipe__description"})
        return "\n".join(
            normalize_string(paragraph.text) for paragraph in container.find_all("p")
        )

    def site_name(self):
        """Return the display name of the site."""
        return "Plötzblog"

    def _get_ingredients_from_table(self, ingredients_table):
        """Return one string per ``<tr>`` of ``ingredients_table``.

        For every row, the non-empty normalized texts of the ``<span>``
        elements inside its first two ``<td>`` cells are joined with single
        spaces. A row without any such text yields an empty string.
        """
        ingredients = []

        for row in ingredients_table.find_all("tr"):
            span_texts = (
                normalize_string(span.text)
                for cell in row.find_all("td", limit=2)
                for span in cell.find_all("span")
            )
            ingredients.append(" ".join(text for text in span_texts if text))

        return ingredients

    def _get_script(self):
        """Return the inline ``<script>`` tag holding the recipe payload.

        Returns ``None`` when the ``<main id="main-content">`` element or a
        script containing the recipe type marker cannot be found.
        """
        main = self.soup.find("main", {"id": "main-content"})
        if main is None:
            return None

        return main.find(
            "script", string=re.compile(r'"types":\["ForumPost","Recipe"\]')
        )

    def _get_field_name_pattern(self, field_name):
        """Return a regex matching the quoted ``field_name`` key and its colon.

        The name is escaped so that regex metacharacters in it match literally.
        """
        return rf'\"{re.escape(field_name)}\"\s*:\s*'

    def _search_script_field(self, field_name, value_pattern):
        """Return the first group of ``value_pattern`` found after ``field_name``.

        Returns ``None`` when the script is missing, has no text, or does not
        contain the field in the expected form.
        """
        script = self._get_script()
        if script is None or script.string is None:
            return None

        match = re.search(
            self._get_field_name_pattern(field_name) + value_pattern, script.string
        )
        if not match:
            return None

        return match.group(1)

    def _get_script_string_field(self, field_name):
        """Return the raw text of a quoted string field, or ``None`` if absent."""
        return self._search_script_field(field_name, '\\"([^"]+)')

    def _get_script_number_field(self, field_name):
        """Return an integer field as ``int``, or ``None`` if absent.

        Only an optionally signed run of digits is captured, so the value
        parses whether it is followed by a comma, a closing brace or
        whitespace. A non-numeric value such as ``null`` yields ``None``.
        """
        digits = self._search_script_field(field_name, r"(-?\d+)")
        if digits is None:
            return None

        return int(digits)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
{ | ||
"author": "Lutz Gei\\u00dfler", | ||
"canonical_url": "ploetzblog.de", | ||
"host": "ploetzblog.de", | ||
"description": "Mein bislang bestes Weizensauerteigbrot, ganz ohne Backhefe.\nGrobe bis mittlere, unregelmäßige Porung, wattige Krume und kaum spürbare, milde Säure. Der Teigling bekommt eine lange kalte Stückgare und entwickelt auch deshalb seinen wilden Trieb im Gusseisentopf.\nFür etwas mehr Charakter kann der Sauerteig mit Vollkornmehl angesetzt werden.\nWichtig ist das triebstarke und aktive Anstellgut, das 2 – 3 Mal vor dem Ansetzen des Sauerteiges bei 27 – 28 °C aufgefrischt werden sollte.\nHinweis: Wahlweise kann das Brot auch im auf 250 °C aufgeheizten Gusseisentopf 50 Minuten fallend auf 220 °C gebacken werden. Dann den Deckel nach 40 Minuten abnehmen.", | ||
"image": "https://webimages.we2p.de/2/ploetzblog/entity/gallery/619f68b528ae7154616ab768/Mildes_Weizensauerteigbrot_20160506.jpg", | ||
"ingredients": [ | ||
"558 g Weizenmehl 550", | ||
"389 g Wasser", | ||
"90 g Weizenanstellgut TA 200 (weich)", | ||
"13 g Salz" | ||
], | ||
"ingredient_groups": [ | ||
{ | ||
"ingredients": [ | ||
"90 g Wasser", | ||
"90 g Weizenmehl 550", | ||
"90 g Weizenanstellgut TA 200 (weich)" | ||
], | ||
"purpose": "Weizensauerteig" | ||
}, | ||
{ | ||
"ingredients": [ | ||
"13 g Salz", | ||
"298 g Wasser", | ||
"467 g Weizenmehl 550", | ||
"gesamter Weizensauerteig" | ||
], | ||
"purpose": "Hauptteig" | ||
} | ||
], | ||
"instructions": "Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.\nMischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C).", | ||
"instructions_list": [ | ||
"Die Zutaten in der genannten Reihenfolge in eine Schüssel wiegen.", | ||
"Mischen, bis sich die Zutaten zu einem weichen Teig verbunden haben (gewünschte Teigtemperatur: ca. 28 °C)." | ||
], | ||
"language": "de", | ||
"site_name": "Plötzblog", | ||
"title": "Mildes Weizensauerteigbrot", | ||
"total_time": 982, | ||
"yields": "1 Stück zu (je) ca. 1050 g", | ||
"_options": { | ||
"consistent_ingredient_groups": false | ||
} | ||
} |
Oops, something went wrong.