Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Idea/suggestion: refactor: describe unavailable/static fields using exceptions. #1132

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 15 additions & 0 deletions recipe_scrapers/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,18 @@ class SchemaOrgException(RecipeScrapersExceptions):

def __init__(self, message):
super().__init__(message)


class StaticValueException(RecipeScrapersExceptions):
    """Signal that a scraper method yields a fixed/static value.

    The value the scraper proposes to return is passed as the keyword-only
    ``return_value`` argument and stored on the instance under the same name.
    It is also interpolated into the exception message.
    """

    def __init__(self, *, return_value):
        self.return_value = return_value
        super().__init__(
            f"Suggested return value {return_value} is not from recipe source."
        )


class FieldNotProvidedByWebsiteException(StaticValueException):
    """Signal that the website provides no value for the requested field.

    Used when, as far as we know, the site does not publish this information
    for any of its recipes.
    """
10 changes: 10 additions & 0 deletions recipe_scrapers/_warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class StaticValueWarning(Warning):
    """Warning category for fields annotated as returning a fixed/static value."""


class FieldNotProvidedByWebsiteWarning(StaticValueWarning):
    """Warning category for a requested field that the recipe website does not seem to provide."""
3 changes: 2 additions & 1 deletion recipe_scrapers/bestrecipes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import StaticValueException


class BestRecipes(AbstractScraper):
Expand All @@ -9,4 +10,4 @@ def host(cls):
return "bestrecipes.com.au"

def language(self):
return "en-AU"
raise StaticValueException(return_value="en-AU")
3 changes: 2 additions & 1 deletion recipe_scrapers/chefnini.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re

from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException


class Chefnini(AbstractScraper):
Expand All @@ -20,7 +21,7 @@ def category(self):
return self.schema.category()

def total_time(self):
return None
raise FieldNotProvidedByWebsiteException(return_value=None)

def yields(self):
recipe_yields_text = self.soup.find(
Expand Down
4 changes: 4 additions & 0 deletions recipe_scrapers/davidlebovitz.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException


class DavidLebovitz(AbstractScraper):
    """Scraper for recipes on davidlebovitz.com."""

    @classmethod
    def host(cls):
        """Return the site's host name, ``davidlebovitz.com``."""
        return "davidlebovitz.com"

    def total_time(self):
        """Report that this site does not provide a total time.

        Raises:
            FieldNotProvidedByWebsiteException: always, carrying
                ``return_value=None``.
        """
        raise FieldNotProvidedByWebsiteException(return_value=None)
4 changes: 4 additions & 0 deletions recipe_scrapers/farmhousedelivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from bs4 import Tag

from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException
from ._utils import normalize_string

"""
Expand All @@ -20,6 +21,9 @@ def host(cls, domain="com"):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).get_text(strip=True)

def total_time(self):
raise FieldNotProvidedByWebsiteException(return_value=None)

def ingredients(self):
# Style 1
ingredients_marker = self.soup.find("p", string=re.compile(r"Ingredients:"))
Expand Down
23 changes: 2 additions & 21 deletions recipe_scrapers/felixkitchen.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,12 @@
# mypy: allow-untyped-defs

import warnings

from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException
from ._utils import normalize_string

MARK_SEPARATOR = " "
INGREDIENT_SEPARATOR = "• "

BUG_REPORT_LINK = "https://github.com/hhursev/recipe-scrapers/issues"


class FieldNotProvidedByWebsiteWarning(Warning):
    """Warning category for fields that a website does not seem to provide."""


def _field_not_provided_by_website_warning(host, field):
    """Warn that ``host`` does not seem to support ``field``.

    The warning category is defined once at module level instead of inside
    this function. A class created on every call is a brand-new category each
    time, which callers can neither import nor name in a warnings filter.

    Args:
        host: Host name of the website, interpolated into the message.
        field: Name of the unsupported field, interpolated into the message.
    """
    message = (
        "{} doesn't seem to support the {} field. "
        "If you know this to be untrue for some recipe, please submit a bug report at {}"
    )

    warnings.warn(
        message.format(host, field, BUG_REPORT_LINK),
        category=FieldNotProvidedByWebsiteWarning,
    )


class FelixKitchen(AbstractScraper):
@classmethod
Expand All @@ -42,8 +24,7 @@ def title(self):
return normalize_string(found.get("content"))

def total_time(self):
_field_not_provided_by_website_warning(self.host(), "total_time")
return None
raise FieldNotProvidedByWebsiteException(return_value=None)

def yields(self):
heading_p = self.soup.find("p", {"class": "ti"})
Expand Down
4 changes: 4 additions & 0 deletions recipe_scrapers/kwestiasmaku.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException
from ._utils import get_yields, normalize_string


Expand All @@ -8,6 +9,9 @@ class KwestiaSmaku(AbstractScraper):
def host(cls):
return "kwestiasmaku.com"

def total_time(self):
raise FieldNotProvidedByWebsiteException(return_value=None)

def yields(self):
return get_yields(
self.soup.find("div", {"class": "field-name-field-ilosc-porcji"})
Expand Down
4 changes: 4 additions & 0 deletions recipe_scrapers/owenhan.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException


class OwenHan(AbstractScraper):
Expand All @@ -13,6 +14,9 @@ def author(self):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).text

def total_time(self):
raise FieldNotProvidedByWebsiteException(return_value=None)

def ingredients(self):
return [x for x in map(lambda x: x.text, self.soup.select("ul > li"))]

Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from .normalize_string import NormalizeStringPlugin
from .opengraph_image_fetch import OpenGraphImageFetchPlugin
from .schemaorg_fill import SchemaOrgFillPlugin
from .static_values import StaticValueExceptionHandlingPlugin

__all__ = [
"ExceptionHandlingPlugin",
"StaticValueExceptionHandlingPlugin",
"HTMLTagStripperPlugin",
"NormalizeStringPlugin",
"OpenGraphImageFetchPlugin",
Expand Down
68 changes: 68 additions & 0 deletions recipe_scrapers/plugins/static_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# mypy: allow-untyped-defs

import functools
import warnings

from recipe_scrapers._exceptions import (
FieldNotProvidedByWebsiteException,
StaticValueException,
)
from recipe_scrapers._warnings import (
FieldNotProvidedByWebsiteWarning,
StaticValueWarning,
)
from recipe_scrapers.plugins._interface import PluginInterface


class StaticValueExceptionHandlingPlugin(PluginInterface):
    """
    Handles cases where a scraper indicates that it returns a static value --
    perhaps because the website never provides info for that method at all
    (communicated by FieldNotProvidedByWebsiteException), or because for some
    reason it is easier or more convenient to define statically (communicated
    by StaticValueException).

    Objects of StaticValueException and subclasses include a return value, so
    we return that to the caller instead after emitting a suitable warning for
    use by developers/users.
    """

    # Included in every warning message so users know where to report problems.
    BUG_REPORT_LINK = "https://github.com/hhursev/recipe-scrapers/issues"

    run_on_hosts = ("*",)
    run_on_methods = (
        "author",
        "site_name",
        "language",
        "cuisine",
        "cooking_method",
        "total_time",
        "yields",
    )

    @classmethod
    def run(cls, decorated):
        """Wrap ``decorated`` so static-value exceptions become warnings.

        Args:
            decorated: The scraper method to wrap.

        Returns:
            A wrapper that calls ``decorated`` and passes its result through.
            If ``decorated`` raises StaticValueException (or a subclass), the
            wrapper emits a warning and returns the exception's
            ``return_value`` instead of propagating the exception.
        """

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            try:
                return decorated(self, *args, **kwargs)
            except StaticValueException as e:
                # FieldNotProvidedByWebsiteException is a subclass of
                # StaticValueException, so a single handler catches both; the
                # more specific type only selects the message and category.
                if isinstance(e, FieldNotProvidedByWebsiteException):
                    message = (
                        f"{self.host()} doesn't seem to support the {decorated.__name__} field. "
                        "If you know this to be untrue for some recipe, please submit a bug report at "
                        f"{cls.BUG_REPORT_LINK}"
                    )
                    category = FieldNotProvidedByWebsiteWarning
                else:
                    message = (
                        f"{self.host()} returns a constant value from the {decorated.__name__} field. "
                        "If you believe we can and should determine that dynamically, please submit a "
                        f"bug report at {cls.BUG_REPORT_LINK}"
                    )
                    category = StaticValueWarning
                warnings.warn(message=message, category=category)
                return e.return_value

        return decorated_method_wrapper
3 changes: 2 additions & 1 deletion recipe_scrapers/potatorolls.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import StaticValueException
from ._grouping_utils import group_ingredients
from ._utils import get_yields, normalize_string

Expand Down Expand Up @@ -41,7 +42,7 @@ def instructions(self):
)

def language(self):
return "en-US"
raise StaticValueException(return_value="en-US")

def yields(self):
serve_element = self.soup.find(
Expand Down
4 changes: 4 additions & 0 deletions recipe_scrapers/rosannapansino.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import FieldNotProvidedByWebsiteException
from ._utils import normalize_string


Expand All @@ -12,6 +13,9 @@ def host(cls):
def title(self):
return self.soup.find("meta", {"property": "og:title"})["content"]

def total_time(self):
raise FieldNotProvidedByWebsiteException(return_value=None)

def ingredients(self):
ingredients = (
self.soup.find("div", {"class": "recipe-left"})
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/settings/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
NormalizeStringPlugin,
OpenGraphImageFetchPlugin,
SchemaOrgFillPlugin,
StaticValueExceptionHandlingPlugin,
)

# Plugins to be attached.
# The upper most plugin is the "outer most" executed.
# Check recipe_scrapers.settings.template.py for ways to extend.
PLUGINS = (
ExceptionHandlingPlugin,
StaticValueExceptionHandlingPlugin,
HTMLTagStripperPlugin,
NormalizeStringPlugin,
OpenGraphImageFetchPlugin,
Expand Down
16 changes: 2 additions & 14 deletions recipe_scrapers/thecookingguy.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
# mypy: allow-untyped-defs
import warnings

from ._abstract import AbstractScraper
from ._exceptions import ElementNotFoundInHtml
from ._exceptions import ElementNotFoundInHtml, FieldNotProvidedByWebsiteException
from ._grouping_utils import IngredientGroup
from ._utils import get_yields, normalize_string

BUG_REPORT_LINK = "https://github.com/hhursev/recipe-scrapers"
_null_return_warning = (
"Hm. Apparently {} doesn't provide {} values? "
"If you know that's untrue for some recipe, "
"let us know at {} by creating an issue with "
"the bug label."
)


class TheCookingGuy(AbstractScraper):
@classmethod
Expand All @@ -27,10 +18,7 @@ def title(self):
return self.schema.title()

def total_time(self):
warnings.warn(
_null_return_warning.format(self.host(), "total_time", BUG_REPORT_LINK)
)
return None
raise FieldNotProvidedByWebsiteException(return_value=None)

def yields(self):
return get_yields(self.soup.find("div", class_="text-block-7").get_text())
Expand Down
3 changes: 2 additions & 1 deletion recipe_scrapers/woolworths.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import requests

from ._abstract import HEADERS, AbstractScraper
from ._exceptions import StaticValueException
from ._schemaorg import SchemaOrg
from ._utils import url_path_to_dict

Expand Down Expand Up @@ -39,7 +40,7 @@ def nutrients(self):
return self.schema.nutrients()

def language(self):
return "en-AU"
raise StaticValueException(return_value="en-AU")

def site_name(self):
return "Woolworths | Fresh Ideas For You"
1 change: 1 addition & 0 deletions tests/test_data/davidlebovitz.com/davidlebovitz.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"Scrape the pâté into a small serving bowl and refrigerate for a few hours, until firm."
],
"category": null,
"total_time": null,
"description": "Adapted from Très Green, Très Clean, Très Chic by Rebecca Leffler Lentils double in volume when cooked, so 1 cup (160g) of dried lentils will yield close to the correct amount. They usually take about 20 to 30 minutes to cook until soft, but check the directions on the package for specific guidelines. If avoiding gluten, use tamari instead of soy sauce. For a vegan version, replace the butter with the same quantity of olive oil, for a total of 1/4 cup (60ml) of olive oil. The cognac or brandy is optional, but it does give the faux gras a little je ne sais quoi.",
"image": "https://www.davidlebovitz.com/wp-content/uploads/2015/06/Faux-Gras-Lentil-Pate-8.jpg"
}
1 change: 1 addition & 0 deletions tests/test_data/kwestiasmaku.com/kwestiasmaku.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"Wstawić do nagrzanego piekarnika i piec przez 30 minut. Po upieczeniu doprawić solą i posiekaną natką pietruszki."
],
"category": null,
"total_time": null,
"yields": "2 servings",
"description": "To sprawdzony sposób na pieczonego kalafiora :) smakowity w wersji z kurkumą i czarnuszką, ale równie dobry z innym zestawem ziół. Sprawdzą się oregano, rozmaryn, papryka, ale również garam masala oraz przyprawa curry. Warto dodać kurkumę dla koloru. W zasadzie do zapamiętania są tutaj parametry pieczenia, a mianowicie 200 stopni C - 30 minut. Kalafiora pieczemy na dużej blaszce. Kalafiora można posypać po upieczeniu zieleniną, można podać z pokruszoną fetą lub sosem czosnkowym. Sprawdzi się jako przekąska lub dodatek np. do sałatki.",
"ratings": 4.77,
Expand Down
1 change: 1 addition & 0 deletions tests/test_data/owen-han.com/owenhan.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,6 @@
"Assemble: to a toasted baguette add the chicken and bacon then top with sliced avocado pickled onions, and ranch. Add some mixed greens to the other half. ",
"Close the sandwich, cut in half and enjoy!"
],
"total_time": null,
"image": "http://static1.squarespace.com/static/627be79397093e2de753b260/627c408602fed77ca384eb11/63120c4090e9bf706973d712/1662127792157/IMG_2037.jpg?format=1500w"
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@
"instructions_list": [
"Saute onions & garlic in olive oil in a large skillet until they take on a little color. Add jalapeno and continue cooking for 1-2 minutes. Add chopped greens, season with salt and pepper to taste and cover until greens are just wilted. Add cream and bring to a simmer. Crack eggs on top of greens, cover and cook until eggs are cooked to your preference. Serve in wide bowls with a dollop of yogurt and a drizzle of hot sauce and thick slices of warm bread on the side."
],
"total_time": null,
"image": "http://recipesfhd.wpengine.com/wp-content/uploads/2020/04/no-blob_clean-handle-1024x683.jpg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@
"❻ Roast for 55 minutes, then broil for 3 minutes for an extra crisp factor! Enjoy!",
"– For more lifestyle tips and recipes from Rebecca, you can find her on Instagram at @xxrlilly"
],
"total_time": null,
"image": "http://recipesfhd.wpengine.com/wp-content/uploads/2019/05/FHD-Chic-Thigh-Recipe-225x300.jpeg"
}
1 change: 1 addition & 0 deletions tests/test_data/rosannapansino.com/rosannapansino.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"Oil your hands to prevent the marshmallow from sticking to you. Knead in the sugar until you have the consistency of soft taffy and the fondant no longer sticks to your hands. If you are tinting the fondant, add food coloring now and knead until the color is fully incorporated. (Coat your hands with shortening so the colors don't stain them.)",
"If you are not using the fondant right away, store it tightly wrapped or in a plastic bag at room temperature."
],
"total_time": null,
"description": "A delicious and easy Marshmallow Fondant recipe!\nMakes 24 ounces",
"image": "http://rosannapansino.com/cdn/shop/articles/1Y6A6281.jpg?v=1567109086"
}