Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the scraper-generator template. Inherit from abc.ABC in AbstractScraper #664

Merged
merged 7 commits into from Dec 16, 2022
9 changes: 8 additions & 1 deletion recipe_scrapers/_abstract.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
import inspect
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin
Expand All @@ -17,7 +18,7 @@
}


class AbstractScraper:
class AbstractScraper(ABC):
page_data: Union[str, bytes]

def __init__(
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(
setattr(self.__class__, "plugins_initialized", True)

@classmethod
@abstractmethod
def host(cls) -> str:
"""get the host of the url, so we can use the correct scraper"""
raise NotImplementedError("This should be implemented.")
Expand All @@ -71,12 +73,14 @@ def canonical_url(self):
return urljoin(self.url, canonical_link["href"])
return self.url

@abstractmethod
def title(self):
raise NotImplementedError("This should be implemented.")

def category(self):
raise NotImplementedError("This should be implemented.")

@abstractmethod
def total_time(self):
"""total time it takes to prepare and cook the recipe in minutes"""
raise NotImplementedError("This should be implemented.")
Expand Down Expand Up @@ -130,9 +134,11 @@ def language(self):
# Return the first candidate language
return candidate_languages.popitem(last=False)[0]

@abstractmethod
def ingredients(self):
raise NotImplementedError("This should be implemented.")

@abstractmethod
def instructions(self) -> str:
"""instructions to prepare the recipe"""
raise NotImplementedError("This should be implemented.")
Expand All @@ -149,6 +155,7 @@ def ratings(self):
raise NotImplementedError("This should be implemented.")

def author(self):
# question: should we make this a required field (abstractmethod)?
raise NotImplementedError("This should be implemented.")

def cuisine(self):
Expand Down
3 changes: 3 additions & 0 deletions recipe_scrapers/coop.py
Expand Up @@ -16,6 +16,9 @@ def title(self):
def category(self):
return self.schema.category()

def total_time(self):
return self.schema.total_time()

def cook_time(self):
return self.schema.cook_time()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/davidlebovitz.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class DavidLebovitz(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/farmhousedelivery.py
Expand Up @@ -4,6 +4,7 @@
from bs4 import Tag

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string

"""
Expand All @@ -20,6 +21,11 @@ def host(self, domain="com"):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).get_text(strip=True)

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def ingredients(self):
# Style 1
ingredients_marker = self.soup.find("p", string=re.compile(r"Ingredients:"))
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/fredriksfikaallas.py
Expand Up @@ -2,6 +2,7 @@
import re

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class FredriksFikaAllas(AbstractScraper):
Expand All @@ -15,6 +16,11 @@ def title(self):
def category(self):
return self.soup.find("div", {"class": "post_category"}).get_text()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.soup.find("meta", {"property": "og:image", "content": True}).get(
"content"
Expand Down
7 changes: 7 additions & 0 deletions recipe_scrapers/kuchniadomowa.py
Expand Up @@ -10,10 +10,17 @@ def host(cls):
def title(self):
return self.soup.find("h2").get_text().strip()

def total_time(self):
return self.schema.total_time()

def image(self):
urls = self.soup.findAll("img", {"class": "article-img", "id": "article-img-1"})
return f"https:{urls[1]['src']}"

def ingredients(self):
# TODO: add implementation
raise NotImplementedError("This should be implemented.")

def instructions(self):
instructions = self.soup.find("div", {"id": "recipe-instructions"}).findAll(
"li"
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/kwestiasmaku.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import get_yields, normalize_string


Expand All @@ -14,6 +15,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return get_yields(
self.soup.find("div", {"class": "field-name-field-ilosc-porcji"})
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/maangchi.py
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string


Expand All @@ -18,6 +19,11 @@ def title(self):
def category(self):
return self.schema.category()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return self.schema.yields()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/mykitchen101.py
Expand Up @@ -4,6 +4,7 @@
from bs4 import BeautifulSoup

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import get_yields, normalize_string


Expand All @@ -18,6 +19,11 @@ def author(self):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).get_text()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return get_yields(self.soup.find("p", string=re.compile("分量:")).get_text())

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/mykitchen101en.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class MyKitchen101en(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return self.schema.yields()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/owenhan.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class OwenHan(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).text

def total_time(self):
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/rosannapansino.py
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string


Expand All @@ -12,6 +13,11 @@ def host(cls):
def title(self):
return self.soup.find("meta", {"property": "og:title"})["content"]

def total_time(self):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In cases where a site does not have time information for its recipes:

raise RecipeScrapersExceptions(f"{self.host()} does not provide time information.")

or something along these lines to set an example.

raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
19 changes: 10 additions & 9 deletions templates/scraper.py
Expand Up @@ -8,21 +8,12 @@ class Template(AbstractScraper):
def host(cls):
return "example.com"

def author(self):
return self.schema.author()

def title(self):
return self.schema.title()

def category(self):
return self.schema.category()

def total_time(self):
return self.schema.total_time()

def yields(self):
return self.schema.yields()

def image(self):
return self.schema.image()

Expand All @@ -32,6 +23,16 @@ def ingredients(self):
def instructions(self):
return self.schema.instructions()

def author(self):
# question: should we make this a required field?
return self.schema.author()

def category(self):
return self.schema.category()

def yields(self):
return self.schema.yields()

def ratings(self):
return self.schema.ratings()

Expand Down
19 changes: 10 additions & 9 deletions templates/test_scraper.py
Expand Up @@ -11,21 +11,12 @@ class TestTemplateScraper(ScraperTest):
def test_host(self):
self.assertEqual("example.com", self.harvester_class.host())

def test_author(self):
self.assertEqual(None, self.harvester_class.author())

def test_title(self):
self.assertEqual(None, self.harvester_class.title())

def test_category(self):
self.assertEqual(None, self.harvester_class.category())

def test_total_time(self):
self.assertEqual(None, self.harvester_class.total_time())

def test_yields(self):
self.assertEqual(None, self.harvester_class.yields())

def test_image(self):
self.assertEqual(None, self.harvester_class.image())

Expand All @@ -35,6 +26,16 @@ def test_ingredients(self):
def test_instructions(self):
self.assertEqual(None, self.harvester_class.instructions())

def test_author(self):
# should we make this a required field?
self.assertEqual(None, self.harvester_class.author())

def test_category(self):
self.assertEqual(None, self.harvester_class.category())

def test_yields(self):
self.assertEqual(None, self.harvester_class.yields())

def test_ratings(self):
self.assertEqual(None, self.harvester_class.ratings())

Expand Down
4 changes: 4 additions & 0 deletions tests/test_davidlebovitz.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.davidlebovitz import DavidLebovitz
from tests import ScraperTest

Expand All @@ -15,6 +16,9 @@ def test_author(self):
def test_title(self):
self.assertEqual("Faux Gras", self.harvester_class.title())

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_image(self):
self.assertEqual(
"https://www.davidlebovitz.com/wp-content/uploads/2015/06/Faux-Gras-Lentil-Pate-8.jpg",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_farmhousedelivery_1.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.farmhousedelivery import FarmhouseDelivery
from tests import ScraperTest

Expand All @@ -19,6 +20,9 @@ def test_canonical_url(self):
def test_title(self):
self.assertEqual("Green Shakshuka", self.harvester_class.title())

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_ingredients(self):
self.assertEqual(
[
Expand Down
4 changes: 4 additions & 0 deletions tests/test_farmhousedelivery_2.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.farmhousedelivery import FarmhouseDelivery
from tests import ScraperTest

Expand All @@ -22,6 +23,9 @@ def test_title(self):
self.harvester_class.title(),
)

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_ingredients(self):
self.assertEqual(
[
Expand Down