Skip to content

Commit

Permalink
Update the scraper-generator template. Inherit from abc.ABC in AbstractScraper (#664)
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison committed Dec 16, 2022
1 parent 0b0a360 commit cb64e51
Show file tree
Hide file tree
Showing 22 changed files with 124 additions and 19 deletions.
9 changes: 8 additions & 1 deletion recipe_scrapers/_abstract.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
import inspect
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin
Expand All @@ -17,7 +18,7 @@
}


class AbstractScraper:
class AbstractScraper(ABC):
page_data: Union[str, bytes]

def __init__(
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(
setattr(self.__class__, "plugins_initialized", True)

@classmethod
@abstractmethod
def host(cls) -> str:
"""Return the host of the url, so that the correct scraper can be used."""
# Abstract classmethod: concrete scrapers must override it.
raise NotImplementedError("This should be implemented.")
Expand All @@ -71,12 +73,14 @@ def canonical_url(self):
return urljoin(self.url, canonical_link["href"])
return self.url

@abstractmethod
def title(self):
# Required field: concrete scrapers must override this method.
raise NotImplementedError("This should be implemented.")

def category(self):
# Not marked abstract: a scraper that does not override it raises NotImplementedError when called.
raise NotImplementedError("This should be implemented.")

@abstractmethod
def total_time(self):
"""total time it takes to prepare and cook the recipe in minutes"""
raise NotImplementedError("This should be implemented.")
Expand Down Expand Up @@ -130,9 +134,11 @@ def language(self):
# Return the first candidate language
return candidate_languages.popitem(last=False)[0]

@abstractmethod
def ingredients(self):
# Required field: concrete scrapers must override this method.
raise NotImplementedError("This should be implemented.")

@abstractmethod
def instructions(self) -> str:
"""Return the instructions to prepare the recipe."""
raise NotImplementedError("This should be implemented.")
Expand All @@ -149,6 +155,7 @@ def ratings(self):
raise NotImplementedError("This should be implemented.")

def author(self):
# TODO: decide whether author should be a required field (abstractmethod).
raise NotImplementedError("This should be implemented.")

def cuisine(self):
Expand Down
3 changes: 3 additions & 0 deletions recipe_scrapers/coop.py
Expand Up @@ -16,6 +16,9 @@ def title(self):
def category(self):
return self.schema.category()

def total_time(self):
return self.schema.total_time()

def cook_time(self):
return self.schema.cook_time()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/davidlebovitz.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class DavidLebovitz(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/farmhousedelivery.py
Expand Up @@ -4,6 +4,7 @@
from bs4 import Tag

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string

"""
Expand All @@ -20,6 +21,11 @@ def host(self, domain="com"):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).get_text(strip=True)

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def ingredients(self):
# Style 1
ingredients_marker = self.soup.find("p", string=re.compile(r"Ingredients:"))
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/fredriksfikaallas.py
Expand Up @@ -2,6 +2,7 @@
import re

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class FredriksFikaAllas(AbstractScraper):
Expand All @@ -15,6 +16,11 @@ def title(self):
def category(self):
return self.soup.find("div", {"class": "post_category"}).get_text()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.soup.find("meta", {"property": "og:image", "content": True}).get(
"content"
Expand Down
7 changes: 7 additions & 0 deletions recipe_scrapers/kuchniadomowa.py
Expand Up @@ -10,10 +10,17 @@ def host(cls):
def title(self):
return self.soup.find("h2").get_text().strip()

def total_time(self):
return self.schema.total_time()

def image(self):
urls = self.soup.findAll("img", {"class": "article-img", "id": "article-img-1"})
return f"https:{urls[1]['src']}"

def ingredients(self):
# TODO: add implementation. Until then this placeholder only raises NotImplementedError.
raise NotImplementedError("This should be implemented.")

def instructions(self):
instructions = self.soup.find("div", {"id": "recipe-instructions"}).findAll(
"li"
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/kwestiasmaku.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import get_yields, normalize_string


Expand All @@ -14,6 +15,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return get_yields(
self.soup.find("div", {"class": "field-name-field-ilosc-porcji"})
Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/maangchi.py
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string


Expand All @@ -18,6 +19,11 @@ def title(self):
def category(self):
return self.schema.category()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return self.schema.yields()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/mykitchen101.py
Expand Up @@ -4,6 +4,7 @@
from bs4 import BeautifulSoup

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import get_yields, normalize_string


Expand All @@ -18,6 +19,11 @@ def author(self):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).get_text()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return get_yields(self.soup.find("p", string=re.compile("分量:")).get_text())

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/mykitchen101en.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class MyKitchen101en(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.schema.title()

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def yields(self):
return self.schema.yields()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/owenhan.py
@@ -1,5 +1,6 @@
# mypy: disallow_untyped_defs=False
from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions


class OwenHan(AbstractScraper):
Expand All @@ -13,6 +14,11 @@ def author(self):
def title(self):
return self.soup.find("h1", {"class": "entry-title"}).text

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
6 changes: 6 additions & 0 deletions recipe_scrapers/rosannapansino.py
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs

from ._abstract import AbstractScraper
from ._exceptions import RecipeScrapersExceptions
from ._utils import normalize_string


Expand All @@ -12,6 +13,11 @@ def host(cls):
def title(self):
return self.soup.find("meta", {"property": "og:title"})["content"]

def total_time(self):
# The site provides no time information, so raise explicitly instead of returning a value.
raise RecipeScrapersExceptions(
f"{self.host()} does not provide time information."
)

def image(self):
return self.schema.image()

Expand Down
19 changes: 10 additions & 9 deletions templates/scraper.py
Expand Up @@ -8,21 +8,12 @@ class Template(AbstractScraper):
def host(cls):
return "example.com"

def author(self):
return self.schema.author()

def title(self):
return self.schema.title()

def category(self):
return self.schema.category()

def total_time(self):
return self.schema.total_time()

def yields(self):
return self.schema.yields()

def image(self):
return self.schema.image()

Expand All @@ -32,6 +23,16 @@ def ingredients(self):
def instructions(self):
return self.schema.instructions()

def author(self):
# TODO: decide whether author should be a required field.
return self.schema.author()

def category(self):
return self.schema.category()

def yields(self):
return self.schema.yields()

def ratings(self):
return self.schema.ratings()

Expand Down
19 changes: 10 additions & 9 deletions templates/test_scraper.py
Expand Up @@ -11,21 +11,12 @@ class TestTemplateScraper(ScraperTest):
def test_host(self):
self.assertEqual("example.com", self.harvester_class.host())

def test_author(self):
self.assertEqual(None, self.harvester_class.author())

def test_title(self):
self.assertEqual(None, self.harvester_class.title())

def test_category(self):
self.assertEqual(None, self.harvester_class.category())

def test_total_time(self):
self.assertEqual(None, self.harvester_class.total_time())

def test_yields(self):
self.assertEqual(None, self.harvester_class.yields())

def test_image(self):
self.assertEqual(None, self.harvester_class.image())

Expand All @@ -35,6 +26,16 @@ def test_ingredients(self):
def test_instructions(self):
self.assertEqual(None, self.harvester_class.instructions())

def test_author(self):
# TODO: decide whether author should be a required field.
self.assertEqual(None, self.harvester_class.author())

def test_category(self):
self.assertEqual(None, self.harvester_class.category())

def test_yields(self):
self.assertEqual(None, self.harvester_class.yields())

def test_ratings(self):
self.assertEqual(None, self.harvester_class.ratings())

Expand Down
4 changes: 4 additions & 0 deletions tests/test_davidlebovitz.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.davidlebovitz import DavidLebovitz
from tests import ScraperTest

Expand All @@ -15,6 +16,9 @@ def test_author(self):
def test_title(self):
self.assertEqual("Faux Gras", self.harvester_class.title())

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_image(self):
self.assertEqual(
"https://www.davidlebovitz.com/wp-content/uploads/2015/06/Faux-Gras-Lentil-Pate-8.jpg",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_farmhousedelivery_1.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.farmhousedelivery import FarmhouseDelivery
from tests import ScraperTest

Expand All @@ -19,6 +20,9 @@ def test_canonical_url(self):
def test_title(self):
self.assertEqual("Green Shakshuka", self.harvester_class.title())

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_ingredients(self):
self.assertEqual(
[
Expand Down
4 changes: 4 additions & 0 deletions tests/test_farmhousedelivery_2.py
@@ -1,3 +1,4 @@
from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.farmhousedelivery import FarmhouseDelivery
from tests import ScraperTest

Expand All @@ -22,6 +23,9 @@ def test_title(self):
self.harvester_class.title(),
)

def test_total_time(self):
self.assertRaises(RecipeScrapersExceptions, self.harvester_class.total_time)

def test_ingredients(self):
self.assertEqual(
[
Expand Down

0 comments on commit cb64e51

Please sign in to comment.