From 9321d9a08d6098b8678459ae9672a37a82793ed2 Mon Sep 17 00:00:00 2001
From: James Addison <55152140+jayaddison@users.noreply.github.com>
Date: Tue, 18 Oct 2022 21:02:50 +0100
Subject: [PATCH] Cleanup: remove TEST_MODE setting (#645)

---
 recipe_scrapers/goustojson.py              | 26 ++++++++++++-------
 recipe_scrapers/settings/default.py        |  3 ---
 recipe_scrapers/woolworths.py              | 23 +++++++++-------
 .../test_settings_module/test_settings.py  |  1 -
 tests/test_goustojson.py                   |  8 ++++--
 tests/test_woolworths.py                   |  6 +++++
 6 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/recipe_scrapers/goustojson.py b/recipe_scrapers/goustojson.py
index 16dfe5db2..7c5ff2664 100644
--- a/recipe_scrapers/goustojson.py
+++ b/recipe_scrapers/goustojson.py
@@ -1,9 +1,7 @@
 # mypy: disallow_untyped_defs=False
-import json
+import requests
 
-from recipe_scrapers.settings import settings
-
-from ._abstract import AbstractScraper
+from ._abstract import AbstractScraper, HEADERS
 from ._utils import get_minutes, get_yields, normalize_string, url_path_to_dict
 
 
@@ -13,14 +11,22 @@ class GoustoJson(AbstractScraper):
     Let's see if it stands the test of time and reevaluate.
     """
 
-    def __init__(self, url, *args, **kwargs):
-        if not settings.TEST_MODE:  # pragma: no cover
-            recipe_slug = url_path_to_dict(url).get("path").split("/")[-1]
-            url = f"https://production-api.gousto.co.uk/cmsreadbroker/v1/recipe/{recipe_slug}"
-
+    def __init__(self, url, proxies=None, timeout=None, *args, **kwargs):
         super().__init__(url=url, *args, **kwargs)
 
-        self.page_data = json.loads(self.page_data).get("data")
+        recipe_slug = url_path_to_dict(url).get("path").split("/")[-1]
+        data_url = (
+            f"https://production-api.gousto.co.uk/cmsreadbroker/v1/recipe/{recipe_slug}"
+        )
+
+        recipe_json = requests.get(
+            data_url,
+            headers=HEADERS,
+            proxies=proxies,
+            timeout=timeout,
+        ).json()
+
+        self.page_data = recipe_json.get("data")
         self.data = self.page_data.get("entry")
 
     @classmethod
diff --git a/recipe_scrapers/settings/default.py b/recipe_scrapers/settings/default.py
index b0e811a87..45dc2ca0e 100644
--- a/recipe_scrapers/settings/default.py
+++ b/recipe_scrapers/settings/default.py
@@ -37,9 +37,6 @@
 }
 
 
-TEST_MODE = False
-
-
 # logging.DEBUG  # 10
 # logging.INFO  # 20
 # logging.WARNING  # 30
diff --git a/recipe_scrapers/woolworths.py b/recipe_scrapers/woolworths.py
index c69f10f64..b3d01fd28 100644
--- a/recipe_scrapers/woolworths.py
+++ b/recipe_scrapers/woolworths.py
@@ -1,23 +1,26 @@
 # mypy: disallow_untyped_defs=False
-import json
+import requests
 
-from recipe_scrapers.settings import settings
-
-from ._abstract import AbstractScraper
+from ._abstract import AbstractScraper, HEADERS
 from ._schemaorg import SchemaOrg
 from ._utils import url_path_to_dict
 
 
 class Woolworths(AbstractScraper):
-    def __init__(self, url, *args, **kwargs):
-        if not settings.TEST_MODE:  # pragma: no cover
-            target = url_path_to_dict(url)["path"].split("/")[-1]
-            url = f"https://foodhub.woolworths.com.au/content/woolworths-foodhub/en/{target}.model.json"
-
+    def __init__(self, url, proxies=None, timeout=None, *args, **kwargs):
         super().__init__(url=url, *args, **kwargs)
 
+        target = url_path_to_dict(url)["path"].split("/")[-1]
+        data_url = f"https://foodhub.woolworths.com.au/content/woolworths-foodhub/en/{target}.model.json"
+
         self.page_data = (
-            json.loads(self.page_data)
+            requests.get(
+                data_url,
+                headers=HEADERS,
+                proxies=proxies,
+                timeout=timeout,
+            )
+            .json()
             .get(":items")
             .get("root")
             .get(":items")
diff --git a/tests/test_data/test_settings_module/test_settings.py b/tests/test_data/test_settings_module/test_settings.py
index f456a6938..429a4286f 100644
--- a/tests/test_data/test_settings_module/test_settings.py
+++ b/tests/test_data/test_settings_module/test_settings.py
@@ -1,3 +1,2 @@
 SUPPRESS_EXCEPTIONS = True
-TEST_MODE = True
 # LOG_LEVEL = 20
diff --git a/tests/test_goustojson.py b/tests/test_goustojson.py
index 483a4acb6..2d0ce8fb0 100644
--- a/tests/test_goustojson.py
+++ b/tests/test_goustojson.py
@@ -1,3 +1,4 @@
+from responses import GET
 from recipe_scrapers.goustojson import GoustoJson
 from tests import ScraperTest
 
@@ -5,8 +6,11 @@
 class TestGoustoScraper(ScraperTest):
 
     scraper_class = GoustoJson
-    test_file_name = "gousto"
-    test_file_extension = "testjson"
+
+    @property
+    def expected_requests(self):
+        yield GET, "https://www.gousto.co.uk/cookbook/recipes/malaysian-style-coconut-meat-free-chicken-pickled-cucumber", "tests/test_data/gousto.testjson"
+        yield GET, "https://production-api.gousto.co.uk/cmsreadbroker/v1/recipe/malaysian-style-coconut-meat-free-chicken-pickled-cucumber", "tests/test_data/gousto.testjson"
 
     def test_host(self):
         self.assertEqual("gousto.co.uk", self.harvester_class.host())
diff --git a/tests/test_woolworths.py b/tests/test_woolworths.py
index 7ba02ad8a..c9f4a9643 100644
--- a/tests/test_woolworths.py
+++ b/tests/test_woolworths.py
@@ -1,3 +1,4 @@
+from responses import GET
 from recipe_scrapers.woolworths import Woolworths
 from tests import ScraperTest
 
@@ -6,6 +7,11 @@ class TestWoolworthsScraper(ScraperTest):
 
     scraper_class = Woolworths
 
+    @property
+    def expected_requests(self):
+        yield GET, "https://www.woolworths.com.au/shop/recipes/asparagus-salad-with-lemon-vinaigrette", "tests/test_data/woolworths.testhtml"
+        yield GET, "https://foodhub.woolworths.com.au/content/woolworths-foodhub/en/asparagus-salad-with-lemon-vinaigrette.model.json", "tests/test_data/woolworths.testhtml"
+
     def test_host(self):
         self.assertEqual("woolworths.com.au", self.harvester_class.host())
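
Note: this patch does not include the corresponding change to the shared ScraperTest base class in tests/__init__.py, which has to consume the new expected_requests property. A minimal sketch of one way such a harness could register the yielded (method, url, fixture) tuples with the responses library before instantiating the scraper is shown below; the class name ScraperTestSketch, the start_url variable, and the overall setUp structure are illustrative assumptions, not the project's actual implementation.

# Illustrative sketch only -- not the project's actual tests/__init__.py.
# It shows how (HTTP method, URL, fixture path) tuples yielded by
# `expected_requests` could be registered with the `responses` library so
# that the scraper's requests.get() calls are served from local fixtures.
import unittest

import responses


class ScraperTestSketch(unittest.TestCase):  # hypothetical stand-in for ScraperTest
    scraper_class = None  # subclasses set this, e.g. GoustoJson or Woolworths

    @property
    def expected_requests(self):
        # Subclasses yield (method, url, fixture_path) tuples; the first
        # entry is the page URL a user would pass to the scraper.
        raise NotImplementedError

    @responses.activate
    def setUp(self):
        start_url = None
        for method, url, fixture_path in self.expected_requests:
            if start_url is None:
                start_url = url
            with open(fixture_path, encoding="utf-8") as fixture:
                responses.add(method, url, body=fixture.read())
        # All network access happens inside the scraper constructor, so the
        # mock only needs to be active for the duration of setUp().
        self.harvester_class = self.scraper_class(url=start_url)

With a harness along these lines, the test_goustojson.py and test_woolworths.py changes above serve both the public page URL and the backend data URL from the same local fixture, which is what makes the TEST_MODE switch in the scrapers unnecessary.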