From 1bc9e14ddcfb7d33fea785c89bfdd25ec4e5c96a Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 30 Sep 2022 16:56:29 +0100 Subject: [PATCH 1/5] Add test coverage for get_minutes parsing of ISO8601 time durations --- tests/library/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/library/test_utils.py b/tests/library/test_utils.py index dfdbd5d8e..3e82b5748 100644 --- a/tests/library/test_utils.py +++ b/tests/library/test_utils.py @@ -71,3 +71,15 @@ def test_get_minutes_handles_dashes(self): def test_get_minutes_handles_to(self): text = "15 to 20 minutes" self.assertEqual(20, get_minutes(text)) + + iso8601_fixtures = { + "PT1H": 60, + "PT20M": 20, + "PT2H10M": 130, + "PT0H9M30S": 10, + } + + def test_get_minutes_handles_iso8601(self): + for text, expected_minutes in self.iso8601_fixtures.items(): + with self.subTest(text=text, expected_minutes=expected_minutes): + self.assertEqual(expected_minutes, get_minutes(text)) From c8ce9f2e902a098eac949ac66b6f198438a9b1c6 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 30 Sep 2022 16:57:29 +0100 Subject: [PATCH 2/5] Add explicit isodate library dependency Note: we already currently depend on this library implicitly; it is required by rdflib which is required by extruct --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 768ea9707..bbfa60728 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ install_requires=[ "beautifulsoup4>=4.10.0", "extruct>=0.8.0", + "isodate>=0.6.1", "requests>=2.19.1", "types-beautifulsoup4>=4.11.6", "types-requests>=2.28.10", From 5becb52c6e9b8935ae1a65b8fe61a7ed6b6f5336 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 30 Sep 2022 16:59:02 +0100 Subject: [PATCH 3/5] Use isodate library to attempt to parse time durations from strings that look like potential ISO8601 date strings in get_minutes utility method --- recipe_scrapers/_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index 56c86f867..d1297b317 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -1,5 +1,8 @@ # mypy: disallow_untyped_defs=False + import html +import isodate +import math import re from ._exceptions import ElementNotFoundInHtml @@ -47,6 +50,15 @@ def get_minutes(element, return_zero_on_not_found=False): time_text = element else: time_text = element.get_text() + + # attempt iso8601 duration parsing + if time_text.startswith("PT"): + try: + duration = isodate.parse_duration(time_text) + return math.ceil(duration.total_seconds() / 60) + except Exception: + pass + if time_text.startswith("P") and "T" in time_text: time_text = time_text.split("T", 2)[1] if "-" in time_text: From 95d9cdf53186d1568619ac47dd8e736e183bb691 Mon Sep 17 00:00:00 2001 From: James Addison Date: Fri, 30 Sep 2022 17:13:28 +0100 Subject: [PATCH 4/5] Ignore flake8 complexity check failure for get_minutes method Refactoring and simplification would be useful here --- recipe_scrapers/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipe_scrapers/_utils.py b/recipe_scrapers/_utils.py index d1297b317..e35511923 100644 --- a/recipe_scrapers/_utils.py +++ b/recipe_scrapers/_utils.py @@ -33,7 +33,7 @@ SERVE_REGEX_TO = re.compile(r"\d+(\s+to\s+|-)\d+", flags=re.I | re.X) -def get_minutes(element, return_zero_on_not_found=False): +def get_minutes(element, return_zero_on_not_found=False): # noqa: C901: TODO if element is None: # to be removed if return_zero_on_not_found: From 7c2adf9f56166f528301a0acbd2fc4c1de83ac8b Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 1 Oct 2022 13:43:16 +0100 Subject: [PATCH 5/5] Nitpick: remove expected value from subtest descriptive arguments --- tests/library/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/library/test_utils.py b/tests/library/test_utils.py index 3e82b5748..b0f97d3c9 100644 --- a/tests/library/test_utils.py +++ b/tests/library/test_utils.py @@ -81,5 +81,5 @@ def test_get_minutes_handles_to(self): def test_get_minutes_handles_iso8601(self): for text, expected_minutes in self.iso8601_fixtures.items(): - with self.subTest(text=text, expected_minutes=expected_minutes): + with self.subTest(text=text): self.assertEqual(expected_minutes, get_minutes(text))