From 0b9d394872d15d5cc26c5b02f5692d664e719914 Mon Sep 17 00:00:00 2001 From: Daniel Roschka Date: Thu, 15 Feb 2024 21:32:04 +0100 Subject: [PATCH] Parse some abbreviated strings as relative dates Until now, dateparser treated strings like "1h20m" as an absolute time. This commit changes that, so "1h20" remains an absolute time, while "1h20m" is now considered a relative date. This makes the output much more predictable and no longer dependent on the use of whitespace. Fixes #1012 Behavior before the changes: ```python >>> from datetime import datetime >>> import dateparser >>> ref_date = datetime(2023, 1, 2, 3, 4, 5) >>> dateparser.parse("1h20", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 20) >>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 20) >>> dateparser.parse("1h 20m", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 44, 5) >>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date, "PREFER_DATES_FROM": "future"}) datetime.datetime(2023, 1, 3, 1, 20) ``` Behavior after the changes: ```python >>> from datetime import datetime >>> import dateparser >>> ref_date = datetime(2023, 1, 2, 3, 4, 5) >>> dateparser.parse("1h20", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 20) >>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 44, 5) >>> dateparser.parse("1h 20m", settings={"RELATIVE_BASE": ref_date}) datetime.datetime(2023, 1, 2, 1, 44, 5) >>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date, "PREFER_DATES_FROM": "future"}) datetime.datetime(2023, 1, 2, 4, 24, 5) ``` --- dateparser/data/date_translation_data/en.py | 2 +- .../date_translation_data/en.yaml | 2 +- tests/test_languages.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 
b70975e1b..1409dcadb 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -800,7 +800,7 @@ "(?:12\\s+)?midnight": "00:00" }, { - "(\\d+[.,]?\\d*)h(\\d+[.,]?\\d*)m?": "\\1:\\2" + "(\\d+[.,]?\\d*)h(\\d+[.,]?\\d*)": "\\1:\\2" }, { "(?<=from\\s+)now": "in" diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index d103bc577..38b31297d 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -67,7 +67,7 @@ simplifications: - a: '1' - (?:12\s+)?noon: '12:00' - (?:12\s+)?midnight: '00:00' - - (\d+[.,]?\d*)h(\d+[.,]?\d*)m?: \1:\2 + - (\d+[.,]?\d*)h(\d+[.,]?\d*): \1:\2 - (?<=from\s+)now: in - less than 1 minute ago: 45 second ago - (\d+[.,]?\d*) (decade|year|month|week|day|hour|minute|second)s? later: in \1 \2 diff --git a/tests/test_languages.py b/tests/test_languages.py index a4b28b982..f1525b446 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -1088,6 +1088,14 @@ def test_translation(self, shortname, datetime_string, expected_translation): param("en", "day before yesterday", "2 day ago"), param("en", "last month", "1 month ago"), param("en", "less than a minute ago", "45 second ago"), + param("en", "10h11", "10:11"), + param("en", "10h11m", "10 hour 11 minute"), + param("en", "3d8h2m", "3 day 8 hour 2 minute"), + param("en", "5d9h59m10s", "5 day 9 hour 59 minute 10 second"), + param("en", "3d1h", "3 day 1 hour"), + param("en", "3d29m", "3 day 29 minute"), + param("en", "1.5d10s", "1.5 day 10 second"), + param("en", "7m1s", "7 minute 1 second"), # German param("de", "vorgestern", "2 day ago"), param("de", "heute", "0 day ago"),