Skip to content

Commit

Permalink
Parse some abbreviated strings as relative dates
Browse files Browse the repository at this point in the history
Until now, dateparser treated strings like "1h20m" as an absolute time.
This commit changes that: "1h20" remains an absolute time, while
"1h20m" is now considered a relative date. This makes the output much
more predictable and no longer dependent on the use of whitespace.

Fixes #1012

Behavior before the changes:

```python
>>> from datetime import datetime
>>> import dateparser
>>> ref_date = datetime(2023, 1, 2, 3, 4, 5)
>>> dateparser.parse("1h20", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 20)
>>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 20)
>>> dateparser.parse("1h 20m", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 44, 5)
>>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date,
                                        "PREFER_DATES_FROM": "future"})
datetime.datetime(2023, 1, 3, 1, 20)
```

Behavior after the changes:

```python
>>> from datetime import datetime
>>> import dateparser
>>> ref_date = datetime(2023, 1, 2, 3, 4, 5)
>>> dateparser.parse("1h20", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 20)
>>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 44, 5)
>>> dateparser.parse("1h 20m", settings={"RELATIVE_BASE": ref_date})
datetime.datetime(2023, 1, 2, 1, 44, 5)
>>> dateparser.parse("1h20m", settings={"RELATIVE_BASE": ref_date,
                                        "PREFER_DATES_FROM": "future"})
datetime.datetime(2023, 1, 2, 4, 24, 5)
```
  • Loading branch information
Dunedan committed Feb 15, 2024
1 parent 1d4b058 commit 0b9d394
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
2 changes: 1 addition & 1 deletion dateparser/data/date_translation_data/en.py
Expand Up @@ -800,7 +800,7 @@
"(?:12\\s+)?midnight": "00:00"
},
{
- "(\\d+[.,]?\\d*)h(\\d+[.,]?\\d*)m?": "\\1:\\2"
+ "(\\d+[.,]?\\d*)h(\\d+[.,]?\\d*)": "\\1:\\2"
},
{
"(?<=from\\s+)now": "in"
Expand Down
Expand Up @@ -67,7 +67,7 @@ simplifications:
- a: '1'
- (?:12\s+)?noon: '12:00'
- (?:12\s+)?midnight: '00:00'
- - (\d+[.,]?\d*)h(\d+[.,]?\d*)m?: \1:\2
+ - (\d+[.,]?\d*)h(\d+[.,]?\d*): \1:\2
- (?<=from\s+)now: in
- less than 1 minute ago: 45 second ago
- (\d+[.,]?\d*) (decade|year|month|week|day|hour|minute|second)s? later: in \1 \2
Expand Down
8 changes: 8 additions & 0 deletions tests/test_languages.py
Expand Up @@ -1088,6 +1088,14 @@ def test_translation(self, shortname, datetime_string, expected_translation):
param("en", "day before yesterday", "2 day ago"),
param("en", "last month", "1 month ago"),
param("en", "less than a minute ago", "45 second ago"),
param("en", "10h11", "10:11"),
param("en", "10h11m", "10 hour 11 minute"),
param("en", "3d8h2m", "3 day 8 hour 2 minute"),
param("en", "5d9h59m10s", "5 day 9 hour 59 minute 10 second"),
param("en", "3d1h", "3 day 1 hour"),
param("en", "3d29m", "3 day 29 minute"),
param("en", "1.5d10s", "1.5 day 10 second"),
param("en", "7m1s", "7 minute 1 second"),
# German
param("de", "vorgestern", "2 day ago"),
param("de", "heute", "0 day ago"),
Expand Down

0 comments on commit 0b9d394

Please sign in to comment.