From 3eea80ee91e4d4b9b450ed9afbf9836d3e93ea7f Mon Sep 17 00:00:00 2001 From: atharmohammad Date: Sat, 18 Dec 2021 21:53:44 +0530 Subject: [PATCH 1/2] support for dates with dots and spaces --- dateparser/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dateparser/parser.py b/dateparser/parser.py index e2df66b03..30536cab7 100644 --- a/dateparser/parser.py +++ b/dateparser/parser.py @@ -223,7 +223,7 @@ class _parser: def __init__(self, tokens, settings): self.settings = settings - self.tokens = list(tokens) + self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)] self.filtered_tokens = [(t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1] self.unset_tokens = [] From cabeaf4142047706ffe782b7f2fbd9ac789f4d5a Mon Sep 17 00:00:00 2001 From: atharmohammad Date: Sun, 19 Dec 2021 00:51:29 +0530 Subject: [PATCH 2/2] support for period between spaces and added unit tests for the same --- dateparser/date.py | 2 +- tests/test_date_parser.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dateparser/date.py b/dateparser/date.py index ac8283ba2..1a91764e4 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -35,7 +35,7 @@ RE_SANITIZE_SKIP = re.compile(r'\t|\n|\r|\u00bb|,\s\u0432\b|\u200e|\xb7|\u200f|\u064e|\u064f', flags=re.M) RE_SANITIZE_RUSSIAN = re.compile(r'([\W\d])\u0433\.', flags=re.I | re.U) -RE_SANITIZE_PERIOD = re.compile(r'(?<=\D+)\.', flags=re.U) +RE_SANITIZE_PERIOD = re.compile(r'(?<=[^0-9\s])\.', flags=re.U) RE_SANITIZE_ON = re.compile(r'^.*?on:\s+(.*)') RE_SANITIZE_APOSTROPHE = re.compile('|'.join(APOSTROPHE_LOOK_ALIKE_CHARS)) diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py index ead18292b..537e38ef4 100644 --- a/tests/test_date_parser.py +++ b/tests/test_date_parser.py @@ -46,6 +46,11 @@ def setUp(self): param('21 January 2012 13:11:23.678', datetime(2012, 1, 21, 13, 11, 23, 678000)), param('1/1/16 9:02:43.1', datetime(2016, 1, 1, 9, 2, 43, 100000)), param('29.02.2020 13.12', datetime(2020, 2, 29, 13, 12)), + param('26. 10.21', datetime(2021, 10, 26, 0, 0)), + param('26. 10.21 14.12', datetime(2021, 10, 26, 14, 12)), + param('26 . 10.21', datetime(2021, 10, 26, 0, 0)), + param('30 . 09 . 22 12.12', datetime(2022, 9, 30, 12, 12)), + param('1 a.m 20.07.2021', datetime(2021, 7, 20, 1, 0)), param('Wednesday, 22nd June, 2016, 12.16 pm.', datetime(2016, 6, 22, 12, 16)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), @@ -181,7 +186,9 @@ def setUp(self): param('2011 წლის 17 მარტი, ოთხშაბათი', datetime(2011, 3, 17, 0, 0)), param('2015 წ. 12 ივნ, 15:34', datetime(2015, 6, 12, 15, 34)), # Finnish dates - param('5.7.2018 5.45 ip.', datetime(2018, 7, 5, 17, 45)) + param('5.7.2018 5.45 ip.', datetime(2018, 7, 5, 17, 45)), + param('5 .7 .2018 5.45 ip.', datetime(2018, 7, 5, 17, 45)) + ]) def test_dates_parsing(self, date_string, expected): self.given_parser(settings={'NORMALIZE': False,