From 8af4f296a41e5ca9b2e3cde7deccb26a0652b70a Mon Sep 17 00:00:00 2001 From: Athar Mohammad <56029409+atharmohammad@users.noreply.github.com> Date: Mon, 18 Oct 2021 17:09:37 +0530 Subject: [PATCH] Added support for Russian language when starting with a lowercase letter (#999) * added support for Russian language when starting with a lowercase letter * added tests for Russian dates starting with lowercase letters --- dateparser/date.py | 2 +- tests/test_date_parser.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dateparser/date.py b/dateparser/date.py index b49096e18..bc50cd8f4 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -33,7 +33,7 @@ RE_TRIM_SPACES = re.compile(r'^\s+(\S.*?)\s+$') RE_TRIM_COLONS = re.compile(r'(\S.*?):*$') -RE_SANITIZE_SKIP = re.compile(r'\t|\n|\r|\u00bb|,\s\u0432|\u200e|\xb7|\u200f|\u064e|\u064f', flags=re.M) +RE_SANITIZE_SKIP = re.compile(r'\t|\n|\r|\u00bb|,\s\u0432\b|\u200e|\xb7|\u200f|\u064e|\u064f', flags=re.M) RE_SANITIZE_RUSSIAN = re.compile(r'([\W\d])\u0433\.', flags=re.I | re.U) RE_SANITIZE_PERIOD = re.compile(r'(?<=\D+)\.', flags=re.U) RE_SANITIZE_ON = re.compile(r'^.*?on:\s+(.*)') diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py index cb480c7c5..ead18292b 100644 --- a/tests/test_date_parser.py +++ b/tests/test_date_parser.py @@ -91,6 +91,7 @@ def setUp(self): param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)), + param('21 сентября 2021г., вторник', datetime(2021, 9, 21, 0, 0)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net