Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

support for dates with dots and spaces #1028

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion dateparser/date.py
Expand Up @@ -35,7 +35,7 @@

RE_SANITIZE_SKIP = re.compile(r'\t|\n|\r|\u00bb|,\s\u0432\b|\u200e|\xb7|\u200f|\u064e|\u064f', flags=re.M)
RE_SANITIZE_RUSSIAN = re.compile(r'([\W\d])\u0433\.', flags=re.I | re.U)
RE_SANITIZE_PERIOD = re.compile(r'(?<=\D+)\.', flags=re.U)
RE_SANITIZE_PERIOD = re.compile(r'(?<=[^0-9\s])\.', flags=re.U)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

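💄 Cosmetic: prefer `\d` over `0-9` in the character class (note that for `str` patterns `\d` also matches non-ASCII Unicode digits).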

Suggested change
RE_SANITIZE_PERIOD = re.compile(r'(?<=[^0-9\s])\.', flags=re.U)
RE_SANITIZE_PERIOD = re.compile(r'(?<=[^\d\s])\.', flags=re.U)

RE_SANITIZE_ON = re.compile(r'^.*?on:\s+(.*)')
RE_SANITIZE_APOSTROPHE = re.compile('|'.join(APOSTROPHE_LOOK_ALIKE_CHARS))

Expand Down
2 changes: 1 addition & 1 deletion dateparser/parser.py
Expand Up @@ -223,7 +223,7 @@ class _parser:

def __init__(self, tokens, settings):
self.settings = settings
self.tokens = list(tokens)
self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we can drop the list cast now:

Suggested change
self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)]
self.tokens = [(t[0].strip(), t[1]) for t in tokens]

self.filtered_tokens = [(t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1]

self.unset_tokens = []
Expand Down
9 changes: 8 additions & 1 deletion tests/test_date_parser.py
Expand Up @@ -46,6 +46,11 @@ def setUp(self):
param('21 January 2012 13:11:23.678', datetime(2012, 1, 21, 13, 11, 23, 678000)),
param('1/1/16 9:02:43.1', datetime(2016, 1, 1, 9, 2, 43, 100000)),
param('29.02.2020 13.12', datetime(2020, 2, 29, 13, 12)),
param('26. 10.21', datetime(2021, 10, 26, 0, 0)),
param('26. 10.21 14.12', datetime(2021, 10, 26, 14, 12)),
param('26 . 10.21', datetime(2021, 10, 26, 0, 0)),
param('30 . 09 . 22 12.12', datetime(2022, 9, 30, 12, 12)),
param('1 a.m 20.07.2021', datetime(2021, 7, 20, 1, 0)),
param('Wednesday, 22nd June, 2016, 12.16 pm.', datetime(2016, 6, 22, 12, 16)),
# French dates
param('11 Mai 2014', datetime(2014, 5, 11)),
Expand Down Expand Up @@ -181,7 +186,9 @@ def setUp(self):
param('2011 წლის 17 მარტი, ოთხშაბათი', datetime(2011, 3, 17, 0, 0)),
param('2015 წ. 12 ივნ, 15:34', datetime(2015, 6, 12, 15, 34)),
# Finnish dates
param('5.7.2018 5.45 ip.', datetime(2018, 7, 5, 17, 45))
param('5.7.2018 5.45 ip.', datetime(2018, 7, 5, 17, 45)),
param('5 .7 .2018 5.45 ip.', datetime(2018, 7, 5, 17, 45))

])
def test_dates_parsing(self, date_string, expected):
self.given_parser(settings={'NORMALIZE': False,
Expand Down