Skip to content

Commit

Permalink
Fixed issue arrow-py#701
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewchouman committed Nov 25, 2019
1 parent e67c810 commit f434f7a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
17 changes: 11 additions & 6 deletions arrow/parser.py
Expand Up @@ -218,6 +218,7 @@ def parse(self, datetime_string, fmt):
fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)

match = fmt_pattern_re.search(datetime_string)

if match is None:
raise ParserMatchError(
"Failed to match '{}' when parsing '{}'".format(fmt, datetime_string)
Expand Down Expand Up @@ -292,12 +293,16 @@ def _generate_pattern_re(self, fmt):
# and time string in a natural language sentence. Therefore, searching
# for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
# work properly.
# Reference: https://stackoverflow.com/q/14232931/3820660
starting_word_boundary = r"(?<![\S])"
ending_word_boundary = r"(?![\S])"
bounded_fmt_pattern = r"{}{}{}".format(
starting_word_boundary, final_fmt_pattern, ending_word_boundary
)
# A single punctuation character directly before and/or after the target
# pattern, such as the trailing comma in "1998-09-12,", is permitted. For
# the full list of valid punctuation characters, see the documentation.

starting_punctuation_bound = r"(?<!\S\S)(?<!\s[^,.;:?!\"'`\[\]{}(" \
r")<>\s])(\b|^)"
ending_punctuation_bound = r"(?=[,.;:?!\"'`\[\]{}()<>]?(?!\S))"
bounded_fmt_pattern = r"{}{}{}".format(starting_punctuation_bound,
final_fmt_pattern,
ending_punctuation_bound)

return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

Expand Down
12 changes: 12 additions & 0 deletions docs/index.rst
Expand Up @@ -212,6 +212,7 @@ Support for a growing number of locales (see ``locales.py`` for supported langua

.. code-block:: python
>>> future = arrow.utcnow().shift(hours=1)
>>> future.humanize(a, locale='ru')
'через 2 час(а,ов)'
Expand Down Expand Up @@ -427,6 +428,17 @@ You can also escape regular expressions by enclosing them within square brackets
>>> arrow.get("Mon Sep 08 16:41:45 2014", fmt)
<Arrow [2014-09-08T16:41:45+00:00]>
Punctuation
~~~~~~~~~~~

The date in the input string can be preceded and/or followed by one character of
punctuation from the list: :code:`, . ; : ? ! " ' ` [ ] { } ( ) < >`

.. code-block:: python
>>> arrow.get("Tomorrow (2019-10-31) is Halloween!", "YYYY-MM-DD")
<Arrow [2019-10-31T00:00:00+00:00]>
API Guide
---------

Expand Down
29 changes: 28 additions & 1 deletion tests/parser_tests.py
Expand Up @@ -472,7 +472,10 @@ def test_parse_with_extra_words_at_start_and_end_invalid(self):

def test_parse_with_extra_words_at_start_and_end_valid(self):
# Spaces surrounding the parsable date are ok because we
# allow the parsing of natural language input
# allow the parsing of natural language input. Additionally, a single
# character of specific punctuation before or after the date is okay.
# See docs for full list of valid punctuation.

self.assertEqual(
self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1)
)
Expand Down Expand Up @@ -527,6 +530,30 @@ def test_parse_with_extra_words_at_start_and_end_valid(self):
datetime(2016, 5, 16, 4, 5, 6, 789120),
)

self.assertEqual(
self.parser.parse(
"Meet me at my house on the my birthday (2019-24-11)",
"YYYY-DD-MM"
),
datetime(2019, 11, 24)
)

self.assertEqual(
self.parser.parse(
"Monday, 9. September 2019, 16:15-20:00",
"dddd, D. MMMM YYYY"
),
datetime(2019, 9, 9)
)

self.assertEqual(
self.parser.parse(
"A date is 11.11.2011.",
"DD.MM.YYYY"
),
datetime(2011, 11, 11)
)

def test_parse_with_leading_and_trailing_whitespace(self):
self.assertEqual(self.parser.parse(" 2016", "YYYY"), datetime(2016, 1, 1))

Expand Down

0 comments on commit f434f7a

Please sign in to comment.