From 47445f99a5c9935c899bc803947f9628b62c2aed Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 22 Jun 2019 21:16:31 -0400 Subject: [PATCH 01/50] started some fixes for get method --- arrow/parser.py | 51 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 7266ff957..e2e73ac11 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -91,39 +91,60 @@ def parse_iso(self, string): has_time = "T" in string or " " in string.strip() space_divider = " " in string.strip() + has_tz = False + if has_time: if space_divider: date_string, time_string = string.split(" ", 1) else: date_string, time_string = string.split("T", 1) + time_parts = re.split("[+-]", time_string, 1) + colon_count = time_parts[0].count(":") + has_tz = len(time_parts) > 1 - has_seconds = time_parts[0].count(":") > 1 + has_hours = colon_count == 0 + has_minutes = colon_count == 1 + has_seconds = colon_count == 2 has_subseconds = re.search("[.,]", time_parts[0]) if has_subseconds: - formats = ["YYYY-MM-DDTHH:mm:ss%sS" % has_subseconds.group()] + time_string = "HH:mm:ss{}S".format(has_subseconds.group()) elif has_seconds: - formats = ["YYYY-MM-DDTHH:mm:ss"] + time_string = "HH:mm:ss" + elif has_minutes: + time_string = "HH:mm" + elif has_hours: + time_string = "HH" else: - formats = ["YYYY-MM-DDTHH:mm"] - else: - has_tz = False - # generate required formats: YYYY-MM-DD, YYYY-MM-DD, YYYY - # using various separators: -, /, . - len_markers = len(self.MARKERS) - formats = [ - separator.join(self.MARKERS[: len_markers - i]) - for i in range(len_markers) - for separator in self.SEPARATORS - ] + # TODO: improve error message + # ! TODO: add tests for new conditional cases + raise ValueError("ISO 8601 time string expected.") + + # required ISO 8601 formats + formats = [ + "YYYY-MM-DD", + "YYYY/MM/DD", + "YYYY.MM.DD", + "YYYY-MM", + "YYYY/MM", + "YYYY.MM", + "YYYY", + ] + + # !? NOTE: ASK CHRIS ABOUT . SEPARATOR => I am not sure if it is part of ISO 8601? + + if has_time: + formats = ["{}T{}".format(f, time_string) for f in formats] if has_time and has_tz: - formats = [f + "Z" for f in formats] + formats = ["{}Z".format(f) for f in formats] if space_divider: formats = [item.replace("T", " ", 1) for item in formats] + # ! IDEA: pass in a flag to denote that we are coming from a get() + # request with no formatting string was passed in return self._parse_multiformat(string, formats) def _generate_pattern_re(self, fmt): From df49462a04706d9cbce0b974fb6d8a5a5ff45783 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 23 Jun 2019 12:20:27 -0400 Subject: [PATCH 02/50] Current progress of fixing get --- .gitignore | 1 + arrow/factory.py | 2 +- arrow/parser.py | 32 ++++++++++++++------ arrow/util.py | 69 +++--------------------------------------- tests/arrow_tests.py | 64 --------------------------------------- tests/factory_tests.py | 6 ++-- tests/parser_tests.py | 14 ++++++--- 7 files changed, 41 insertions(+), 147 deletions(-) diff --git a/.gitignore b/.gitignore index ae9b3e31a..9eded2397 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ docs/_build/ # VS Code .vscode/ +.idea/ diff --git a/arrow/factory.py b/arrow/factory.py index 8591443a4..d6bf4dffa 100644 --- a/arrow/factory.py +++ b/arrow/factory.py @@ -150,7 +150,7 @@ def get(self, *args, **kwargs): if arg is None: return self.type.utcnow() - # try (int, float, str(int), str(float)) -> utc, from timestamp. + # try (int, float) -> utc, from timestamp. if is_timestamp(arg): return self.type.utcfromtimestamp(arg) diff --git a/arrow/parser.py b/arrow/parser.py index e2e73ac11..d22a23515 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -99,15 +99,19 @@ def parse_iso(self, string): else: date_string, time_string = string.split("T", 1) + # ! TODO: look for Z in time string? time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") + # is_basic_format = colon_count > 0 + has_tz = len(time_parts) > 1 has_hours = colon_count == 0 has_minutes = colon_count == 1 has_seconds = colon_count == 2 has_subseconds = re.search("[.,]", time_parts[0]) + # TODO: Add support for basic timestamps if has_subseconds: time_string = "HH:mm:ss{}S".format(has_subseconds.group()) elif has_seconds: @@ -117,26 +121,25 @@ def parse_iso(self, string): elif has_hours: time_string = "HH" else: - # TODO: improve error message # ! TODO: add tests for new conditional cases - raise ValueError("ISO 8601 time string expected.") + raise ValueError("No valid time component provided.") - # required ISO 8601 formats + # required date formats to test against formats = [ "YYYY-MM-DD", "YYYY/MM/DD", "YYYY.MM.DD", + "YYYYMMDD", "YYYY-MM", "YYYY/MM", "YYYY.MM", "YYYY", ] - # !? NOTE: ASK CHRIS ABOUT . SEPARATOR => I am not sure if it is part of ISO 8601? - if has_time: formats = ["{}T{}".format(f, time_string) for f in formats] + # TODO: what if someone adds a Z already? if has_time and has_tz: formats = ["{}Z".format(f) for f in formats] @@ -145,7 +148,7 @@ def parse_iso(self, string): # ! IDEA: pass in a flag to denote that we are coming from a get() # request with no formatting string was passed in - return self._parse_multiformat(string, formats) + return self._parse_multiformat(string, formats, True) def _generate_pattern_re(self, fmt): @@ -198,7 +201,7 @@ def _generate_pattern_re(self, fmt): return tokens, re.compile(final_fmt_pattern, flags=re.IGNORECASE) - def parse(self, string, fmt): + def parse(self, string, fmt, from_parse_iso=False): if isinstance(fmt, list): return self._parse_multiformat(string, fmt) @@ -212,6 +215,17 @@ def parse(self, string, fmt): fmt_pattern_re.pattern, string ) ) + + if from_parse_iso: + if match.start() != 0: + raise ParserError + + if string[-1] == "Z" and match.end() != len(string): + raise ParserError + + if string[-1] != "Z" and match.end() != len(string): + raise ParserError + parts = {} for token in fmt_tokens: if token == "Do": @@ -307,13 +321,13 @@ def _build_datetime(parts): tzinfo=parts.get("tzinfo"), ) - def _parse_multiformat(self, string, formats): + def _parse_multiformat(self, string, formats, from_parse_iso=False): _datetime = None for fmt in formats: try: - _datetime = self.parse(string, fmt) + _datetime = self.parse(string, fmt, from_parse_iso) break except ParserError: pass diff --git a/arrow/util.py b/arrow/util.py index 03132f7ed..8a379a4ef 100644 --- a/arrow/util.py +++ b/arrow/util.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import sys -import warnings +from datetime import datetime def total_seconds(td): # pragma: no cover @@ -10,12 +9,12 @@ def total_seconds(td): # pragma: no cover def is_timestamp(value): - if type(value) == bool: + if isinstance(value, bool): return False try: - float(value) + datetime.fromtimestamp(value) return True - except Exception: + except TypeError: return False @@ -34,62 +33,4 @@ def isstr(s): return isinstance(s, str) -class list_to_iter_shim(list): - """ A temporary shim for functions that currently return a list but that will, after a - deprecation period, return an iteratator. - """ - - def __init__(self, iterable=(), **kwargs): - """ Equivalent to list(iterable). warn_text will be emitted on all non-iterator operations. - """ - self._warn_text = ( - kwargs.pop("warn_text", None) - or "this object will be converted to an iterator in a future release" - ) - self._iter_count = 0 - list.__init__(self, iterable, **kwargs) - - def _warn(self): - warnings.warn(self._warn_text, DeprecationWarning) - - def __iter__(self): - self._iter_count += 1 - if self._iter_count > 1: - self._warn() - return list.__iter__(self) - - def _wrap_method(name): - list_func = getattr(list, name) - - def wrapper(self, *args, **kwargs): - self._warn() - return list_func(self, *args, **kwargs) - - return wrapper - - __contains__ = _wrap_method("__contains__") - __add__ = _wrap_method("__add__") - __mul__ = _wrap_method("__mul__") - __getitem__ = _wrap_method("__getitem__") - # Ideally, we would throw warnings from __len__, but list(x) calls len(x) - index = _wrap_method("index") - count = _wrap_method("count") - __setitem__ = _wrap_method("__setitem__") - __delitem__ = _wrap_method("__delitem__") - append = _wrap_method("append") - if sys.version_info.major >= 3: # pragma: no cover - clear = _wrap_method("clear") - copy = _wrap_method("copy") - extend = _wrap_method("extend") - __iadd__ = _wrap_method("__iadd__") - __imul__ = _wrap_method("__imul__") - insert = _wrap_method("insert") - pop = _wrap_method("pop") - remove = _wrap_method("remove") - reverse = _wrap_method("reverse") - sort = _wrap_method("sort") - - del _wrap_method - - -__all__ = ["total_seconds", "is_timestamp", "isstr", "list_to_iter_shim"] +__all__ = ["total_seconds", "is_timestamp", "isstr"] diff --git a/tests/arrow_tests.py b/tests/arrow_tests.py index 825db674c..c88959b0d 100644 --- a/tests/arrow_tests.py +++ b/tests/arrow_tests.py @@ -1814,67 +1814,3 @@ def test_get_iteration_params(self): with self.assertRaises(Exception): arrow.Arrow._get_iteration_params(None, None) - - def test_list_to_iter_shim(self): - def newshim(): - return util.list_to_iter_shim(range(5), warn_text="testing") - - # Iterating over a shim once should not throw a warning - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - iter(newshim()) - list(newshim()) - for _ in newshim(): - pass - len(newshim()) # ...because it's called by `list(x)` - - self.assertEqual([], w) - - # Iterating over a shim twice (or more) should throw a warning - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - shim = newshim() - - for _ in shim: - pass - for _ in shim: - pass - - self.assertEqual(1, len(w)) - self.assertEqual(w[0].category, DeprecationWarning) - self.assertEqual("testing", w[0].message.args[0]) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - shim = newshim() - - 0 in shim - shim + [] - shim * 1 - shim[0] - shim.index(0) - shim.count(0) - - shim[0:0] = [] # doesn't warn on py2 - del shim[0:0] # doesn't warn on py2 - newshim().append(6) - if sys.version_info.major >= 3: # pragma: no cover - newshim().clear() - shim.copy() - shim.extend([]) - shim += [] - shim *= 1 - newshim().insert(0, 6) - shim.pop(-1) - newshim().remove(0) - newshim().reverse() - newshim().sort() - - if sys.version_info.major >= 3: # pragma: no cover - self.assertEqual(19, len(w)) - else: # pragma: no cover - self.assertEqual(15, len(w)) - for warn in w: - self.assertEqual(warn.category, DeprecationWarning) - self.assertEqual("testing", warn.message.args[0]) diff --git a/tests/factory_tests.py b/tests/factory_tests.py index 21e5ae4f6..ef0b60d01 100644 --- a/tests/factory_tests.py +++ b/tests/factory_tests.py @@ -25,7 +25,7 @@ def test_no_args(self): def test_timestamp_one_arg_no_arg(self): - no_arg = self.factory.get("1406430900").timestamp + no_arg = self.factory.get(1406430900).timestamp one_arg = self.factory.get("1406430900", "X").timestamp self.assertEqual(no_arg, one_arg) @@ -49,16 +49,14 @@ def test_one_arg_timestamp(self): timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) self.assertEqual(self.factory.get(timestamp), timestamp_dt) - self.assertEqual(self.factory.get(str(timestamp)), timestamp_dt) timestamp = 123.45 timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) self.assertEqual(self.factory.get(timestamp), timestamp_dt) - self.assertEqual(self.factory.get(str(timestamp)), timestamp_dt) # Issue 216 - timestamp = "99999999999999999999999999" + timestamp = 99999999999999999999999999 # Python 3 raises `OverflowError`, Python 2 raises `ValueError` with self.assertRaises((OverflowError, ValueError)): self.factory.get(timestamp) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index a212f0426..f9d0e3a42 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -22,8 +22,10 @@ def test_parse_multiformat(self): mock_datetime = self.mock() - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b").returns(mock_datetime) + self.expect(self.parser.parse).args("str", "fmt_a", False).raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_b", False).returns( + mock_datetime + ) result = self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -31,8 +33,8 @@ def test_parse_multiformat(self): def test_parse_multiformat_all_fail(self): - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a", False).raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_b", False).raises(ParserError) with self.assertRaises(ParserError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -41,7 +43,9 @@ def test_parse_multiformat_unself_expected_fail(self): class UnselfExpectedError(Exception): pass - self.expect(self.parser.parse).args("str", "fmt_a").raises(UnselfExpectedError) + self.expect(self.parser.parse).args("str", "fmt_a", False).raises( + UnselfExpectedError + ) with self.assertRaises(UnselfExpectedError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) From d69d16c89f49adb54302b16fabea4539445f040a Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Mon, 24 Jun 2019 15:50:43 +0100 Subject: [PATCH 03/50] Create custom warning class for get changes --- arrow/parser.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arrow/parser.py b/arrow/parser.py index d22a23515..3883cee55 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, unicode_literals import re +import warnings from datetime import datetime from dateutil import tz @@ -18,6 +19,18 @@ class ParserError(RuntimeError): pass +class GetParseWarning(DeprecationWarning): + """Raised when .get() is passed a string with no formats and matches incorrectly + on one of the default formats. + + e.g. + arrow.get('blabla2016') -> + arrow.get('13/4/2045') -> + + In version 0.15.0 this will become a ParserError. + """ + + class DateTimeParser(object): _FORMAT_RE = re.compile( @@ -218,12 +231,34 @@ def parse(self, string, fmt, from_parse_iso=False): if from_parse_iso: if match.start() != 0: + warnings.warn( + "Parser loosely matched {fmt} on '{string}', in the " + "future this will raise a ParserError.".format( + fmt=fmt, string=string + ), + category=GetParseWarning, + ) raise ParserError if string[-1] == "Z" and match.end() != len(string): + # TODO what about 2019-06-24T10:45:31Z + warnings.warn( + "Parser loosely matched {fmt} on '{string}', in the " + "future this will raise a ParserError.".format( + fmt=fmt, string=string + ), + category=GetParseWarning, + ) raise ParserError if string[-1] != "Z" and match.end() != len(string): + warnings.warn( + "Parser loosely matched {fmt} on '{string}', in the " + "future this will raise a ParserError.".format( + fmt=fmt, string=string + ), + category=GetParseWarning, + ) raise ParserError parts = {} From a50137f1ac37d486ef90ac188827965dc2ace04c Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 29 Jun 2019 11:15:47 -0400 Subject: [PATCH 04/50] More progress toward GET fixes --- arrow/parser.py | 8 ++++++-- tests/parser_tests.py | 8 ++++---- tests/util_tests.py | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 tests/util_tests.py diff --git a/arrow/parser.py b/arrow/parser.py index d22a23515..aec8661b0 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -105,7 +105,7 @@ def parse_iso(self, string): # is_basic_format = colon_count > 0 - has_tz = len(time_parts) > 1 + has_tz = len(time_parts) > 1 or string[-1] == "Z" has_hours = colon_count == 0 has_minutes = colon_count == 1 has_seconds = colon_count == 2 @@ -217,12 +217,15 @@ def parse(self, string, fmt, from_parse_iso=False): ) if from_parse_iso: + # Accounts for cases such as "blahblah2016" if match.start() != 0: raise ParserError - if string[-1] == "Z" and match.end() != len(string): + # Accounts for cases such as "2016-05T04:05:06.78912blahZ" + if string[-1] == "Z" and match.end() != len(string) - 1: raise ParserError + # Accounts for cases such as "2016-05T04:05:06.78912Zblah" if string[-1] != "Z" and match.end() != len(string): raise ParserError @@ -239,6 +242,7 @@ def _parse_token(self, token, value, parts): if token == "YYYY": parts["year"] = int(value) + elif token == "YY": value = int(value) parts["year"] = 1900 + value if value > 68 else 2000 + value diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f9d0e3a42..88fb8f484 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -562,11 +562,11 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 3600)), ) + parsed = self.parser.parse_iso("2013-02-03 04:05:06.78912Z") + + expected = datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 0)) # Properly parse string with Z timezone - self.assertEqual( - self.parser.parse_iso("2013-02-03T04:05:06.78912Z"), - datetime(2013, 2, 3, 4, 5, 6, 789120), - ) + self.assertEqual(parsed, expected) def test_gnu_date(self): """ diff --git a/tests/util_tests.py b/tests/util_tests.py new file mode 100644 index 000000000..693ba9762 --- /dev/null +++ b/tests/util_tests.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +from chai import Chai + +from arrow import util + + +class UtilTests(Chai): + def test_is_timestamp(self): + timestamp_float = datetime.now().timestamp() + timestamp_int = int(timestamp_float) + + self.assertTrue(util.is_timestamp(timestamp_int)) + self.assertTrue(util.is_timestamp(timestamp_float)) + + self.assertFalse(util.is_timestamp(str(timestamp_int))) + self.assertFalse(util.is_timestamp(str(timestamp_float))) + self.assertFalse(util.is_timestamp(True)) + self.assertFalse(util.is_timestamp(False)) + + full_datetime = "2019-06-23T13:12:42" + self.assertFalse(util.is_timestamp(full_datetime)) From 12c1b4c67b2a5027ec005ddd5b56963cce24c480 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 29 Jun 2019 12:04:43 -0400 Subject: [PATCH 05/50] Reverted change to handling of Z string --- arrow/parser.py | 9 +++------ tests/parser_tests.py | 11 ++++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 8c5947f83..8947ea9d7 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -112,13 +112,13 @@ def parse_iso(self, string): else: date_string, time_string = string.split("T", 1) - # ! TODO: look for Z in time string? + # TODO: understand why we are not accounting for Z directly time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") # is_basic_format = colon_count > 0 - has_tz = len(time_parts) > 1 or string[-1] == "Z" + has_tz = len(time_parts) > 1 has_hours = colon_count == 0 has_minutes = colon_count == 1 has_seconds = colon_count == 2 @@ -134,7 +134,7 @@ def parse_iso(self, string): elif has_hours: time_string = "HH" else: - # ! TODO: add tests for new conditional cases + # TODO: add tests for new conditional cases raise ValueError("No valid time component provided.") # required date formats to test against @@ -152,15 +152,12 @@ def parse_iso(self, string): if has_time: formats = ["{}T{}".format(f, time_string) for f in formats] - # TODO: what if someone adds a Z already? if has_time and has_tz: formats = ["{}Z".format(f) for f in formats] if space_divider: formats = [item.replace("T", " ", 1) for item in formats] - # ! IDEA: pass in a flag to denote that we are coming from a get() - # request with no formatting string was passed in return self._parse_multiformat(string, formats, True) def _generate_pattern_re(self, fmt): diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 88fb8f484..d4c3872fb 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -562,11 +562,12 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 3600)), ) - parsed = self.parser.parse_iso("2013-02-03 04:05:06.78912Z") - - expected = datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 0)) - # Properly parse string with Z timezone - self.assertEqual(parsed, expected) + # TODO: Shouldn't the datetime object being compared to have a tz offset of 0? + # Test fails if this offset is added. + self.assertEqual( + self.parser.parse_iso("2013-02-03 04:05:06.78912Z"), + datetime(2013, 2, 3, 4, 5, 6, 789120), + ) def test_gnu_date(self): """ From b67c9e2e83a386acf57f2c5a9347cef0c6baf8af Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 29 Jun 2019 13:15:38 -0400 Subject: [PATCH 06/50] Added a number of new formatting changes --- arrow/parser.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 8947ea9d7..53a220e43 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -116,17 +116,16 @@ def parse_iso(self, string): time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") - # is_basic_format = colon_count > 0 + is_basic_time_format = colon_count == 0 has_tz = len(time_parts) > 1 - has_hours = colon_count == 0 - has_minutes = colon_count == 1 - has_seconds = colon_count == 2 - has_subseconds = re.search("[.,]", time_parts[0]) - - # TODO: Add support for basic timestamps - if has_subseconds: - time_string = "HH:mm:ss{}S".format(has_subseconds.group()) + has_hours = colon_count == 0 or len(time_string) == 2 + has_minutes = colon_count == 1 or len(time_string) == 4 + has_seconds = colon_count == 2 or len(time_string) == 6 + has_sub_seconds = re.search("[.,]", time_parts[0]) + + if has_sub_seconds: + time_string = "HH:mm:ss{}S".format(has_sub_seconds.group()) elif has_seconds: time_string = "HH:mm:ss" elif has_minutes: @@ -137,16 +136,26 @@ def parse_iso(self, string): # TODO: add tests for new conditional cases raise ValueError("No valid time component provided.") + if is_basic_time_format: + time_string = time_string.replace(":", "") + # required date formats to test against formats = [ "YYYY-MM-DD", + "YYYY-M-DD", + "YYYY-M-D", "YYYY/MM/DD", + "YYYY/M/DD", + "YYYY/M/D", "YYYY.MM.DD", + "YYYY.M.DD", + "YYYY.M.D", "YYYYMMDD", "YYYY-MM", "YYYY/MM", "YYYY.MM", "YYYY", + "YY", ] if has_time: From 824b7445c8f615700b2f14b86274faef791f7d22 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 30 Jun 2019 14:51:20 -0400 Subject: [PATCH 07/50] Removed unnecessary helper functions in parser class and cleaned up some variable names --- .pre-commit-config.yaml | 4 ++-- arrow/parser.py | 33 ++++++++++----------------------- tests/parser_tests.py | 11 ----------- 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e4e41268..9f950d481 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,11 +15,11 @@ repos: hooks: - id: seed-isort-config - repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.20 + rev: v4.3.21 hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v1.18.0 + rev: v1.19.0 hooks: - id: pyupgrade - repo: https://github.com/pre-commit/pygrep-hooks diff --git a/arrow/parser.py b/arrow/parser.py index 53a220e43..ea677d8f2 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -139,6 +139,7 @@ def parse_iso(self, string): if is_basic_time_format: time_string = time_string.replace(":", "") + # TODO: add tests for all the new formats # required date formats to test against formats = [ "YYYY-MM-DD", @@ -209,14 +210,13 @@ def _generate_pattern_re(self, fmt): offset += len(input_pattern) - (m.end() - m.start()) final_fmt_pattern = "" - a = fmt_pattern.split(r"\#") - b = escaped_data + split_fmt = fmt_pattern.split(r"\#") # Due to the way Python splits, 'a' will always be longer - for i in range(len(a)): - final_fmt_pattern += a[i] - if i < len(b): - final_fmt_pattern += b[i][1:-1] + for i in range(len(split_fmt)): + final_fmt_pattern += split_fmt[i] + if i < len(escaped_data): + final_fmt_pattern += escaped_data[i][1:-1] return tokens, re.compile(final_fmt_pattern, flags=re.IGNORECASE) @@ -277,6 +277,7 @@ def parse(self, string, fmt, from_parse_iso=False): else: value = match.group(token) self._parse_token(token, value, parts) + return self._build_datetime(parts) def _parse_token(self, token, value, parts): @@ -379,27 +380,13 @@ def _parse_multiformat(self, string, formats, from_parse_iso=False): if _datetime is None: raise ParserError( - "Could not match input to any of {} on '{}'".format(formats, string) + "Could not match input '{}' to any of the supported formats: {}".format( + string, ", ".join(formats) + ) ) return _datetime - @staticmethod - def _map_lookup(input_map, key): - - try: - return input_map[key] - except KeyError: - raise ParserError('Could not match "{}" to {}'.format(key, input_map)) - - @staticmethod - def _try_timestamp(string): - - try: - return float(string) - except Exception: - return None - @staticmethod def _choice_re(choices, flags=0): return re.compile(r"({})".format("|".join(choices)), flags=flags) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index d4c3872fb..32034f263 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -319,17 +319,6 @@ def test_parse_subsecond_rounding(self): self.assertEqual(self.parser.parse(string, format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) - def test_map_lookup_keyerror(self): - - with self.assertRaises(parser.ParserError): - parser.DateTimeParser._map_lookup({"a": "1"}, "b") - - def test_try_timestamp(self): - - self.assertEqual(parser.DateTimeParser._try_timestamp("1.1"), 1.1) - self.assertEqual(parser.DateTimeParser._try_timestamp("1"), 1) - self.assertEqual(parser.DateTimeParser._try_timestamp("abc"), None) - class DateTimeParserRegexTests(Chai): def setUp(self): From dc2e6d251f33838afa5e5fb826ab0b9cefe22cb8 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 30 Jun 2019 17:41:55 -0400 Subject: [PATCH 08/50] Fixed bug with YY mapping to 20 --- arrow/parser.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index ea677d8f2..ff8b6a209 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -42,7 +42,7 @@ class DateTimeParser(object): _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") _TWO_DIGIT_RE = re.compile(r"\d{2}") - _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?") + _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _BASE_INPUT_RE_MAP = { @@ -163,6 +163,8 @@ def parse_iso(self, string): formats = ["{}T{}".format(f, time_string) for f in formats] if has_time and has_tz: + # Add "Z" to format strings to indicate to _parse_tokens + # that a timezone needs to be parsed formats = ["{}Z".format(f) for f in formats] if space_divider: @@ -212,7 +214,7 @@ def _generate_pattern_re(self, fmt): final_fmt_pattern = "" split_fmt = fmt_pattern.split(r"\#") - # Due to the way Python splits, 'a' will always be longer + # Due to the way Python splits, 'split_fmt' will always be longer for i in range(len(split_fmt)): final_fmt_pattern += split_fmt[i] if i < len(escaped_data): @@ -270,6 +272,9 @@ def parse(self, string, fmt, from_parse_iso=False): ) raise ParserError + if "YY" in fmt_tokens and match.end() != len(string): + raise ParserError + parts = {} for token in fmt_tokens: if token == "Do": @@ -404,7 +409,7 @@ def parse(cls, string): if string == "local": tzinfo = tz.tzlocal() - elif string in ["utc", "UTC"]: + elif string in ["utc", "UTC", "Z"]: tzinfo = tz.tzutc() else: From 955850c634ab5c2d470cdab150c27643c2872329 Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Sat, 6 Jul 2019 15:52:35 +0100 Subject: [PATCH 09/50] Add tests for .get() warnings and comment on current problems --- arrow/parser.py | 62 ++++++++++++++++++++++++++++++++----------- tests/parser_tests.py | 33 ++++++++++++++++++++++- 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index ff8b6a209..c16be582a 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -20,17 +20,20 @@ class ParserError(RuntimeError): class GetParseWarning(DeprecationWarning): - """Raised when .get() is passed a string with no formats and matches incorrectly + """Raised when arrow.get() is passed a string with no formats and matches incorrectly on one of the default formats. e.g. arrow.get('blabla2016') -> arrow.get('13/4/2045') -> - In version 0.15.0 this will become a ParserError. + In version 0.15.0 this warning will become a ParserError. """ +warnings.simplefilter("default", GetParseWarning) + + class DateTimeParser(object): _FORMAT_RE = re.compile( @@ -113,9 +116,12 @@ def parse_iso(self, string): date_string, time_string = string.split("T", 1) # TODO: understand why we are not accounting for Z directly + # currently Z is ignored entirely but fromdatetime defaults to UTC, see arrow.py L196 + # '2013-02-03T04:05:06.78912Z' time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") + # TODO "20160504T010203Z" parses incorectly, time part is HH only, due to Z changing len is_basic_time_format = colon_count == 0 has_tz = len(time_parts) > 1 @@ -139,6 +145,8 @@ def parse_iso(self, string): if is_basic_time_format: time_string = time_string.replace(":", "") + # IDEA reduced set of date formats for basic + # TODO: add tests for all the new formats # required date formats to test against formats = [ @@ -156,7 +164,7 @@ def parse_iso(self, string): "YYYY/MM", "YYYY.MM", "YYYY", - "YY", + # "YY", this is not a good format to try by default? ] if has_time: @@ -241,39 +249,63 @@ def parse(self, string, fmt, from_parse_iso=False): # Accounts for cases such as "blahblah2016" if match.start() != 0: warnings.warn( - "Parser loosely matched {fmt} on '{string}', in the " - "future this will raise a ParserError.".format( + "Parser loosely matched {fmt} on '{string}', in version " + "0.15.0 this will raise a ParserError.".format( fmt=fmt, string=string ), category=GetParseWarning, ) - raise ParserError + # raise ParserError + # TODO arrow.get('2013-02-03 04:05:06.78912Z') is warning incorrectly due to this # Accounts for cases such as "2016-05T04:05:06.78912blahZ" if string[-1] == "Z" and match.end() != len(string) - 1: - # TODO what about 2019-06-24T10:45:31Z warnings.warn( - "Parser loosely matched {fmt} on '{string}', in the " - "future this will raise a ParserError.".format( + "Parser loosely matched {fmt} on '{string}', in version " + "0.15.0 this will raise a ParserError.".format( fmt=fmt, string=string ), category=GetParseWarning, ) - raise ParserError + # raise ParserError # Accounts for cases such as "2016-05T04:05:06.78912Zblah" if string[-1] != "Z" and match.end() != len(string): warnings.warn( - "Parser loosely matched {fmt} on '{string}', in the " - "future this will raise a ParserError.".format( + "Parser loosely matched {fmt} on '{string}', in version " + "0.15.0 this will raise a ParserError.".format( fmt=fmt, string=string ), category=GetParseWarning, ) - raise ParserError + # raise ParserError - if "YY" in fmt_tokens and match.end() != len(string): - raise ParserError + else: + # fixes arrow.get("15/01/2019", ["D/M/YY","D/M/YYYY"]) => + # FIXME arrow.get("Call 01-02-03 on 79-01-01 12:05:10", "YY-MM-DD HH:mm:ss") warns incorrectly + # FIXME arrow.get("79-01-01 12:05:10", "YY-MM-DD HH:mm:ss") warns incorrectly + # IDEA test for whitespace on either side of match? + if "YY" in fmt_tokens and match.start != 0 or match.end() != len(string): + warnings.warn( + "Parser loosely matched {fmt} on '{string}', in version " + "0.15.0 this will raise a ParserError.".format( + fmt=fmt, string=string + ), + category=GetParseWarning, + ) + # #raise ParserError + + if fmt == "YYYY": + # accounts for arrow.get('05/02/2017', ['YYYY', 'MM/DD/YYYY']) + if match.start() != 0 or match.end() != len(string): + warnings.warn( + "Parser loosely matched {fmt} on '{string}', in version " + "0.15.0 this will raise a ParserError.".format( + fmt=fmt, string=string + ), + category=GetParseWarning, + ) + # #raise ParserError parts = {} for token in fmt_tokens: diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 32034f263..c2d33b8b5 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -9,7 +9,7 @@ from dateutil import tz from arrow import parser -from arrow.parser import DateTimeParser, ParserError +from arrow.parser import DateTimeParser, GetParseWarning, ParserError class DateTimeParserTests(Chai): @@ -553,11 +553,42 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): # TODO: Shouldn't the datetime object being compared to have a tz offset of 0? # Test fails if this offset is added. + # fromdatetime adds UTC timezone on afterwards! parse_iso returns naive datetime in this case self.assertEqual( self.parser.parse_iso("2013-02-03 04:05:06.78912Z"), datetime(2013, 2, 3, 4, 5, 6, 789120), ) + def test_bad_get_parsing(self): + # fixes for loose get parsing + + with self.assertWarns(GetParseWarning): + self.parser.parse_iso("blabla2016") + + with self.assertWarns(GetParseWarning): + self.parser.parse_iso("2016blabla") + + with self.assertWarns(GetParseWarning): + self.parser.parse_iso("10/4/2045") + + with self.assertWarns(GetParseWarning): + self.parser.parse_iso("2016-05T04:05:06.78912blahZ") + + with self.assertWarns(GetParseWarning): + self.parser.parse_iso("2016-05T04:05:06.78912Zblah") + + with self.assertWarns(GetParseWarning): + self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]) + + with self.assertWarns(GetParseWarning): + self.parser.parse("05/02/2017", ["YYYY", "MM/DD/YYYY"]) + + with self.assertWarns(GetParseWarning): + self.parser.parse("1919/05/23", ["YY/M/D", "YYYY/M/D"]) + + with self.assertWarns(GetParseWarning): + self.parser.parse("2017/05/22", ["YYYY", "YYYY/MM/DD"]) + def test_gnu_date(self): """ regression tests for parsing output from GNU date(1) From 91d99d765d008001da113ab59280b5be8f36ab61 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 6 Jul 2019 18:23:13 -0500 Subject: [PATCH 10/50] Added comments and tests --- arrow/factory.py | 13 +++++---- arrow/parser.py | 26 +++++++++++++++--- tests/parser_tests.py | 63 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 11 deletions(-) diff --git a/arrow/factory.py b/arrow/factory.py index d6bf4dffa..0192412bc 100644 --- a/arrow/factory.py +++ b/arrow/factory.py @@ -59,7 +59,7 @@ def get(self, *args, **kwargs): >>> arrow.get(arw) - **One** ``str``, ``float``, or ``int``, convertible to a floating-point timestamp, to get + **One** ``float`` or ``int``, convertible to a floating-point timestamp, to get that timestamp in UTC:: >>> arrow.get(1367992474.293378) @@ -68,17 +68,16 @@ def get(self, *args, **kwargs): >>> arrow.get(1367992474) - >>> arrow.get('1367992474.293378') - - - >>> arrow.get('1367992474') - - **One** ISO-8601-formatted ``str``, to parse it:: >>> arrow.get('2013-09-29T01:26:43.830580') + **One** ISO-8601-formatted ``str``, in basic format, to parse it:: + + >>> arrow.get('20160413T133656.456289') + + **One** ``tzinfo``, to get the current time **converted** to that timezone:: >>> arrow.get(tz.tzlocal()) diff --git a/arrow/parser.py b/arrow/parser.py index ff8b6a209..bc5bc36d8 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -67,7 +67,6 @@ class DateTimeParser(object): "S": _ONE_OR_MORE_DIGIT_RE, } - MARKERS = ["YYYY", "MM", "DD"] SEPARATORS = ["-", "/", "."] def __init__(self, locale="en_us", cache_size=0): @@ -122,10 +121,10 @@ def parse_iso(self, string): has_hours = colon_count == 0 or len(time_string) == 2 has_minutes = colon_count == 1 or len(time_string) == 4 has_seconds = colon_count == 2 or len(time_string) == 6 - has_sub_seconds = re.search("[.,]", time_parts[0]) + has_subseconds = re.search("[.,]", time_parts[0]) - if has_sub_seconds: - time_string = "HH:mm:ss{}S".format(has_sub_seconds.group()) + if has_subseconds: + time_string = "HH:mm:ss{}S".format(has_subseconds.group()) elif has_seconds: time_string = "HH:mm:ss" elif has_minutes: @@ -272,9 +271,28 @@ def parse(self, string, fmt, from_parse_iso=False): ) raise ParserError + # Fixes bug where "15/01/2019" matches to "D/M/YY" + # arrow.get("15/01/2019", ["D/M/YY", "D/M/YYYY"]) if "YY" in fmt_tokens and match.end() != len(string): raise ParserError + # TODO: talk to Chris about these conditionals + # if string[-1] == "Z" and match.end() != len(string) - 1: + # # TODO: add an exception message + # raise ParserError + # + # if string[-1] != "Z" and match.end() != len(string): + # # TODO: add an exception message + # raise ParserError + # + # if match.start() != 0: + # # TODO: add an exception message + # raise ParserError + + # if ("YY" in fmt_tokens or "YYYY" in fmt_tokens) and (match.end() != len(string) or match.start() != 0): + # # TODO: add an exception message + # raise ParserError + parts = {} for token in fmt_tokens: if token == "Do": diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 32034f263..29bc86f42 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -115,6 +115,43 @@ def test_parser_multiple_line_caching(self): for _ in range(100): self.parser._generate_pattern_re("fmt_a") + def test_YY_and_YYYY_format_list(self): + + self.assertEqual( + self.parser.parse("15/01/19", ["D/M/YY", "D/M/YYYY"]), datetime(2019, 1, 15) + ) + + # Regression test for issue #580 + self.assertEqual( + self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]), + datetime(2019, 1, 15), + ) + + self.assertEqual( + self.parser.parse( + "15/01/2019T04:05:06.789120Z", + ["D/M/YYThh:mm:ss.SZ", "D/M/YYYYThh:mm:ss.SZ"], + ), + datetime(2019, 1, 15, 4, 5, 6, 789120, tzinfo=tz.tzutc()), + ) + + def test_long_year_input(self): + + # TODO: ask Chris if this should throw a ParserError + # Pendulum does not throw an error + self.assertEqual( + self.parser.parse("09 January 123456789101112", "DD MMMM YYYY"), + datetime(1234, 1, 9), + ) + + # Pendulum throws an error + with self.assertRaises(ParserError): + self.parser.parse("123456789101112 09 January", "YYYY DD MMMM") + + # Pendulum throws an error + with self.assertRaises(ParserError): + self.parser.parse("68096653015/01/19", "YY/M/DD") + class DateTimeParserParseTests(Chai): def setUp(self): @@ -584,6 +621,32 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) + def test_iso8601_string_with_extra_words_at_start_and_end(self): + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016-05blah") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016-05-16blah") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016-05T04:05:06.78912ZblahZ") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016-05T04:05:06.78912Zblah") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016-05T04:05:06.78912blahZ") + + with self.assertRaises(ParserError): + self.parser.parse_iso("blah2016") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2016blah") + + with self.assertRaises(ParserError): + self.parser.parse_iso("blah2016blah") + class TzinfoParserTests(Chai): def setUp(self): From 6ab1c0ba646af93091232e67cc3fa1d3a6a93107 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 7 Jul 2019 14:57:41 -0400 Subject: [PATCH 11/50] Fixed merging issues --- Makefile | 2 +- arrow/parser.py | 20 ++++++------ tests/parser_tests.py | 71 ++++++++++++++++++++++--------------------- 3 files changed, 47 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index ec29f2aea..4852d5520 100644 --- a/Makefile +++ b/Makefile @@ -31,4 +31,4 @@ docs: clean: rm -rf venv .tox ./**/__pycache__ rm -rf dist build .egg arrow.egg-info - rm -f ./**/*.pyc .coverage + rm -f ./**/*.pyc ./**/.coverage diff --git a/arrow/parser.py b/arrow/parser.py index 489cd49c8..cec107171 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -254,7 +254,7 @@ def parse(self, string, fmt, from_parse_iso=False): ), category=GetParseWarning, ) - # raise ParserError + raise ParserError # TODO arrow.get('2013-02-03 04:05:06.78912Z') is warning incorrectly due to this # Accounts for cases such as "2016-05T04:05:06.78912blahZ" @@ -266,7 +266,7 @@ def parse(self, string, fmt, from_parse_iso=False): ), category=GetParseWarning, ) - # raise ParserError + raise ParserError # Accounts for cases such as "2016-05T04:05:06.78912Zblah" if string[-1] != "Z" and match.end() != len(string): @@ -277,12 +277,7 @@ def parse(self, string, fmt, from_parse_iso=False): ), category=GetParseWarning, ) - # raise ParserError - - # Fixes bug where "15/01/2019" matches to "D/M/YY" - # arrow.get("15/01/2019", ["D/M/YY", "D/M/YYYY"]) - if "YY" in fmt_tokens and match.end() != len(string): - raise ParserError + raise ParserError else: # fixes arrow.get("15/01/2019", ["D/M/YY","D/M/YYYY"]) => # FIXME arrow.get("Call 01-02-03 on 79-01-01 12:05:10", "YY-MM-DD HH:mm:ss") warns incorrectly @@ -296,7 +291,7 @@ def parse(self, string, fmt, from_parse_iso=False): ), category=GetParseWarning, ) - # #raise ParserError + raise ParserError if fmt == "YYYY": # accounts for arrow.get('05/02/2017', ['YYYY', 'MM/DD/YYYY']) @@ -308,7 +303,12 @@ def parse(self, string, fmt, from_parse_iso=False): ), category=GetParseWarning, ) - # #raise ParserError + raise ParserError + + # Fixes bug where "15/01/2019" matches to "D/M/YY" + # arrow.get("15/01/2019", ["D/M/YY", "D/M/YYYY"]) + if "YY" in fmt_tokens and match.end() != len(string): + raise ParserError # TODO: talk to Chris about these conditionals # if string[-1] == "Z" and match.end() != len(string) - 1: diff --git a/tests/parser_tests.py b/tests/parser_tests.py index c505645eb..deec9a09d 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -9,7 +9,7 @@ from dateutil import tz from arrow import parser -from arrow.parser import DateTimeParser, GetParseWarning, ParserError +from arrow.parser import DateTimeParser, ParserError class DateTimeParserTests(Chai): @@ -118,12 +118,13 @@ def test_parser_multiple_line_caching(self): def test_YY_and_YYYY_format_list(self): self.assertEqual( - self.parser.parse("15/01/19", ["D/M/YY", "D/M/YYYY"]), datetime(2019, 1, 15) + self.parser.parse("15/01/19", ["DD/MM/YY", "DD/MM/YYYY"]), + datetime(2019, 1, 15), ) # Regression test for issue #580 self.assertEqual( - self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]), + self.parser.parse("15/01/2019", ["DD/MM/YY", "DD/MM/YYYY"]), datetime(2019, 1, 15), ) @@ -588,43 +589,43 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 3600)), ) - # TODO: Shouldn't the datetime object being compared to have a tz offset of 0? - # Test fails if this offset is added. - # fromdatetime adds UTC timezone on afterwards! parse_iso returns naive datetime in this case + # parse_iso sets tzinfo to None if Z is passed, so a default datetime + # object is sufficient to compare against. + # Arrow adds +00:00 when get() is called directly and tzinfo is None self.assertEqual( self.parser.parse_iso("2013-02-03 04:05:06.78912Z"), datetime(2013, 2, 3, 4, 5, 6, 789120), ) - def test_bad_get_parsing(self): - # fixes for loose get parsing - - with self.assertWarns(GetParseWarning): - self.parser.parse_iso("blabla2016") - - with self.assertWarns(GetParseWarning): - self.parser.parse_iso("2016blabla") - - with self.assertWarns(GetParseWarning): - self.parser.parse_iso("10/4/2045") - - with self.assertWarns(GetParseWarning): - self.parser.parse_iso("2016-05T04:05:06.78912blahZ") - - with self.assertWarns(GetParseWarning): - self.parser.parse_iso("2016-05T04:05:06.78912Zblah") - - with self.assertWarns(GetParseWarning): - self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]) - - with self.assertWarns(GetParseWarning): - self.parser.parse("05/02/2017", ["YYYY", "MM/DD/YYYY"]) - - with self.assertWarns(GetParseWarning): - self.parser.parse("1919/05/23", ["YY/M/D", "YYYY/M/D"]) - - with self.assertWarns(GetParseWarning): - self.parser.parse("2017/05/22", ["YYYY", "YYYY/MM/DD"]) + # def test_bad_get_parsing(self): + # # fixes for loose get parsing + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse_iso("blabla2016") + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse_iso("2016blabla") + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse_iso("10/4/2045") + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse_iso("2016-05T04:05:06.78912blahZ") + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse_iso("2016-05T04:05:06.78912Zblah") + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]) + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse("05/02/2017", ["YYYY", "MM/DD/YYYY"]) + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse("1919/05/23", ["YY/M/D", "YYYY/M/D"]) + # + # with self.assertWarns(GetParseWarning): + # self.parser.parse("2017/05/22", ["YYYY", "YYYY/MM/DD"]) def test_gnu_date(self): """ From e3126c77b24e6d6132be3faa169c3815bd011144 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 7 Jul 2019 23:54:03 -0400 Subject: [PATCH 12/50] Fixed a number of parsing issues with a couple of regex tweaks and additions --- arrow/parser.py | 114 ++++++++++-------------------------------- tests/parser_tests.py | 73 +++++++++++++-------------- 2 files changed, 60 insertions(+), 127 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index cec107171..d04ecabb2 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -47,6 +47,7 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") + _TIMESTAMP_RE = re.compile(r"\d+") _BASE_INPUT_RE_MAP = { "YYYY": _FOUR_DIGIT_RE, @@ -63,7 +64,7 @@ class DateTimeParser(object): "m": _ONE_OR_TWO_DIGIT_RE, "ss": _TWO_DIGIT_RE, "s": _ONE_OR_TWO_DIGIT_RE, - "X": re.compile(r"\d+"), + "X": _TIMESTAMP_RE, "ZZZ": _TZ_NAME_RE, "ZZ": _TZ_RE, "Z": _TZ_RE, @@ -101,9 +102,14 @@ def __init__(self, locale="en_us", cache_size=0): self._generate_pattern_re ) + # TODO: since we support more than ISO-8601, we should rename this function def parse_iso(self, string): + # TODO: account for more than 1 space like arrow.get(" 2016") + # string = string.strip() - has_time = "T" in string or " " in string.strip() + has_space_divider = " " in string and len(string.strip().split(" ")) == 2 + + has_time = "T" in string or has_space_divider space_divider = " " in string.strip() has_tz = False @@ -120,11 +126,11 @@ def parse_iso(self, string): time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") - # TODO "20160504T010203Z" parses incorectly, time part is HH only, due to Z changing len + # TODO "20160504T010203Z" parses incorrectly, time part is HH only, due to Z changing len is_basic_time_format = colon_count == 0 has_tz = len(time_parts) > 1 - has_hours = colon_count == 0 or len(time_string) == 2 + has_hours = len(time_string) == 2 has_minutes = colon_count == 1 or len(time_string) == 4 has_seconds = colon_count == 2 or len(time_string) == 6 has_subseconds = re.search("[.,]", time_parts[0]) @@ -146,7 +152,7 @@ def parse_iso(self, string): # IDEA reduced set of date formats for basic - # TODO: add tests for all the new formats + # TODO: add tests for all the new formats, especially basic format # required date formats to test against formats = [ "YYYY-MM-DD", @@ -227,6 +233,21 @@ def _generate_pattern_re(self, fmt): if i < len(escaped_data): final_fmt_pattern += escaped_data[i][1:-1] + # Wrap final_fmt_pattern in a custom word boundary to strictly + # match the formatting pattern and filter out date and time formats + # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah, + # blah1998-09-12blah. The custom word boundary matches every character + # that is not a whitespace character to allow for searching for a date + # and time string in a natural language sentence. Therefore, searching + # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will + # work properly. + # Reference: https://stackoverflow.com/q/14232931/3820660 + starting_word_boundary = r"(? - # FIXME arrow.get("Call 01-02-03 on 79-01-01 12:05:10", "YY-MM-DD HH:mm:ss") warns incorrectly - # FIXME arrow.get("79-01-01 12:05:10", "YY-MM-DD HH:mm:ss") warns incorrectly - # IDEA test for whitespace on either side of match? - if "YY" in fmt_tokens and match.start != 0 or match.end() != len(string): - warnings.warn( - "Parser loosely matched {fmt} on '{string}', in version " - "0.15.0 this will raise a ParserError.".format( - fmt=fmt, string=string - ), - category=GetParseWarning, - ) - raise ParserError - - if fmt == "YYYY": - # accounts for arrow.get('05/02/2017', ['YYYY', 'MM/DD/YYYY']) - if match.start() != 0 or match.end() != len(string): - warnings.warn( - "Parser loosely matched {fmt} on '{string}', in version " - "0.15.0 this will raise a ParserError.".format( - fmt=fmt, string=string - ), - category=GetParseWarning, - ) - raise ParserError - - # Fixes bug where "15/01/2019" matches to "D/M/YY" - # arrow.get("15/01/2019", ["D/M/YY", "D/M/YYYY"]) - if "YY" in fmt_tokens and match.end() != len(string): - raise ParserError - - # TODO: talk to Chris about these conditionals - # if string[-1] == "Z" and match.end() != len(string) - 1: - # # TODO: add an exception message - # raise ParserError - # - # if string[-1] != "Z" and match.end() != len(string): - # # TODO: add an exception message - # raise ParserError - # - # if match.start() != 0: - # # TODO: add an exception message - # raise ParserError - - # if ("YY" in fmt_tokens or "YYYY" in fmt_tokens) and (match.end() != len(string) or match.start() != 0): - # # TODO: add an exception message - # raise ParserError - parts = {} for token in fmt_tokens: if token == "Do": diff --git a/tests/parser_tests.py b/tests/parser_tests.py index deec9a09d..f1cd3229f 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -136,23 +136,6 @@ def test_YY_and_YYYY_format_list(self): datetime(2019, 1, 15, 4, 5, 6, 789120, tzinfo=tz.tzutc()), ) - def test_long_year_input(self): - - # TODO: ask Chris if this should throw a ParserError - # Pendulum does not throw an error - self.assertEqual( - self.parser.parse("09 January 123456789101112", "DD MMMM YYYY"), - datetime(1234, 1, 9), - ) - - # Pendulum throws an error - with self.assertRaises(ParserError): - self.parser.parse("123456789101112 09 January", "YYYY DD MMMM") - - # Pendulum throws an error - with self.assertRaises(ParserError): - self.parser.parse("68096653015/01/19", "YY/M/DD") - class DateTimeParserParseTests(Chai): def setUp(self): @@ -357,6 +340,17 @@ def test_parse_subsecond_rounding(self): self.assertEqual(self.parser.parse(string, format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) + # Regression tests for issue #560 + def test_parse_long_year(self): + with self.assertRaises(ParserError): + self.parser.parse("09 January 123456789101112", "DD MMMM YYYY"), + + with self.assertRaises(ParserError): + self.parser.parse("123456789101112 09 January", "YYYY DD MMMM") + + with self.assertRaises(ParserError): + self.parser.parse("68096653015/01/19", "YY/M/DD") + class DateTimeParserRegexTests(Chai): def setUp(self): @@ -653,31 +647,32 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) - def test_iso8601_string_with_extra_words_at_start_and_end(self): + def test_parse_with_extra_words_at_start_and_end(self): + input_format_pairs = [ + ("blah2016", "YYYY"), + ("blah2016blah", "YYYY"), + ("2016blah", "YYYY"), + ("2016-05blah", "YYYY-MM"), + ("2016-05-16blah", "YYYY-MM-DD"), + ("2016-05T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), + ("2016-05T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ] - with self.assertRaises(ParserError): - self.parser.parse_iso("2016-05blah") + for pair in input_format_pairs: + with self.assertRaises(ParserError): + self.parser.parse_iso(pair[0]) - with self.assertRaises(ParserError): - self.parser.parse_iso("2016-05-16blah") + with self.assertRaises(ParserError): + self.parser.parse(pair[0], pair[1]) - with self.assertRaises(ParserError): - self.parser.parse_iso("2016-05T04:05:06.78912ZblahZ") - - with self.assertRaises(ParserError): - self.parser.parse_iso("2016-05T04:05:06.78912Zblah") - - with self.assertRaises(ParserError): - self.parser.parse_iso("2016-05T04:05:06.78912blahZ") - - with self.assertRaises(ParserError): - self.parser.parse_iso("blah2016") - - with self.assertRaises(ParserError): - self.parser.parse_iso("2016blah") - - with self.assertRaises(ParserError): - self.parser.parse_iso("blah2016blah") + # Spaces surrounding the parsable date are ok because we + # allow the parsing of natural language input + self.assertEqual(self.parser.parse_iso("blah 2016 blah"), datetime(2016, 1, 1)) + self.assertEqual( + self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1) + ) class TzinfoParserTests(Chai): From f77e6984ffe5b088fa30b8226d6590bbdc0bc1ba Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Mon, 8 Jul 2019 01:09:55 -0400 Subject: [PATCH 13/50] Added unit tests and added cleanup/validation to datetimte string in parse_iso --- arrow/parser.py | 129 ++++++++++++++++++++++-------------------- tests/parser_tests.py | 74 +++++++++++++++++++++--- 2 files changed, 135 insertions(+), 68 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index d04ecabb2..d9ac5bee3 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -103,22 +103,51 @@ def __init__(self, locale="en_us", cache_size=0): ) # TODO: since we support more than ISO-8601, we should rename this function - def parse_iso(self, string): - # TODO: account for more than 1 space like arrow.get(" 2016") - # string = string.strip() + def parse_iso(self, datetime_string): + # TODO: talk to Chris about this => the below space divider checks + # are not really necessary thanks to the new regex changes, but I think + # it is good to include them to provide better error messages. - has_space_divider = " " in string and len(string.strip().split(" ")) == 2 + # strip leading and trailing whitespace + datetime_string = datetime_string.strip() - has_time = "T" in string or has_space_divider - space_divider = " " in string.strip() + has_space_divider = " " in datetime_string + num_space_dividers = len(datetime_string.split(" ")) + if has_space_divider and num_space_dividers != 2: + raise ParserError( + "Expected 1 space divider, but was given {}. Try passing in a format string to resolve this.".format( + num_space_dividers + ) + ) + + has_time = has_space_divider or "T" in datetime_string has_tz = False + # TODO: add tests for all the new formats, especially basic format + # required date formats to test against + formats = [ + "YYYY-MM-DD", + "YYYY-M-DD", + "YYYY-M-D", + "YYYY/MM/DD", + "YYYY/M/DD", + "YYYY/M/D", + "YYYY.MM.DD", + "YYYY.M.DD", + "YYYY.M.D", + "YYYYMMDD", + "YYYY-MM", + "YYYY/MM", + "YYYY.MM", + "YYYY", + ] + if has_time: - if space_divider: - date_string, time_string = string.split(" ", 1) + if has_space_divider: + date_string, time_string = datetime_string.split(" ", 1) else: - date_string, time_string = string.split("T", 1) + date_string, time_string = datetime_string.split("T", 1) # TODO: understand why we are not accounting for Z directly # currently Z is ignored entirely but fromdatetime defaults to UTC, see arrow.py L196 @@ -150,40 +179,45 @@ def parse_iso(self, string): if is_basic_time_format: time_string = time_string.replace(":", "") - # IDEA reduced set of date formats for basic + if has_space_divider: + formats = ["{} {}".format(f, time_string) for f in formats] + else: + formats = ["{}T{}".format(f, time_string) for f in formats] - # TODO: add tests for all the new formats, especially basic format - # required date formats to test against - formats = [ - "YYYY-MM-DD", - "YYYY-M-DD", - "YYYY-M-D", - "YYYY/MM/DD", - "YYYY/M/DD", - "YYYY/M/D", - "YYYY.MM.DD", - "YYYY.M.DD", - "YYYY.M.D", - "YYYYMMDD", - "YYYY-MM", - "YYYY/MM", - "YYYY.MM", - "YYYY", - # "YY", this is not a good format to try by default? - ] - - if has_time: - formats = ["{}T{}".format(f, time_string) for f in formats] + # TODO: reduce set of date formats for basic? if has_time and has_tz: # Add "Z" to format strings to indicate to _parse_tokens # that a timezone needs to be parsed formats = ["{}Z".format(f) for f in formats] - if space_divider: - formats = [item.replace("T", " ", 1) for item in formats] + # TODO: make thrown error messages less cryptic and more informative + return self._parse_multiformat(datetime_string, formats, True) + + def parse(self, datetime_string, fmt, from_parse_iso=False): + + if isinstance(fmt, list): + return self._parse_multiformat(datetime_string, fmt) + + fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) - return self._parse_multiformat(string, formats, True) + match = fmt_pattern_re.search(datetime_string) + if match is None: + raise ParserError( + "Failed to match '{}' when parsing '{}'".format( + fmt_pattern_re.pattern, datetime_string + ) + ) + + parts = {} + for token in fmt_tokens: + if token == "Do": + value = match.group("value") + else: + value = match.group(token) + self._parse_token(token, value, parts) + + return self._build_datetime(parts) def _generate_pattern_re(self, fmt): @@ -250,31 +284,6 @@ def _generate_pattern_re(self, fmt): return tokens, re.compile(final_fmt_pattern, flags=re.IGNORECASE) - def parse(self, string, fmt, from_parse_iso=False): - - if isinstance(fmt, list): - return self._parse_multiformat(string, fmt) - - fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) - - match = fmt_pattern_re.search(string) - if match is None: - raise ParserError( - "Failed to match '{}' when parsing '{}'".format( - fmt_pattern_re.pattern, string - ) - ) - - parts = {} - for token in fmt_tokens: - if token == "Do": - value = match.group("value") - else: - value = match.group(token) - self._parse_token(token, value, parts) - - return self._build_datetime(parts) - def _parse_token(self, token, value, parts): if token == "YYYY": diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f1cd3229f..c1679e245 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -647,33 +647,91 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) - def test_parse_with_extra_words_at_start_and_end(self): + def test_parse_with_extra_words_at_start_and_end_invalid(self): + # The tuple's second entry is None if the datetime string + # is valid when a format string is passed in input_format_pairs = [ ("blah2016", "YYYY"), ("blah2016blah", "YYYY"), + ("blah 2016 blah", None), ("2016blah", "YYYY"), ("2016-05blah", "YYYY-MM"), ("2016-05-16blah", "YYYY-MM-DD"), - ("2016-05T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), - ("2016-05T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), - ("2016-05T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), - ("2016-05T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), + ("2016-05-16T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("Meet me at 2016-05-16T04:05:06.789120 on Tuesday", None), + ("Meet me at 2016-05-16 04:05:06.789120 on Tuesday", None), ] for pair in input_format_pairs: with self.assertRaises(ParserError): self.parser.parse_iso(pair[0]) - with self.assertRaises(ParserError): - self.parser.parse(pair[0], pair[1]) + if pair[1] is not None: + with self.assertRaises(ParserError): + self.parser.parse(pair[0], pair[1]) + def test_parse_with_extra_words_at_start_and_end_valid(self): # Spaces surrounding the parsable date are ok because we # allow the parsing of natural language input - self.assertEqual(self.parser.parse_iso("blah 2016 blah"), datetime(2016, 1, 1)) self.assertEqual( self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1) ) + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", + "YYYY-MM-DDThh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", + "YYYY-MM-DD hh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + def test_parse_with_leading_and_trailing_whitespace(self): + self.assertEqual(self.parser.parse_iso(" 2016"), datetime(2016, 1, 1)) + self.assertEqual(self.parser.parse(" 2016", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual(self.parser.parse_iso("2016 "), datetime(2016, 1, 1)) + self.assertEqual(self.parser.parse("2016 ", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual( + self.parser.parse_iso(" 2016 "), datetime(2016, 1, 1) + ) + self.assertEqual( + self.parser.parse(" 2016 ", "YYYY"), datetime(2016, 1, 1) + ) + + self.assertEqual( + self.parser.parse_iso(" 2016-05-16 04:05:06.789120 "), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + self.assertEqual( + self.parser.parse( + " 2016-05-16 04:05:06.789120 ", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse_iso(" 2016-05-16T04:05:06.789120 "), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + self.assertEqual( + self.parser.parse( + " 2016-05-16T04:05:06.789120 ", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + class TzinfoParserTests(Chai): def setUp(self): From 4c331c7203c4c272929a59cd0514a76f17510951 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Tue, 9 Jul 2019 00:11:55 -0400 Subject: [PATCH 14/50] Tweaked space and t divider checks and added a few more test inputs --- arrow/parser.py | 17 +++++++++++------ tests/parser_tests.py | 6 ++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index d9ac5bee3..754b2f978 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -107,21 +107,26 @@ def parse_iso(self, datetime_string): # TODO: talk to Chris about this => the below space divider checks # are not really necessary thanks to the new regex changes, but I think # it is good to include them to provide better error messages. + # my rationale is that it is better to fail early # strip leading and trailing whitespace datetime_string = datetime_string.strip() has_space_divider = " " in datetime_string + has_t_divider = "T" in datetime_string - num_space_dividers = len(datetime_string.split(" ")) - if has_space_divider and num_space_dividers != 2: + num_spaces = datetime_string.count(" ") + if (has_space_divider and num_spaces != 1) or ( + has_t_divider and num_spaces > 0 + ): + # TODO: update this message since "ISO 8601-like" may not be clear raise ParserError( - "Expected 1 space divider, but was given {}. Try passing in a format string to resolve this.".format( - num_space_dividers + "Expected an ISO 8601-like string, but was given '{}'. Try passing in a format string to resolve this.".format( + datetime_string ) ) - has_time = has_space_divider or "T" in datetime_string + has_time = has_space_divider or has_t_divider has_tz = False # TODO: add tests for all the new formats, especially basic format @@ -174,7 +179,7 @@ def parse_iso(self, datetime_string): time_string = "HH" else: # TODO: add tests for new conditional cases - raise ValueError("No valid time component provided.") + raise ParserError("No valid time component provided.") if is_basic_time_format: time_string = time_string.replace(":", "") diff --git a/tests/parser_tests.py b/tests/parser_tests.py index c1679e245..7e9766223 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -654,6 +654,12 @@ def test_parse_with_extra_words_at_start_and_end_invalid(self): ("blah2016", "YYYY"), ("blah2016blah", "YYYY"), ("blah 2016 blah", None), + ("blah 2016", None), + ("2016 blah", None), + ("blah 2016-05-16 04:05:06.789120", None), + ("2016-05-16 04:05:06.789120 blah", None), + ("blah 2016-05-16T04:05:06.789120", None), + ("2016-05-16T04:05:06.789120 blah", None), ("2016blah", "YYYY"), ("2016-05blah", "YYYY-MM"), ("2016-05-16blah", "YYYY-MM-DD"), From 08df5684674369f809f568691c034203539baed6 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Tue, 9 Jul 2019 00:34:58 -0400 Subject: [PATCH 15/50] Split up recently added parse_iso and parse unit tests and removed from_parse_iso flag --- arrow/parser.py | 8 +- tests/parser_tests.py | 203 ++++++++++++++++++++++++++---------------- 2 files changed, 129 insertions(+), 82 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 754b2f978..0677f1e5c 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -197,9 +197,9 @@ def parse_iso(self, datetime_string): formats = ["{}Z".format(f) for f in formats] # TODO: make thrown error messages less cryptic and more informative - return self._parse_multiformat(datetime_string, formats, True) + return self._parse_multiformat(datetime_string, formats) - def parse(self, datetime_string, fmt, from_parse_iso=False): + def parse(self, datetime_string, fmt): if isinstance(fmt, list): return self._parse_multiformat(datetime_string, fmt) @@ -376,13 +376,13 @@ def _build_datetime(parts): tzinfo=parts.get("tzinfo"), ) - def _parse_multiformat(self, string, formats, from_parse_iso=False): + def _parse_multiformat(self, string, formats): _datetime = None for fmt in formats: try: - _datetime = self.parse(string, fmt, from_parse_iso) + _datetime = self.parse(string, fmt) break except ParserError: pass diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 7e9766223..09522ad3c 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -22,10 +22,8 @@ def test_parse_multiformat(self): mock_datetime = self.mock() - self.expect(self.parser.parse).args("str", "fmt_a", False).raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b", False).returns( - mock_datetime - ) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_b").returns(mock_datetime) result = self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -33,8 +31,8 @@ def test_parse_multiformat(self): def test_parse_multiformat_all_fail(self): - self.expect(self.parser.parse).args("str", "fmt_a", False).raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b", False).raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserError) with self.assertRaises(ParserError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -43,9 +41,7 @@ def test_parse_multiformat_unself_expected_fail(self): class UnselfExpectedError(Exception): pass - self.expect(self.parser.parse).args("str", "fmt_a", False).raises( - UnselfExpectedError - ) + self.expect(self.parser.parse).args("str", "fmt_a").raises(UnselfExpectedError) with self.assertRaises(UnselfExpectedError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -351,6 +347,103 @@ def test_parse_long_year(self): with self.assertRaises(ParserError): self.parser.parse("68096653015/01/19", "YY/M/DD") + def test_parse_with_extra_words_at_start_and_end_invalid(self): + input_format_pairs = [ + ("blah2016", "YYYY"), + ("blah2016blah", "YYYY"), + ("2016blah", "YYYY"), + ("2016-05blah", "YYYY-MM"), + ("2016-05-16blah", "YYYY-MM-DD"), + ("2016-05-16T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), + ("2016-05-16T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ] + + for pair in input_format_pairs: + with self.assertRaises(ParserError): + self.parser.parse(pair[0], pair[1]) + + def test_parse_with_extra_words_at_start_and_end_valid(self): + # Spaces surrounding the parsable date are ok because we + # allow the parsing of natural language input + self.assertEqual( + self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1) + ) + + self.assertEqual(self.parser.parse("blah 2016", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual(self.parser.parse("2016 blah", "YYYY"), datetime(2016, 1, 1)) + + # test one additional space along with space divider + self.assertEqual( + self.parser.parse( + "blah 2016-05-16 04:05:06.789120", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "2016-05-16 04:05:06.789120 blah", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + # test one additional space along with T divider + self.assertEqual( + self.parser.parse( + "blah 2016-05-16T04:05:06.789120", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "2016-05-16T04:05:06.789120 blah", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", + "YYYY-MM-DDThh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", + "YYYY-MM-DD hh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + def test_parse_with_leading_and_trailing_whitespace(self): + self.assertEqual(self.parser.parse(" 2016", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual(self.parser.parse("2016 ", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual( + self.parser.parse(" 2016 ", "YYYY"), datetime(2016, 1, 1) + ) + + self.assertEqual( + self.parser.parse( + " 2016-05-16 04:05:06.789120 ", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + " 2016-05-16T04:05:06.789120 ", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + class DateTimeParserRegexTests(Chai): def setUp(self): @@ -647,96 +740,50 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) - def test_parse_with_extra_words_at_start_and_end_invalid(self): - # The tuple's second entry is None if the datetime string - # is valid when a format string is passed in - input_format_pairs = [ - ("blah2016", "YYYY"), - ("blah2016blah", "YYYY"), - ("blah 2016 blah", None), - ("blah 2016", None), - ("2016 blah", None), - ("blah 2016-05-16 04:05:06.789120", None), - ("2016-05-16 04:05:06.789120 blah", None), - ("blah 2016-05-16T04:05:06.789120", None), - ("2016-05-16T04:05:06.789120 blah", None), - ("2016blah", "YYYY"), - ("2016-05blah", "YYYY-MM"), - ("2016-05-16blah", "YYYY-MM-DD"), - ("2016-05-16T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), - ("2016-05-16T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), - ("2016-05-16T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), - ("2016-05-16T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), - ("Meet me at 2016-05-16T04:05:06.789120 on Tuesday", None), - ("Meet me at 2016-05-16 04:05:06.789120 on Tuesday", None), + def test_parse_iso_with_extra_words_at_start_and_end_invalid(self): + test_inputs = [ + "blah2016", + "blah2016blah", + "blah 2016 blah", + "blah 2016", + "2016 blah", + "blah 2016-05-16 04:05:06.789120", + "2016-05-16 04:05:06.789120 blah", + "blah 2016-05-16T04:05:06.789120", + "2016-05-16T04:05:06.789120 blah", + "2016blah", + "2016-05blah", + "2016-05-16blah", + "2016-05-16T04:05:06.789120blah", + "2016-05-16T04:05:06.789120ZblahZ", + "2016-05-16T04:05:06.789120Zblah", + "2016-05-16T04:05:06.789120blahZ", + "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", + "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", ] - for pair in input_format_pairs: + for ti in test_inputs: with self.assertRaises(ParserError): - self.parser.parse_iso(pair[0]) - - if pair[1] is not None: - with self.assertRaises(ParserError): - self.parser.parse(pair[0], pair[1]) - - def test_parse_with_extra_words_at_start_and_end_valid(self): - # Spaces surrounding the parsable date are ok because we - # allow the parsing of natural language input - self.assertEqual( - self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1) - ) - - self.assertEqual( - self.parser.parse( - "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", - "YYYY-MM-DDThh:mm:ss.S", - ), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) - - self.assertEqual( - self.parser.parse( - "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", - "YYYY-MM-DD hh:mm:ss.S", - ), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) + self.parser.parse_iso(ti) - def test_parse_with_leading_and_trailing_whitespace(self): + def test_parse_iso_with_leading_and_trailing_whitespace(self): self.assertEqual(self.parser.parse_iso(" 2016"), datetime(2016, 1, 1)) - self.assertEqual(self.parser.parse(" 2016", "YYYY"), datetime(2016, 1, 1)) self.assertEqual(self.parser.parse_iso("2016 "), datetime(2016, 1, 1)) - self.assertEqual(self.parser.parse("2016 ", "YYYY"), datetime(2016, 1, 1)) self.assertEqual( self.parser.parse_iso(" 2016 "), datetime(2016, 1, 1) ) - self.assertEqual( - self.parser.parse(" 2016 ", "YYYY"), datetime(2016, 1, 1) - ) self.assertEqual( self.parser.parse_iso(" 2016-05-16 04:05:06.789120 "), datetime(2016, 5, 16, 4, 5, 6, 789120), ) - self.assertEqual( - self.parser.parse( - " 2016-05-16 04:05:06.789120 ", "YYYY-MM-DD hh:mm:ss.S" - ), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) self.assertEqual( self.parser.parse_iso(" 2016-05-16T04:05:06.789120 "), datetime(2016, 5, 16, 4, 5, 6, 789120), ) - self.assertEqual( - self.parser.parse( - " 2016-05-16T04:05:06.789120 ", "YYYY-MM-DDThh:mm:ss.S" - ), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) class TzinfoParserTests(Chai): From 7622b8bc6ed08a6844249a55b64fe877c1f79a33 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 13 Jul 2019 16:00:35 -0400 Subject: [PATCH 16/50] Fixed bugs with basic format and multiple subsecond tokens --- arrow/parser.py | 24 +++++++++------------- docs/index.rst | 2 +- tests/parser_tests.py | 47 +++---------------------------------------- tests/util_tests.py | 4 +--- 4 files changed, 15 insertions(+), 62 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 0677f1e5c..1a12605ec 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -104,11 +104,6 @@ def __init__(self, locale="en_us", cache_size=0): # TODO: since we support more than ISO-8601, we should rename this function def parse_iso(self, datetime_string): - # TODO: talk to Chris about this => the below space divider checks - # are not really necessary thanks to the new regex changes, but I think - # it is good to include them to provide better error messages. - # my rationale is that it is better to fail early - # strip leading and trailing whitespace datetime_string = datetime_string.strip() @@ -119,7 +114,6 @@ def parse_iso(self, datetime_string): if (has_space_divider and num_spaces != 1) or ( has_t_divider and num_spaces > 0 ): - # TODO: update this message since "ISO 8601-like" may not be clear raise ParserError( "Expected an ISO 8601-like string, but was given '{}'. Try passing in a format string to resolve this.".format( datetime_string @@ -149,18 +143,19 @@ def parse_iso(self, datetime_string): ] if has_time: + # Z is ignored entirely because fromdatetime defaults to UTC in arrow.py + if datetime_string[-1] == "Z": + datetime_string = datetime_string[:-1] + if has_space_divider: date_string, time_string = datetime_string.split(" ", 1) else: date_string, time_string = datetime_string.split("T", 1) - # TODO: understand why we are not accounting for Z directly - # currently Z is ignored entirely but fromdatetime defaults to UTC, see arrow.py L196 - # '2013-02-03T04:05:06.78912Z' time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") - # TODO "20160504T010203Z" parses incorrectly, time part is HH only, due to Z changing len + # TODO: add test for basic format with Z "20160504T010203Z" is_basic_time_format = colon_count == 0 has_tz = len(time_parts) > 1 @@ -238,8 +233,7 @@ def _generate_pattern_re(self, fmt): # Extract the bracketed expressions to be reinserted later. escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt) - # Any number of S is the same as one. - escaped_fmt = re.sub("S+", "S", escaped_fmt) + escaped_data = re.findall(self._ESCAPE_RE, fmt) fmt_pattern = escaped_fmt @@ -283,8 +277,10 @@ def _generate_pattern_re(self, fmt): # Reference: https://stackoverflow.com/q/14232931/3820660 starting_word_boundary = r"(? Date: Sat, 13 Jul 2019 16:31:31 -0400 Subject: [PATCH 17/50] Added an extra french test --- tests/parser_tests.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 708e0ec9a..b61f6fe99 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -881,7 +881,7 @@ def test_localized_meridians_capitalized(self): parser_.parse("2013-01-01 5 DU", "YYYY-MM-DD h A"), datetime(2013, 1, 1, 17) ) - # regression check for https://github.com/crsmithdev/arrow/issues/607 + # regression test for issue #607 def test_es_meridians(self): parser_ = parser.DateTimeParser("es") @@ -890,6 +890,18 @@ def test_es_meridians(self): datetime(2019, 6, 30, 20, 0), ) + with self.assertRaises(ParserError): + parser_.parse( + "Junio 30, 2019 - 08:00 pasdfasdfm", "MMMM DD, YYYY - hh:mm a" + ) + + def test_fr_meridians(self): + parser_ = parser.DateTimeParser("fr") + + # the French locale always uses a 24 hour clock, so it does not support meridians + with self.assertRaises(ParserError): + parser_.parse("Janvier 30, 2019 - 08:00 pm", "MMMM DD, YYYY - hh:mm a") + class DateTimeParserMonthOrdinalDayTests(Chai): def setUp(self): From 188693d8084d88e726867721bb825415a5be9db4 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 13 Jul 2019 17:31:09 -0400 Subject: [PATCH 18/50] Fixed an issue with the timestamp not strictly matching --- arrow/parser.py | 3 ++- tests/parser_tests.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arrow/parser.py b/arrow/parser.py index 1a12605ec..7711621cb 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -47,7 +47,7 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") - _TIMESTAMP_RE = re.compile(r"\d+") + _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") _BASE_INPUT_RE_MAP = { "YYYY": _FOUR_DIGIT_RE, @@ -124,6 +124,7 @@ def parse_iso(self, datetime_string): has_tz = False # TODO: add tests for all the new formats, especially basic format + # required date formats to test against formats = [ "YYYY-MM-DD", diff --git a/tests/parser_tests.py b/tests/parser_tests.py index b61f6fe99..0173fedef 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -132,6 +132,20 @@ def test_YY_and_YYYY_format_list(self): datetime(2019, 1, 15, 4, 5, 6, 789120, tzinfo=tz.tzutc()), ) + # regression test for issue #447 + def test_parse_timestamp_token(self): + # should not match on the "X" token + self.assertEqual( + self.parser.parse( + "15 Jul 2000", + ["MM/DD/YYYY", "YYYY-MM-DD", "X", "DD-MMMM-YYYY", "D MMM YYYY"], + ), + datetime(2000, 7, 15), + ) + + with self.assertRaises(ParserError): + self.parser.parse("15 Jul", "X") + class DateTimeParserParseTests(Chai): def setUp(self): From e480ea006630600b387f3e16680bc4a0dc1be1ca Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 13 Jul 2019 17:49:23 -0400 Subject: [PATCH 19/50] Added overflow error test to is_timestamp --- tests/util_tests.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/util_tests.py b/tests/util_tests.py index 885c016b5..9855a05a4 100644 --- a/tests/util_tests.py +++ b/tests/util_tests.py @@ -19,3 +19,11 @@ def test_is_timestamp(self): full_datetime = "2019-06-23T13:12:42" self.assertFalse(util.is_timestamp(full_datetime)) + + overflow_timestamp_float = 99999999999999999999999999.99999999999999999999999999 + with self.assertRaises((OverflowError, ValueError)): + util.is_timestamp(overflow_timestamp_float) + + overflow_timestamp_int = int(overflow_timestamp_float) + with self.assertRaises((OverflowError, ValueError)): + util.is_timestamp(overflow_timestamp_int) From 3f1a3c951458381bc73343ac2cd7132067dd8bf3 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 13 Jul 2019 17:52:27 -0400 Subject: [PATCH 20/50] Attempt at fixing linting --- .pre-commit-config.yaml | 2 +- .travis.yml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 825d4569d..9ef036661 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v1.20.1 + rev: v1.21.0 hooks: - id: pyupgrade - repo: https://github.com/pre-commit/pygrep-hooks diff --git a/.travis.yml b/.travis.yml index 35a55a21b..1bca8a256 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,9 +15,6 @@ matrix: - name: "Linting" python: 3.7 env: TOXENV=lint - cache: - directories: - - $HOME/.cache/pre-commit install: pip install -U codecov tox script: tox after_success: codecov From 87f63fb632130ea6bcba2121d6a46c1aac4bdc04 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 14 Jul 2019 13:08:14 -0400 Subject: [PATCH 21/50] Added support for DDD and DDDD tokens in arrow.get() --- arrow/parser.py | 60 +++++++++++++++++++++++++++++++++++-------------- docs/index.rst | 5 ++--- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 7711621cb..a45e620b7 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -2,7 +2,6 @@ from __future__ import absolute_import, unicode_literals import re -import warnings from datetime import datetime from dateutil import tz @@ -19,21 +18,6 @@ class ParserError(RuntimeError): pass -class GetParseWarning(DeprecationWarning): - """Raised when arrow.get() is passed a string with no formats and matches incorrectly - on one of the default formats. - - e.g. - arrow.get('blabla2016') -> - arrow.get('13/4/2045') -> - - In version 0.15.0 this warning will become a ParserError. - """ - - -warnings.simplefilter("default", GetParseWarning) - - class DateTimeParser(object): _FORMAT_RE = re.compile( @@ -43,6 +27,8 @@ class DateTimeParser(object): _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+") _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}") + _ONE_OR_TWO_OR_THREE_DIGIT_RE = re.compile(r"\d{1,3}") + _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") _TWO_DIGIT_RE = re.compile(r"\d{2}") _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") @@ -54,6 +40,8 @@ class DateTimeParser(object): "YY": _TWO_DIGIT_RE, "MM": _TWO_DIGIT_RE, "M": _ONE_OR_TWO_DIGIT_RE, + "DDDD": _THREE_DIGIT_RE, + "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE, "DD": _TWO_DIGIT_RE, "D": _ONE_OR_TWO_DIGIT_RE, "HH": _TWO_DIGIT_RE, @@ -125,7 +113,7 @@ def parse_iso(self, datetime_string): # TODO: add tests for all the new formats, especially basic format - # required date formats to test against + # date formats (ISO-8601 and others) to test against formats = [ "YYYY-MM-DD", "YYYY-M-DD", @@ -137,6 +125,8 @@ def parse_iso(self, datetime_string): "YYYY.M.DD", "YYYY.M.D", "YYYYMMDD", + "YYYY-DDDD", + "YYYYDDDD", "YYYY-MM", "YYYY/MM", "YYYY.MM", @@ -301,6 +291,9 @@ def _parse_token(self, token, value, parts): elif token in ["MM", "M"]: parts["month"] = int(value) + elif token in ["DDDD", "DDD"]: + parts["day_of_year"] = int(value) + elif token in ["DD", "D"]: parts["day"] = int(value) @@ -354,6 +347,39 @@ def _build_datetime(parts): tz_utc = tz.tzutc() return datetime.fromtimestamp(timestamp, tz=tz_utc) + # TODO: add tests for this! + day_of_year = parts.get("day_of_year") + + if day_of_year: + year = parts.get("year") + month = parts.get("month") + if year is None: + raise ParserError( + "Year component is required with the DDD and DDDD tokens" + ) + + if month is not None: + raise ParserError( + "Month component is not allowed with the DDD and DDDD tokens" + ) + + date_string = "{}-{}".format(year, day_of_year) + try: + dt = datetime.strptime(date_string, "%Y-%j") + except ValueError: + raise ParserError( + "Expected a valid day of year, but received '{}'".format( + day_of_year + ) + ) + + # TODO: write test for 2015-366 + # datetime.strptime("2015-366", "%Y-%j") + # Changes year: datetime.datetime(2016, 1, 1, 0, 0) + parts["year"] = dt.year + parts["month"] = dt.month + parts["day"] = dt.day + am_pm = parts.get("am_pm") hour = parts.get("hour", 0) diff --git a/docs/index.rst b/docs/index.rst index a0a7775d2..7decad748 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -327,9 +327,9 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ | |M |1, 2, 3 ... 11, 12 | +--------------------------------+--------------+-------------------------------------------+ -|**Day of Year** |DDDD [#t5]_ |001, 002, 003 ... 364, 365 | +|**Day of Year** |DDDD |001, 002, 003 ... 364, 365 | +--------------------------------+--------------+-------------------------------------------+ -| |DDD [#t5]_ |1, 2, 3 ... 4, 5 | +| |DDD |1, 2, 3 ... 364, 365 | +--------------------------------+--------------+-------------------------------------------+ |**Day of Month** |DD |01, 02, 03 ... 30, 31 | +--------------------------------+--------------+-------------------------------------------+ @@ -387,7 +387,6 @@ Any token can be escaped when parsing by enclosing it within square brackets: .. [#t2] localization support only for formatting .. [#t3] the result is truncated to microseconds, with `half-to-even rounding `_. .. [#t4] timezone names from `tz database `_ provided via dateutil package -.. [#t5] support for the DDD and DDDD tokens will be added in a future release API Guide --------- From a6995bafdf87b573c8230181fec31c077b19ee87 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 14 Jul 2019 14:54:15 -0400 Subject: [PATCH 22/50] Cleaned up timestamp docs --- docs/index.rst | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 7decad748..fd1754ea2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,22 +26,16 @@ Get 'now' easily: >>> arrow.now('US/Pacific') -Create from timestamps (ints or floats, or strings that convert to a float): +Create from timestamps (:code:`int` or :code:`float`): .. code-block:: python >>> arrow.get(1367900664) - >>> arrow.get('1367900664') - - >>> arrow.get(1367900664.152325) - >>> arrow.get('1367900664.152325') - - Use a naive or timezone-aware datetime, or flexibly specify a timezone: .. code-block:: python From d4bcb73530e7cade1a098062b1f5d1eac41434e3 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 28 Jul 2019 11:51:41 -0400 Subject: [PATCH 23/50] Added tests for new timestring changes --- arrow/parser.py | 6 ++++-- tests/parser_tests.py | 48 +++++++++++++++---------------------------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index a45e620b7..912d56bca 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -34,6 +34,7 @@ class DateTimeParser(object): _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") + # TODO: test timestamp thoroughly _BASE_INPUT_RE_MAP = { "YYYY": _FOUR_DIGIT_RE, @@ -164,8 +165,9 @@ def parse_iso(self, datetime_string): elif has_hours: time_string = "HH" else: - # TODO: add tests for new conditional cases - raise ParserError("No valid time component provided.") + raise ParserError( + "Invalid time component provided. Please specify a format or provide a time in the form 'HH:mm:ss.S', 'HH:mm:ss', 'HH:mm', or 'HH'." + ) if is_basic_time_format: time_string = time_string.replace(":", "") diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 0173fedef..f1f9b9607 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -294,7 +294,6 @@ def test_parse_subsecond_rounding(self): # round down string = "2013-01-01 12:30:45.98765432" self.assertEqual(self.parser.parse(string, format), self.expected) - # import pudb; pudb.set_trace() self.assertEqual(self.parser.parse_iso(string), self.expected) # round half-up @@ -567,6 +566,23 @@ def test_YYYY_MM_DD_HH_mm(self): self.parser.parse_iso("2013-02-03 04:05"), datetime(2013, 2, 3, 4, 5) ) + def test_YYYY_MM_DD_HH(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03 04"), datetime(2013, 2, 3, 4) + ) + + def test_invalid_time(self): + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03 044") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03 04:05:06.") + def test_YYYY_MM_DD_HH_mm_ssZ(self): self.assertEqual( @@ -657,36 +673,6 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120), ) - # def test_bad_get_parsing(self): - # # fixes for loose get parsing - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse_iso("blabla2016") - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse_iso("2016blabla") - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse_iso("10/4/2045") - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse_iso("2016-05T04:05:06.78912blahZ") - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse_iso("2016-05T04:05:06.78912Zblah") - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse("15/01/2019", ["D/M/YY", "D/M/YYYY"]) - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse("05/02/2017", ["YYYY", "MM/DD/YYYY"]) - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse("1919/05/23", ["YY/M/D", "YYYY/M/D"]) - # - # with self.assertWarns(GetParseWarning): - # self.parser.parse("2017/05/22", ["YYYY", "YYYY/MM/DD"]) - def test_gnu_date(self): """ regression tests for parsing output from GNU date(1) From 6c8fbbad71f1d9a69b803de2a1950e04cef2fa42 Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Tue, 30 Jul 2019 22:47:44 +0100 Subject: [PATCH 24/50] Add tests for DDD and DDDD tokens --- tests/parser_tests.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f1f9b9607..6f4cf0e0f 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -416,6 +416,28 @@ def test_parse_with_leading_and_trailing_whitespace(self): datetime(2016, 5, 16, 4, 5, 6, 789120), ) + def test_parse_YYYY_DDDD(self): + self.assertEqual( + self.parser.parse("1998-136", "YYYY-DDDD"), datetime(1998, 5, 16) + ) + + self.assertEqual( + self.parser.parse("1998-006", "YYYY-DDDD"), datetime(1998, 1, 6) + ) + + with self.assertRaises(ParserError): + self.parser.parse("1998-456", "YYYY-DDDD") + + def test_parse_YYYY_DDD(self): + self.assertEqual(self.parser.parse("1998-6", "YYYY-DDD"), datetime(1998, 1, 6)) + + self.assertEqual( + self.parser.parse("1998-136", "YYYY-DDD"), datetime(1998, 5, 16) + ) + + with self.assertRaises(ParserError): + self.parser.parse("1998-756", "YYYY-DDD") + class DateTimeParserRegexTests(Chai): def setUp(self): @@ -511,6 +533,14 @@ def test_YYYY(self): self.assertEqual(self.parser.parse_iso("2013"), datetime(2013, 1, 1)) + def test_parse_YYYY_DDDD(self): + self.assertEqual(self.parser.parse_iso("1998-136"), datetime(1998, 5, 16)) + + self.assertEqual(self.parser.parse_iso("1998-006"), datetime(1998, 1, 6)) + + with self.assertRaises(ParserError): + self.parser.parse_iso("1998-456") + def test_YYYY_MM(self): for separator in DateTimeParser.SEPARATORS: From be87f39a9f82986d33d7fcfbed1a22e3e2617fe1 Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Thu, 1 Aug 2019 20:39:29 +0100 Subject: [PATCH 25/50] Further test cases for DDD and DDDD tokens --- arrow/parser.py | 1 + tests/parser_tests.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arrow/parser.py b/arrow/parser.py index fc7eff61e..c412d6dfa 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -377,6 +377,7 @@ def _build_datetime(parts): ) # TODO: write test for 2015-366 + # TODO: should we throw an error or mimic datetime? # datetime.strptime("2015-366", "%Y-%j") # Changes year: datetime.datetime(2016, 1, 1, 0, 0) parts["year"] = dt.year diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 6f4cf0e0f..f4382c45c 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -438,6 +438,19 @@ def test_parse_YYYY_DDD(self): with self.assertRaises(ParserError): self.parser.parse("1998-756", "YYYY-DDD") + # month cannot be passed with DDD and DDDD tokens + def test_parse_YYYY_MM_DDDD(self): + with self.assertRaises(ParserError): + self.parser.parse("2015-01-009", "YYYY-MM-DDDD") + + def test_parse_DDD_only(self): + with self.assertRaises(ParserError): + self.parser.parse("5", "DDD") + + def test_parse_DDDD_only(self): + with self.assertRaises(ParserError): + self.parser.parse("145", "DDDD") + class DateTimeParserRegexTests(Chai): def setUp(self): @@ -533,7 +546,7 @@ def test_YYYY(self): self.assertEqual(self.parser.parse_iso("2013"), datetime(2013, 1, 1)) - def test_parse_YYYY_DDDD(self): + def test_YYYY_DDDD(self): self.assertEqual(self.parser.parse_iso("1998-136"), datetime(1998, 5, 16)) self.assertEqual(self.parser.parse_iso("1998-006"), datetime(1998, 1, 6)) @@ -541,6 +554,17 @@ def test_parse_YYYY_DDDD(self): with self.assertRaises(ParserError): self.parser.parse_iso("1998-456") + def test_YYYY_DDDD_HH_mm_ssZ(self): + + self.assertEqual( + self.parser.parse_iso("2013-036 04:05:06+01:00"), + datetime(2013, 2, 5, 4, 5, 6, tzinfo=tz.tzoffset(None, 3600)), + ) + + def test_YYYY_MM_DDDD(self): + with self.assertRaises(ParserError): + self.parser.parse_iso("2014-05-125") + def test_YYYY_MM(self): for separator in DateTimeParser.SEPARATORS: From f02c8d73ad8253564787cea2e670751f387350ae Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Fri, 2 Aug 2019 22:12:07 +0100 Subject: [PATCH 26/50] Add test cases for iso8601 basic format Correct error in parse_iso when handling basic format timezones. --- arrow/parser.py | 12 ++++++------ tests/parser_tests.py | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index c412d6dfa..a710b2407 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -93,6 +93,7 @@ def __init__(self, locale="en_us", cache_size=0): ) # TODO: since we support more than ISO-8601, we should rename this function + # IDEA: break into multiple functions def parse_iso(self, datetime_string): # strip leading and trailing whitespace datetime_string = datetime_string.strip() @@ -114,7 +115,7 @@ def parse_iso(self, datetime_string): has_tz = False # TODO: add tests for all the new formats, especially basic format - + # IDEA: should YYYY MM DD style be accepted here? # date formats (ISO-8601 and others) to test against formats = [ "YYYY-MM-DD", @@ -148,13 +149,12 @@ def parse_iso(self, datetime_string): time_parts = re.split("[+-]", time_string, 1) colon_count = time_parts[0].count(":") - # TODO: add test for basic format with Z "20160504T010203Z" is_basic_time_format = colon_count == 0 has_tz = len(time_parts) > 1 - has_hours = len(time_string) == 2 - has_minutes = colon_count == 1 or len(time_string) == 4 - has_seconds = colon_count == 2 or len(time_string) == 6 + has_hours = len(time_parts[0]) == 2 + has_minutes = colon_count == 1 or len(time_parts[0]) == 4 + has_seconds = colon_count == 2 or len(time_parts[0]) == 6 has_subseconds = re.search("[.,]", time_parts[0]) if has_subseconds: @@ -178,7 +178,7 @@ def parse_iso(self, datetime_string): else: formats = ["{}T{}".format(f, time_string) for f in formats] - # TODO: reduce set of date formats for basic? + # TODO: reduce set of date formats for basic? test earlier? if has_time and has_tz: # Add "Z" to format strings to indicate to _parse_tokens diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f4382c45c..5dc261c5f 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -246,9 +246,9 @@ def test_parse_tz_zz(self): def test_parse_tz_name_zzz(self): for tz_name in ( # best solution would be to test on every available tz name from - # the tz database but it is actualy tricky to retrieve them from + # the tz database but it is actually tricky to retrieve them from # dateutil so here is short list that should match all - # naming patterns/conventions in used tz databaze + # naming patterns/conventions in used tz database "Africa/Tripoli", "America/Port_of_Spain", "Australia/LHI", @@ -798,6 +798,35 @@ def test_parse_iso_with_leading_and_trailing_whitespace(self): datetime(2016, 5, 16, 4, 5, 6, 789120), ) + def test_iso8601_basic_format(self): + self.assertEqual(self.parser.parse_iso("20180517"), datetime(2018, 5, 17)) + + self.assertEqual( + self.parser.parse_iso("20180517T10"), datetime(2018, 5, 17, 10) + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513.84"), + datetime(2018, 5, 17, 10, 55, 13, 840000), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513Z"), datetime(2018, 5, 17, 10, 55, 13) + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513-0700"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + # too many digits in date + with self.assertRaises(ParserError): + self.parser.parse_iso("201860517T105513Z") + + # too many digits in time + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T1055213Z") + class TzinfoParserTests(Chai): def setUp(self): From d15292318388373f43419390758a170ef6af5cfb Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 4 Aug 2019 11:29:33 -0400 Subject: [PATCH 27/50] Added back support for multiple subsecond tokens to align with format function --- arrow/parser.py | 6 +++- docs/index.rst | 2 +- tests/parser_tests.py | 69 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index a710b2407..3432b5213 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -228,6 +228,10 @@ def _generate_pattern_re(self, fmt): # Extract the bracketed expressions to be reinserted later. escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt) + # Any number of S is the same as one. + # TODO: allow users to specify the number of digits to parse + escaped_fmt = re.sub("S+", "S", escaped_fmt) + escaped_data = re.findall(self._ESCAPE_RE, fmt) fmt_pattern = escaped_fmt @@ -315,7 +319,7 @@ def _parse_token(self, token, value, parts): elif token == "S": # We have the *most significant* digits of an arbitrary-precision integer. # We want the six most significant digits as an integer, rounded. - # FIXME: add nanosecond support somehow? + # IDEA: add nanosecond support somehow? Need datetime support for it first. value = value.ljust(7, str("0")) # floating-point (IEEE-754) defaults to half-to-even rounding diff --git a/docs/index.rst b/docs/index.rst index f6f10c928..8273a6f62 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -357,7 +357,7 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ | |s |0, 1, 2 ... 58, 59 | +--------------------------------+--------------+-------------------------------------------+ -|**Sub-second** |S |0, 02, 003, 000006, 123123123123... [#t3]_ | +|**Sub-second** |S... |0, 02, 003, 000006, 123123123123... [#t3]_ | +--------------------------------+--------------+-------------------------------------------+ |**Timezone** |ZZZ |Asia/Baku, Europe/Warsaw, GMT ... [#t4]_ | +--------------------------------+--------------+-------------------------------------------+ diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 5dc261c5f..f25bd1974 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -272,14 +272,44 @@ def test_parse_tz_name_zzz(self): self.parser.parse("2013-01-01 +1000", "YYYY-MM-DD ZZZ") def test_parse_subsecond(self): + # TODO: make both test_parse_subsecond functions in Parse and ParseISO + # tests use the same expected objects (use pytest fixtures) + self.expected = datetime(2013, 1, 1, 12, 30, 45, 900000) + self.assertEqual( + self.parser.parse("2013-01-01 12:30:45.9", "YYYY-MM-DD HH:mm:ss.S"), + self.expected, + ) - self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.expected = datetime(2013, 1, 1, 12, 30, 45, 980000) + self.assertEqual( + self.parser.parse("2013-01-01 12:30:45.98", "YYYY-MM-DD HH:mm:ss.SS"), + self.expected, + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987000) self.assertEqual( - self.parser.parse("2013-01-01 12:30:45.987654", "YYYY-MM-DD HH:mm:ss.S"), + self.parser.parse("2013-01-01 12:30:45.987", "YYYY-MM-DD HH:mm:ss.SSS"), self.expected, ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987600) self.assertEqual( - self.parser.parse_iso("2013-01-01 12:30:45.987654"), self.expected + self.parser.parse("2013-01-01 12:30:45.9876", "YYYY-MM-DD HH:mm:ss.SSSS"), + self.expected, + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987650) + self.assertEqual( + self.parser.parse("2013-01-01 12:30:45.98765", "YYYY-MM-DD HH:mm:ss.SSSSS"), + self.expected, + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.assertEqual( + self.parser.parse( + "2013-01-01 12:30:45.987654", "YYYY-MM-DD HH:mm:ss.SSSSSS" + ), + self.expected, ) def test_parse_subsecond_rounding(self): @@ -328,8 +358,6 @@ def test_parse_with_extra_words_at_start_and_end_invalid(self): ("2016-05-16T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), ("2016-05-16T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), ("2016-05-16T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), - ("2016-05-16T04:05:06.789120Z", "YYYY-MM-DDThh:mm:ss.SSZ"), - ("2016-05-16T04:05:06.789120Z", "YYYY-MM-DDThh:mm:ss.SSSSSSZ"), ] for pair in input_format_pairs: @@ -495,7 +523,7 @@ def test_format_subsecond(self): def test_format_tz(self): - self.assertEqual(self.format_regex.findall("ZZ-Z"), ["ZZ", "Z"]) + self.assertEqual(self.format_regex.findall("ZZZ-ZZ-Z"), ["ZZZ", "ZZ", "Z"]) def test_format_am_pm(self): @@ -727,6 +755,35 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120), ) + def test_parse_subsecond(self): + # TODO: make both test_parse_subsecond functions in Parse and ParseISO + # tests use the same expected objects (use pytest fixtures) + self.expected = datetime(2013, 1, 1, 12, 30, 45, 900000) + self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.9"), self.expected) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 980000) + self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.98"), self.expected) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987000) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.987"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987600) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.9876"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987650) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.98765"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.987654"), self.expected + ) + def test_gnu_date(self): """ regression tests for parsing output from GNU date(1) From 1b8e36bb5a0a4bee35197d446fcc681308c124ae Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 4 Aug 2019 17:54:13 -0400 Subject: [PATCH 28/50] Revamped regex of tz--work in progress --- arrow/parser.py | 32 +++++++++++++++++++++++++---- tests/parser_tests.py | 48 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 3432b5213..10b60c189 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -23,15 +23,23 @@ class DateTimeParser(object): _FORMAT_RE = re.compile( r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|X)" ) + # TODO: add support for inner brackets like "2018-03-09 8 [[h]] 40" _ESCAPE_RE = re.compile(r"\[[^\[\]]*\]") - _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+") _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}") _ONE_OR_TWO_OR_THREE_DIGIT_RE = re.compile(r"\d{1,3}") + _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+") + _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") - _TWO_DIGIT_RE = re.compile(r"\d{2}") + # _TZ_RE_ZZ = re.compile(r"^[+\-]\d{2}:(\d{2})?$|^ZZ?Z?$") + # _TZ_RE_Z = re.compile(r"^[+\-]\d{2}(\d{2})?$|^ZZ?Z?$") + + # _TZ_RE_ZZ = re.compile(r"^[+\-]\d{2}:(\d{2})?$|Z") + # _TZ_RE_Z = re.compile(r"^[+\-]\d{2}(\d{2})?$|Z") + _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") + # _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") # TODO: test timestamp thoroughly @@ -56,6 +64,8 @@ class DateTimeParser(object): "s": _ONE_OR_TWO_DIGIT_RE, "X": _TIMESTAMP_RE, "ZZZ": _TZ_NAME_RE, + # "ZZ": _TZ_RE_ZZ, + # "Z": _TZ_RE_Z, "ZZ": _TZ_RE, "Z": _TZ_RE, "S": _ONE_OR_MORE_DIGIT_RE, @@ -114,6 +124,8 @@ def parse_iso(self, datetime_string): has_time = has_space_divider or has_t_divider has_tz = False + # TODO: test basic format with timezone string without "+" + # TODO: add tests for all the new formats, especially basic format # IDEA: should YYYY MM DD style be accepted here? # date formats (ISO-8601 and others) to test against @@ -146,10 +158,22 @@ def parse_iso(self, datetime_string): else: date_string, time_string = datetime_string.split("T", 1) - time_parts = re.split("[+-]", time_string, 1) + time_parts = re.split(r"[+\-]", time_string, 1) colon_count = time_parts[0].count(":") is_basic_time_format = colon_count == 0 + tz_format = "Z" + + # tz offset is present + if len(time_parts) == 2: + tz_offset = time_parts[1] + + if ":" in tz_offset: + # TODO: add error message + if is_basic_time_format: + raise ParserError + + tz_format = "ZZ" has_tz = len(time_parts) > 1 has_hours = len(time_parts[0]) == 2 @@ -183,7 +207,7 @@ def parse_iso(self, datetime_string): if has_time and has_tz: # Add "Z" to format strings to indicate to _parse_tokens # that a timezone needs to be parsed - formats = ["{}Z".format(f) for f in formats] + formats = ["{}{}".format(f, tz_format) for f in formats] # TODO: make thrown error messages less cryptic and more informative return self._parse_multiformat(datetime_string, formats) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f25bd1974..bec95c55a 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -533,6 +533,14 @@ def test_format_timestamp(self): self.assertEqual(self.format_regex.findall("X"), ["X"]) + def test_escape(self): + + escape_regex = parser.DateTimeParser._ESCAPE_RE + + self.assertEqual( + escape_regex.findall("2018-03-09 8 [h] 40 [hello]"), ["[h]", "[hello]"] + ) + def test_month_names(self): p = parser.DateTimeParser("en_us") @@ -553,16 +561,52 @@ def test_month_abbreviations(self): def test_digits(self): + self.assertEqual( + parser.DateTimeParser._ONE_OR_TWO_DIGIT_RE.findall("4-56"), ["4", "56"] + ) + self.assertEqual( + parser.DateTimeParser._ONE_OR_TWO_OR_THREE_DIGIT_RE.findall("4-56-789"), + ["4", "56", "789"], + ) + self.assertEqual( + parser.DateTimeParser._ONE_OR_MORE_DIGIT_RE.findall("4-56-789-1234-12345"), + ["4", "56", "789", "1234", "12345"], + ) self.assertEqual( parser.DateTimeParser._TWO_DIGIT_RE.findall("12-3-45"), ["12", "45"] ) self.assertEqual( - parser.DateTimeParser._FOUR_DIGIT_RE.findall("1234-56"), ["1234"] + parser.DateTimeParser._THREE_DIGIT_RE.findall("123-4-56"), ["123"] ) self.assertEqual( - parser.DateTimeParser._ONE_OR_TWO_DIGIT_RE.findall("4-56"), ["4", "56"] + parser.DateTimeParser._FOUR_DIGIT_RE.findall("1234-56"), ["1234"] ) + # def test_tz(self): + # tz_re = parser.DateTimeParser._TZ_RE + # + # self.assertEqual( + # tz_re.findall("-07:00"), ["-07", "00"] + # ) + + # self.assertEqual( + # tz_re.findall("+07:00"), ["+07:00"] + # ) + # + # self.assertEqual( + # tz_re.findall("-0700"), ["-0700"] + # ) + # + # self.assertEqual( + # tz_re.findall("+0700"), ["+0700"] + # ) + # + # self.assertEqual( + # tz_re.findall("Z"), ["Z"] + # ) + + # what about +Z? + class DateTimeParserISOTests(Chai): def setUp(self): From 4db714570a9ce58a08c72aa1477e9e7a48ed650c Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 4 Aug 2019 20:02:01 -0400 Subject: [PATCH 29/50] Replace hard coded timestamp with time.time() --- tests/util_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/util_tests.py b/tests/util_tests.py index 9855a05a4..721697732 100644 --- a/tests/util_tests.py +++ b/tests/util_tests.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +import time + from chai import Chai from arrow import util @@ -6,7 +8,7 @@ class UtilTests(Chai): def test_is_timestamp(self): - timestamp_float = 1563047716.958061 + timestamp_float = time.time() timestamp_int = int(timestamp_float) self.assertTrue(util.is_timestamp(timestamp_int)) From 4c360cb24aff86d573a508c5d015c156c23d346e Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Fri, 9 Aug 2019 16:57:23 -0400 Subject: [PATCH 30/50] Tweaked tzinfo parser regex and added some TODOs --- arrow/parser.py | 53 ++++++++++++++++++++++--------------------- tests/parser_tests.py | 3 ++- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 10b60c189..6f69aee1b 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -32,14 +32,9 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") - # _TZ_RE_ZZ = re.compile(r"^[+\-]\d{2}:(\d{2})?$|^ZZ?Z?$") - # _TZ_RE_Z = re.compile(r"^[+\-]\d{2}(\d{2})?$|^ZZ?Z?$") - - # _TZ_RE_ZZ = re.compile(r"^[+\-]\d{2}:(\d{2})?$|Z") - # _TZ_RE_Z = re.compile(r"^[+\-]\d{2}(\d{2})?$|Z") - - _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?|Z") - # _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?") + # TODO: +07 is not possible with colon, fix regex + _TZ_RE_ZZ = re.compile(r"[+\-]\d{2}:(\d{2})?|Z") + _TZ_RE_Z = re.compile(r"[+\-]\d{2}(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") # TODO: test timestamp thoroughly @@ -64,10 +59,8 @@ class DateTimeParser(object): "s": _ONE_OR_TWO_DIGIT_RE, "X": _TIMESTAMP_RE, "ZZZ": _TZ_NAME_RE, - # "ZZ": _TZ_RE_ZZ, - # "Z": _TZ_RE_Z, - "ZZ": _TZ_RE, - "Z": _TZ_RE, + "ZZ": _TZ_RE_ZZ, + "Z": _TZ_RE_Z, "S": _ONE_OR_MORE_DIGIT_RE, } @@ -148,7 +141,15 @@ def parse_iso(self, datetime_string): "YYYY", ] + # TODO: add test that accounts for someone adding +Z or -Z to the datetime string vs just Z if has_time: + # TODO: write a test for this (more than one Z in datetime string) + if "Z" in datetime_string and datetime_string.count("Z") > 1: + # TODO: improve error message + raise ParserError( + "More than one 'Z' provided in the datetime string. Please pass in a single Z to denote the UTC timezone." + ) + # Z is ignored entirely because fromdatetime defaults to UTC in arrow.py if datetime_string[-1] == "Z": datetime_string = datetime_string[:-1] @@ -205,7 +206,7 @@ def parse_iso(self, datetime_string): # TODO: reduce set of date formats for basic? test earlier? if has_time and has_tz: - # Add "Z" to format strings to indicate to _parse_tokens + # Add "Z" to format strings to indicate to _parse_token # that a timezone needs to be parsed formats = ["{}{}".format(f, tz_format) for f in formats] @@ -299,13 +300,11 @@ def _generate_pattern_re(self, fmt): # Reference: https://stackoverflow.com/q/14232931/3820660 starting_word_boundary = r"(? Date: Sat, 10 Aug 2019 06:11:28 -0400 Subject: [PATCH 31/50] Added a comment to tzinfoparser --- arrow/parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arrow/parser.py b/arrow/parser.py index 6f69aee1b..6720ea1cc 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -456,6 +456,7 @@ def _choice_re(choices, flags=0): class TzinfoParser(object): + # TODO: align this with the TZ_RE_Z and TZ_RE_ZZ above # TODO: test this REGEX _TZINFO_RE = re.compile(r"^([+\-])?(\d{2}):?(\d{2})?$") From 4ed43aeb101daaec4e55dd553262410ecaf3c36a Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Tue, 13 Aug 2019 17:18:06 +0300 Subject: [PATCH 32/50] Finalized regex changes --- arrow/parser.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 6720ea1cc..4e797a70b 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -32,9 +32,12 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") - # TODO: +07 is not possible with colon, fix regex - _TZ_RE_ZZ = re.compile(r"[+\-]\d{2}:(\d{2})?|Z") - _TZ_RE_Z = re.compile(r"[+\-]\d{2}(\d{2})?|Z") + # https://regex101.com/r/ifOZxu/4 + _TZ_RE_Z = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") + # https://regex101.com/r/ifOZxu/5 + _TZ_RE_ZZ = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") + # _TZ_RE_ZZ = re.compile(r"[\+\-]\d{2}:(\d{2})?|Z") + # _TZ_RE_Z = re.compile(r"[\+\-]\d{2}(\d{2})?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") # TODO: test timestamp thoroughly @@ -159,7 +162,7 @@ def parse_iso(self, datetime_string): else: date_string, time_string = datetime_string.split("T", 1) - time_parts = re.split(r"[+\-]", time_string, 1) + time_parts = re.split(r"[\+\-]", time_string, 1) colon_count = time_parts[0].count(":") is_basic_time_format = colon_count == 0 @@ -176,11 +179,12 @@ def parse_iso(self, datetime_string): tz_format = "ZZ" + # TODO: use regex to determine if something is basic format has_tz = len(time_parts) > 1 has_hours = len(time_parts[0]) == 2 has_minutes = colon_count == 1 or len(time_parts[0]) == 4 has_seconds = colon_count == 2 or len(time_parts[0]) == 6 - has_subseconds = re.search("[.,]", time_parts[0]) + has_subseconds = re.search(r"[\.,]", time_parts[0]) if has_subseconds: time_string = "HH:mm:ss{}S".format(has_subseconds.group()) @@ -255,7 +259,7 @@ def _generate_pattern_re(self, fmt): # Any number of S is the same as one. # TODO: allow users to specify the number of digits to parse - escaped_fmt = re.sub("S+", "S", escaped_fmt) + escaped_fmt = re.sub(r"S+", "S", escaped_fmt) escaped_data = re.findall(self._ESCAPE_RE, fmt) @@ -458,7 +462,8 @@ def _choice_re(choices, flags=0): class TzinfoParser(object): # TODO: align this with the TZ_RE_Z and TZ_RE_ZZ above # TODO: test this REGEX - _TZINFO_RE = re.compile(r"^([+\-])?(\d{2}):?(\d{2})?$") + # https://regex101.com/r/ifOZxu/3 + _TZINFO_RE = re.compile(r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$") @classmethod def parse(cls, tzinfo_string): From 87684c142e65db38a96c9e279ddb46a640f6a2cf Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Tue, 13 Aug 2019 18:16:30 +0300 Subject: [PATCH 33/50] Added remaining regex tests --- arrow/parser.py | 11 +++++----- tests/parser_tests.py | 49 ++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 4e797a70b..f35f7254d 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -32,12 +32,11 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") + # TODO: test someone passing +Z or -Z # https://regex101.com/r/ifOZxu/4 - _TZ_RE_Z = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") + _TZ_Z_RE = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") # https://regex101.com/r/ifOZxu/5 - _TZ_RE_ZZ = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") - # _TZ_RE_ZZ = re.compile(r"[\+\-]\d{2}:(\d{2})?|Z") - # _TZ_RE_Z = re.compile(r"[\+\-]\d{2}(\d{2})?|Z") + _TZ_ZZ_RE = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") # TODO: test timestamp thoroughly @@ -62,8 +61,8 @@ class DateTimeParser(object): "s": _ONE_OR_TWO_DIGIT_RE, "X": _TIMESTAMP_RE, "ZZZ": _TZ_NAME_RE, - "ZZ": _TZ_RE_ZZ, - "Z": _TZ_RE_Z, + "ZZ": _TZ_ZZ_RE, + "Z": _TZ_Z_RE, "S": _ONE_OR_MORE_DIGIT_RE, } diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 9975ff9d3..74f5d0fdf 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -582,30 +582,31 @@ def test_digits(self): parser.DateTimeParser._FOUR_DIGIT_RE.findall("1234-56"), ["1234"] ) - # def test_tz(self): - # tz_re = parser.DateTimeParser._TZ_RE - # - # self.assertEqual( - # tz_re.findall("-07:00"), ["-07", "00"] - # ) - # - # self.assertEqual( - # tz_re.findall("+07:00"), ["+07:00"] - # ) - # - # self.assertEqual( - # tz_re.findall("-0700"), ["-0700"] - # ) - # - # self.assertEqual( - # tz_re.findall("+0700"), ["+0700"] - # ) - # - # self.assertEqual( - # tz_re.findall("Z"), ["Z"] - # ) - - # what about +Z? + def test_tz(self): + tz_z_re = parser.DateTimeParser._TZ_Z_RE + self.assertEqual(tz_z_re.findall("-0700"), [("-", "07", "00")]) + self.assertEqual(tz_z_re.findall("+07"), [("+", "07", "")]) + self.assertTrue(tz_z_re.search("15/01/2019T04:05:06.789120Z") is not None) + self.assertTrue(tz_z_re.search("15/01/2019T04:05:06.789120") is None) + + tz_zz_re = parser.DateTimeParser._TZ_ZZ_RE + self.assertEqual(tz_zz_re.findall("-07:00"), [("-", "07", "00")]) + self.assertEqual(tz_zz_re.findall("+07"), [("+", "07", "")]) + self.assertTrue(tz_zz_re.search("15/01/2019T04:05:06.789120Z") is not None) + self.assertTrue(tz_zz_re.search("15/01/2019T04:05:06.789120") is None) + + tz_name_re = parser.DateTimeParser._TZ_NAME_RE + self.assertEqual(tz_name_re.findall("Europe/Warsaw"), ["Europe/Warsaw"]) + self.assertEqual(tz_name_re.findall("GMT"), ["GMT"]) + + def test_timestamp(self): + timestamp_re = parser.DateTimeParser._TIMESTAMP_RE + self.assertEqual( + timestamp_re.findall("1565707550.452729"), ["1565707550.452729"] + ) + self.assertEqual(timestamp_re.findall("1565707550"), ["1565707550"]) + self.assertEqual(timestamp_re.findall("1565707550."), []) + self.assertEqual(timestamp_re.findall(".1565707550"), []) class DateTimeParserISOTests(Chai): From 62e8ced0f3bf37a818d9341cd811a29a9f70931d Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Tue, 13 Aug 2019 18:25:39 +0300 Subject: [PATCH 34/50] Added a few comments --- arrow/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index f35f7254d..e1d30f2e2 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -23,7 +23,6 @@ class DateTimeParser(object): _FORMAT_RE = re.compile( r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|X)" ) - # TODO: add support for inner brackets like "2018-03-09 8 [[h]] 40" _ESCAPE_RE = re.compile(r"\[[^\[\]]*\]") _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}") @@ -32,7 +31,6 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") - # TODO: test someone passing +Z or -Z # https://regex101.com/r/ifOZxu/4 _TZ_Z_RE = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") # https://regex101.com/r/ifOZxu/5 @@ -185,6 +183,8 @@ def parse_iso(self, datetime_string): has_seconds = colon_count == 2 or len(time_parts[0]) == 6 has_subseconds = re.search(r"[\.,]", time_parts[0]) + # Add tests for someone mixing basic format with colon-separated + if has_subseconds: time_string = "HH:mm:ss{}S".format(has_subseconds.group()) elif has_seconds: From 46f30b2a6908be36283de5c4daac943cb5bbe565 Mon Sep 17 00:00:00 2001 From: systemcatch <30196510+systemcatch@users.noreply.github.com> Date: Fri, 16 Aug 2019 17:07:19 +0100 Subject: [PATCH 35/50] Make error messages clearer and add extra DDDD test --- arrow/parser.py | 26 +++++++++++--------------- tests/parser_tests.py | 4 ++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 3432b5213..f73448c70 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -102,11 +102,16 @@ def parse_iso(self, datetime_string): has_t_divider = "T" in datetime_string num_spaces = datetime_string.count(" ") - if (has_space_divider and num_spaces != 1) or ( - has_t_divider and num_spaces > 0 - ): + if has_space_divider and num_spaces != 1: raise ParserError( - "Expected an ISO 8601-like string, but was given '{}'. Try passing in a format string to resolve this.".format( + "Expected an ISO 8601-like string, but was given '{}' which contains multiple spaces. Try passing in a format string to resolve this.".format( + datetime_string + ) + ) + + if has_t_divider and num_spaces > 0: + raise ParserError( + "Expected an ISO 8601-like string, but was given '{}' which contains \"T\" separator and spaces. Try passing in a format string to resolve this.".format( datetime_string ) ) @@ -115,7 +120,6 @@ def parse_iso(self, datetime_string): has_tz = False # TODO: add tests for all the new formats, especially basic format - # IDEA: should YYYY MM DD style be accepted here? # date formats (ISO-8601 and others) to test against formats = [ "YYYY-MM-DD", @@ -185,7 +189,6 @@ def parse_iso(self, datetime_string): # that a timezone needs to be parsed formats = ["{}Z".format(f) for f in formats] - # TODO: make thrown error messages less cryptic and more informative return self._parse_multiformat(datetime_string, formats) def parse(self, datetime_string, fmt): @@ -198,9 +201,7 @@ def parse(self, datetime_string, fmt): match = fmt_pattern_re.search(datetime_string) if match is None: raise ParserError( - "Failed to match '{}' when parsing '{}'".format( - fmt_pattern_re.pattern, datetime_string - ) + "Failed to match '{}' when parsing '{}'".format(fmt, datetime_string) ) parts = {} @@ -354,7 +355,6 @@ def _build_datetime(parts): tz_utc = tz.tzutc() return datetime.fromtimestamp(timestamp, tz=tz_utc) - # TODO: add tests for this! day_of_year = parts.get("day_of_year") if day_of_year: @@ -380,10 +380,6 @@ def _build_datetime(parts): ) ) - # TODO: write test for 2015-366 - # TODO: should we throw an error or mimic datetime? - # datetime.strptime("2015-366", "%Y-%j") - # Changes year: datetime.datetime(2016, 1, 1, 0, 0) parts["year"] = dt.year parts["month"] = dt.month parts["day"] = dt.day @@ -420,7 +416,7 @@ def _parse_multiformat(self, string, formats): if _datetime is None: raise ParserError( - "Could not match input '{}' to any of the supported formats: {}".format( + "Could not match input '{}' to any of the formats provided: {}".format( string, ", ".join(formats) ) ) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f25bd1974..e90b5d940 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -582,6 +582,10 @@ def test_YYYY_DDDD(self): with self.assertRaises(ParserError): self.parser.parse_iso("1998-456") + # datetime.strptime("2015-366", "%Y-%j") + # Changes year: datetime.datetime(2016, 1, 1, 0, 0) + self.assertEqual(self.parser.parse_iso("2015-366"), datetime(2016, 1, 1)) + def test_YYYY_DDDD_HH_mm_ssZ(self): self.assertEqual( From 4e36c9ad8ed9022268ec3e3d7a9a313b48ae3fd3 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Fri, 16 Aug 2019 21:56:48 +0300 Subject: [PATCH 36/50] Fixed a critical bug with X token and float timestamps; revamped basic format to use regex --- arrow/arrow.py | 2 +- arrow/parser.py | 78 ++++++++++++++++++++---------------------- docs/index.rst | 6 ++-- tests/arrow_tests.py | 2 +- tests/parser_tests.py | 79 +++++++++++++++++++++++++++++++++++-------- 5 files changed, 106 insertions(+), 61 deletions(-) diff --git a/arrow/arrow.py b/arrow/arrow.py index 19bbbe0b4..cb52a1fa9 100644 --- a/arrow/arrow.py +++ b/arrow/arrow.py @@ -1352,7 +1352,7 @@ def _get_iteration_params(cls, end, limit): if end is None: if limit is None: - raise Exception("one of 'end' or 'limit' is required") + raise ValueError("one of 'end' or 'limit' is required") return cls.max, limit diff --git a/arrow/parser.py b/arrow/parser.py index e1d30f2e2..f613c2c54 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -36,7 +36,10 @@ class DateTimeParser(object): # https://regex101.com/r/ifOZxu/5 _TZ_ZZ_RE = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") + # TODO: test timestamp with natural language processing. I think we may have to remove the ^...$ _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") + # https://regex101.com/r/LDMBVi/2 + _TIME_RE = re.compile(r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$") # TODO: test timestamp thoroughly # TODO: test new regular expressions @@ -121,6 +124,7 @@ def parse_iso(self, datetime_string): # TODO: add tests for all the new formats, especially basic format # IDEA: should YYYY MM DD style be accepted here? + # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) # date formats (ISO-8601 and others) to test against formats = [ "YYYY-MM-DD", @@ -141,73 +145,66 @@ def parse_iso(self, datetime_string): "YYYY", ] - # TODO: add test that accounts for someone adding +Z or -Z to the datetime string vs just Z if has_time: - # TODO: write a test for this (more than one Z in datetime string) - if "Z" in datetime_string and datetime_string.count("Z") > 1: - # TODO: improve error message - raise ParserError( - "More than one 'Z' provided in the datetime string. Please pass in a single Z to denote the UTC timezone." - ) - - # Z is ignored entirely because fromdatetime defaults to UTC in arrow.py - if datetime_string[-1] == "Z": - datetime_string = datetime_string[:-1] if has_space_divider: date_string, time_string = datetime_string.split(" ", 1) else: date_string, time_string = datetime_string.split("T", 1) - time_parts = re.split(r"[\+\-]", time_string, 1) - colon_count = time_parts[0].count(":") + time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE) + # TODO: is it a bug that we are just checking the timeparts for colons? this allows users to mix basic and extended like: 20130203 04:05:06.78912Z + time_colon_count = time_parts[0].count(":") - is_basic_time_format = colon_count == 0 + is_basic_time_format = time_colon_count == 0 tz_format = "Z" - # tz offset is present - if len(time_parts) == 2: - tz_offset = time_parts[1] + # use 'ZZ' token instead since tz offset is present in non-basic format + if len(time_parts) == 2 and ":" in time_parts[1]: + # TODO: should we throw an error if someone mixes non-basic tz (e.g. 07:00) with a basic datetime string? + # I thought so at first, but then I thought it was too much error checking. - if ":" in tz_offset: - # TODO: add error message - if is_basic_time_format: - raise ParserError + tz_format = "ZZ" - tz_format = "ZZ" + time_components = self._TIME_RE.match(time_parts[0]) - # TODO: use regex to determine if something is basic format - has_tz = len(time_parts) > 1 - has_hours = len(time_parts[0]) == 2 - has_minutes = colon_count == 1 or len(time_parts[0]) == 4 - has_seconds = colon_count == 2 or len(time_parts[0]) == 6 - has_subseconds = re.search(r"[\.,]", time_parts[0]) + if time_components is None: + raise ParserError( + "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format.".format() + ) - # Add tests for someone mixing basic format with colon-separated + hours, minutes, seconds, subseconds_sep, subseconds = ( + time_components.groups() + ) + + has_tz = len(time_parts) == 2 + has_hours = hours is not None + has_minutes = minutes is not None + has_seconds = seconds is not None + has_subseconds = subseconds is not None + + time_sep = "" if is_basic_time_format else ":" if has_subseconds: - time_string = "HH:mm:ss{}S".format(has_subseconds.group()) + time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format( + time_sep=time_sep, subseconds_sep=subseconds_sep + ) elif has_seconds: - time_string = "HH:mm:ss" + time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep) elif has_minutes: - time_string = "HH:mm" + time_string = "HH{time_sep}mm".format(time_sep=time_sep) elif has_hours: time_string = "HH" else: raise ParserError( - "Invalid time component provided. Please specify a format or provide a time in the form 'HH:mm:ss.S', 'HH:mm:ss', 'HH:mm', or 'HH'." + "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." ) - if is_basic_time_format: - time_string = time_string.replace(":", "") - if has_space_divider: formats = ["{} {}".format(f, time_string) for f in formats] else: formats = ["{}T{}".format(f, time_string) for f in formats] - # TODO: reduce set of date formats for basic? test earlier? - if has_time and has_tz: # Add "Z" to format strings to indicate to _parse_token # that a timezone needs to be parsed @@ -360,7 +357,7 @@ def _parse_token(self, token, value, parts): parts["microsecond"] = int(value[:6]) + rounding elif token == "X": - parts["timestamp"] = int(value) + parts["timestamp"] = float(value) elif token in ["ZZZ", "ZZ", "Z"]: parts["tzinfo"] = TzinfoParser.parse(value) @@ -459,8 +456,7 @@ def _choice_re(choices, flags=0): class TzinfoParser(object): - # TODO: align this with the TZ_RE_Z and TZ_RE_ZZ above - # TODO: test this REGEX + # TODO: test against full timezone DB # https://regex101.com/r/ifOZxu/3 _TZINFO_RE = re.compile(r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$") diff --git a/docs/index.rst b/docs/index.rst index 8273a6f62..14a62f1e1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -361,11 +361,11 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ |**Timezone** |ZZZ |Asia/Baku, Europe/Warsaw, GMT ... [#t4]_ | +--------------------------------+--------------+-------------------------------------------+ -| |ZZ |-07:00, -06:00 ... +06:00, +07:00 | +| |ZZ |-07:00, -06:00 ... +06:00, +07:00, +08, Z | +--------------------------------+--------------+-------------------------------------------+ -| |Z |-0700, -0600 ... +0600, +0700 | +| |Z |-0700, -0600 ... +0600, +0700, +08, Z | +--------------------------------+--------------+-------------------------------------------+ -|**Timestamp** |X |1381685817 | +|**Timestamp** |X |1381685817, 1381685817.915482 ... | +--------------------------------+--------------+-------------------------------------------+ .. rubric:: Footnotes diff --git a/tests/arrow_tests.py b/tests/arrow_tests.py index c88959b0d..87c2048d0 100644 --- a/tests/arrow_tests.py +++ b/tests/arrow_tests.py @@ -1812,5 +1812,5 @@ def test_get_iteration_params(self): ) self.assertEqual(arrow.Arrow._get_iteration_params(100, 120), (100, 120)) - with self.assertRaises(Exception): + with self.assertRaises(ValueError): arrow.Arrow._get_iteration_params(None, None) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 74f5d0fdf..db92ab848 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -133,7 +133,7 @@ def test_YY_and_YYYY_format_list(self): ) # regression test for issue #447 - def test_parse_timestamp_token(self): + def test_timestamp_format_list(self): # should not match on the "X" token self.assertEqual( self.parser.parse( @@ -200,9 +200,13 @@ def test_parse_year_two_digit(self): def test_parse_timestamp(self): tz_utc = tz.tzutc() - timestamp = int(time.time()) - self.expected = datetime.fromtimestamp(timestamp, tz=tz_utc) - self.assertEqual(self.parser.parse(str(timestamp), "X"), self.expected) + int_timestamp = int(time.time()) + self.expected = datetime.fromtimestamp(int_timestamp, tz=tz_utc) + self.assertEqual(self.parser.parse(str(int_timestamp), "X"), self.expected) + + float_timestamp = time.time() + self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) + self.assertEqual(self.parser.parse(str(float_timestamp), "X"), self.expected) def test_parse_names(self): @@ -269,7 +273,13 @@ def test_parse_tz_name_zzz(self): # note that offsets are not timezones with self.assertRaises(ParserError): - self.parser.parse("2013-01-01 +1000", "YYYY-MM-DD ZZZ") + self.parser.parse("2013-01-01 12:30:45.9+1000", "YYYY-MM-DDZZZ") + + with self.assertRaises(ParserError): + self.parser.parse("2013-01-01 12:30:45.9+10:00", "YYYY-MM-DDZZZ") + + with self.assertRaises(ParserError): + self.parser.parse("2013-01-01 12:30:45.9-10", "YYYY-MM-DDZZZ") def test_parse_subsecond(self): # TODO: make both test_parse_subsecond functions in Parse and ParseISO @@ -407,7 +417,7 @@ def test_parse_with_extra_words_at_start_and_end_valid(self): self.assertEqual( self.parser.parse( - "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", + "Meet me at 2016-05-16T04:05:06.789120 at the restaurant.", "YYYY-MM-DDThh:mm:ss.S", ), datetime(2016, 5, 16, 4, 5, 6, 789120), @@ -415,7 +425,7 @@ def test_parse_with_extra_words_at_start_and_end_valid(self): self.assertEqual( self.parser.parse( - "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", + "Meet me at 2016-05-16 04:05:06.789120 at the restaurant.", "YYYY-MM-DD hh:mm:ss.S", ), datetime(2016, 5, 16, 4, 5, 6, 789120), @@ -634,6 +644,11 @@ def test_YYYY_DDDD_HH_mm_ssZ(self): datetime(2013, 2, 5, 4, 5, 6, tzinfo=tz.tzoffset(None, 3600)), ) + self.assertEqual( + self.parser.parse_iso("2013-036 04:05:06Z"), + datetime(2013, 2, 5, 4, 5, 6, tzinfo=tz.tzutc()), + ) + def test_YYYY_MM_DDDD(self): with self.assertRaises(ParserError): self.parser.parse_iso("2014-05-125") @@ -792,14 +807,34 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 3600)), ) - # parse_iso sets tzinfo to None if Z is passed, so a default datetime - # object is sufficient to compare against. - # Arrow adds +00:00 when get() is called directly and tzinfo is None self.assertEqual( self.parser.parse_iso("2013-02-03 04:05:06.78912Z"), - datetime(2013, 2, 3, 4, 5, 6, 789120), + datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzutc()), ) + def test_invalid_Z(self): + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912zz") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912Zz") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912ZZ") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912+Z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912-Z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912 Z") + def test_parse_subsecond(self): # TODO: make both test_parse_subsecond functions in Parse and ParseISO # tests use the same expected objects (use pytest fixtures) @@ -873,8 +908,8 @@ def test_parse_iso_with_extra_words_at_start_and_end_invalid(self): "2016-05-16T04:05:06.789120ZblahZ", "2016-05-16T04:05:06.789120Zblah", "2016-05-16T04:05:06.789120blahZ", - "Meet me at 2016-05-16T04:05:06.789120 on Tuesday", - "Meet me at 2016-05-16 04:05:06.789120 on Tuesday", + "Meet me at 2016-05-16T04:05:06.789120 at the restaurant.", + "Meet me at 2016-05-16 04:05:06.789120 at the restaurant.", ] for ti in test_inputs: @@ -913,7 +948,8 @@ def test_iso8601_basic_format(self): ) self.assertEqual( - self.parser.parse_iso("20180517T105513Z"), datetime(2018, 5, 17, 10, 55, 13) + self.parser.parse_iso("20180517T105513Z"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzutc()), ) self.assertEqual( @@ -921,6 +957,9 @@ def test_iso8601_basic_format(self): datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), ) + # ordinal in basic format: YYYYDDDD + self.assertEqual(self.parser.parse_iso("1998136"), datetime(1998, 5, 16)) + # too many digits in date with self.assertRaises(ParserError): self.parser.parse_iso("201860517T105513Z") @@ -947,11 +986,21 @@ def test_parse_utc(self): def test_parse_iso(self): - # TODO: add tests! self.assertEqual(self.parser.parse("01:00"), tz.tzoffset(None, 3600)) + self.assertEqual( + self.parser.parse("11:35"), tz.tzoffset(None, 11 * 3600 + 2100) + ) self.assertEqual(self.parser.parse("+01:00"), tz.tzoffset(None, 3600)) self.assertEqual(self.parser.parse("-01:00"), tz.tzoffset(None, -3600)) + self.assertEqual(self.parser.parse("0100"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("+0100"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("-0100"), tz.tzoffset(None, -3600)) + + self.assertEqual(self.parser.parse("01"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("+01"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("-01"), tz.tzoffset(None, -3600)) + def test_parse_str(self): self.assertEqual(self.parser.parse("US/Pacific"), tz.gettz("US/Pacific")) From 6f13d10e3367217b171cbe2a8f1dff9eb4344917 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 00:45:07 +0300 Subject: [PATCH 37/50] Added tests and removed stripping of whitespace --- arrow/parser.py | 14 +++------- docs/index.rst | 3 ++- tests/factory_tests.py | 25 +++++++++++++----- tests/parser_tests.py | 59 ++++++++++++++++++++++++++++-------------- 4 files changed, 64 insertions(+), 37 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index f613c2c54..fb5b1fdde 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -31,18 +31,14 @@ class DateTimeParser(object): _TWO_DIGIT_RE = re.compile(r"\d{2}") _THREE_DIGIT_RE = re.compile(r"\d{3}") _FOUR_DIGIT_RE = re.compile(r"\d{4}") - # https://regex101.com/r/ifOZxu/4 _TZ_Z_RE = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") - # https://regex101.com/r/ifOZxu/5 _TZ_ZZ_RE = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") - # TODO: test timestamp with natural language processing. I think we may have to remove the ^...$ + # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will + # break cases like "15 Jul 2000" and a format list (see issue #447) _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") - # https://regex101.com/r/LDMBVi/2 _TIME_RE = re.compile(r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$") - # TODO: test timestamp thoroughly - # TODO: test new regular expressions _BASE_INPUT_RE_MAP = { "YYYY": _FOUR_DIGIT_RE, "YY": _TWO_DIGIT_RE, @@ -101,8 +97,7 @@ def __init__(self, locale="en_us", cache_size=0): # TODO: since we support more than ISO-8601, we should rename this function # IDEA: break into multiple functions def parse_iso(self, datetime_string): - # strip leading and trailing whitespace - datetime_string = datetime_string.strip() + # TODO: add a flag to normalize whitespace (useful in logs, ref issue #421) has_space_divider = " " in datetime_string has_t_divider = "T" in datetime_string @@ -170,7 +165,7 @@ def parse_iso(self, datetime_string): if time_components is None: raise ParserError( - "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format.".format() + "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." ) hours, minutes, seconds, subseconds_sep, subseconds = ( @@ -457,7 +452,6 @@ def _choice_re(choices, flags=0): class TzinfoParser(object): # TODO: test against full timezone DB - # https://regex101.com/r/ifOZxu/3 _TZINFO_RE = re.compile(r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$") @classmethod diff --git a/docs/index.rst b/docs/index.rst index 14a62f1e1..78fcea57c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -365,7 +365,7 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ | |Z |-0700, -0600 ... +0600, +0700, +08, Z | +--------------------------------+--------------+-------------------------------------------+ -|**Timestamp** |X |1381685817, 1381685817.915482 ... | +|**Timestamp** |X |1381685817, 1381685817.915482 ... [#t5]_ | +--------------------------------+--------------+-------------------------------------------+ .. rubric:: Footnotes @@ -374,6 +374,7 @@ Use the following tokens in parsing and formatting. Note that they're not the s .. [#t2] localization support only for formatting .. [#t3] the result is truncated to microseconds, with `half-to-even rounding `_. .. [#t4] timezone names from `tz database `_ provided via dateutil package +.. [#t5] this token cannot be used for parsing timestamps out of natural language strings due to compatibility reasons Escaping Formats ~~~~~~~~~~~~~~~~ diff --git a/tests/factory_tests.py b/tests/factory_tests.py index 4ea4d9acf..361e4257d 100644 --- a/tests/factory_tests.py +++ b/tests/factory_tests.py @@ -6,6 +6,7 @@ from dateutil import tz from arrow import factory, util +from arrow.parser import ParserError def assertDtEqual(dt1, dt2, within=10): @@ -45,17 +46,27 @@ def test_struct_time(self): def test_one_arg_timestamp(self): - timestamp = 12345 - timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) + int_timestamp = int(time.time()) + timestamp_dt = datetime.utcfromtimestamp(int_timestamp).replace( + tzinfo=tz.tzutc() + ) + + self.assertEqual(self.factory.get(int_timestamp), timestamp_dt) - self.assertEqual(self.factory.get(timestamp), timestamp_dt) + with self.assertRaises(ParserError): + self.factory.get(str(int_timestamp)) + + float_timestamp = time.time() + timestamp_dt = datetime.utcfromtimestamp(float_timestamp).replace( + tzinfo=tz.tzutc() + ) - timestamp = 123.45 - timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) + self.assertEqual(self.factory.get(float_timestamp), timestamp_dt) - self.assertEqual(self.factory.get(timestamp), timestamp_dt) + with self.assertRaises(ParserError): + self.factory.get(str(float_timestamp)) - # Issue 216 + # Regression test for issue #216 timestamp = 99999999999999999999999999 # Python 3 raises `OverflowError`, Python 2 raises `ValueError` with self.assertRaises((OverflowError, ValueError)): diff --git a/tests/parser_tests.py b/tests/parser_tests.py index db92ab848..f0ef85825 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -208,6 +208,20 @@ def test_parse_timestamp(self): self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) self.assertEqual(self.parser.parse(str(float_timestamp), "X"), self.expected) + # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will + # break cases like "15 Jul 2000" and a format list (see issue #447) + with self.assertRaises(ParserError): + natural_lang_string = "Meet me at {} at the restaurant.".format( + float_timestamp + ) + self.parser.parse(natural_lang_string, "X") + + with self.assertRaises(ParserError): + self.parser.parse("1565982019.", "X") + + with self.assertRaises(ParserError): + self.parser.parse(".1565982019", "X") + def test_parse_names(self): self.expected = datetime(2012, 1, 1) @@ -618,6 +632,32 @@ def test_timestamp(self): self.assertEqual(timestamp_re.findall("1565707550."), []) self.assertEqual(timestamp_re.findall(".1565707550"), []) + def test_time(self): + time_re = parser.DateTimeParser._TIME_RE + time_seperators = [":", ""] + + for sep in time_seperators: + self.assertEqual(time_re.findall("12"), [("12", "", "", "", "")]) + self.assertEqual( + time_re.findall("12{sep}35".format(sep=sep)), [("12", "35", "", "", "")] + ) + self.assertEqual( + time_re.findall("12{sep}35{sep}46".format(sep=sep)), + [("12", "35", "46", "", "")], + ) + self.assertEqual( + time_re.findall("12{sep}35{sep}46.952313".format(sep=sep)), + [("12", "35", "46", ".", "952313")], + ) + self.assertEqual( + time_re.findall("12{sep}35{sep}46,952313".format(sep=sep)), + [("12", "35", "46", ",", "952313")], + ) + + self.assertEqual(time_re.findall("12:"), []) + self.assertEqual(time_re.findall("12:35:46."), []) + self.assertEqual(time_re.findall("12:35:46,"), []) + class DateTimeParserISOTests(Chai): def setUp(self): @@ -916,25 +956,6 @@ def test_parse_iso_with_extra_words_at_start_and_end_invalid(self): with self.assertRaises(ParserError): self.parser.parse_iso(ti) - def test_parse_iso_with_leading_and_trailing_whitespace(self): - self.assertEqual(self.parser.parse_iso(" 2016"), datetime(2016, 1, 1)) - - self.assertEqual(self.parser.parse_iso("2016 "), datetime(2016, 1, 1)) - - self.assertEqual( - self.parser.parse_iso(" 2016 "), datetime(2016, 1, 1) - ) - - self.assertEqual( - self.parser.parse_iso(" 2016-05-16 04:05:06.789120 "), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) - - self.assertEqual( - self.parser.parse_iso(" 2016-05-16T04:05:06.789120 "), - datetime(2016, 5, 16, 4, 5, 6, 789120), - ) - def test_iso8601_basic_format(self): self.assertEqual(self.parser.parse_iso("20180517"), datetime(2018, 5, 17)) From d9cb79e5a183c3204d9c20b8f63309277cf154c6 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 01:03:11 +0300 Subject: [PATCH 38/50] Increment version --- arrow/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/_version.py b/arrow/_version.py index 141826d55..9da2f8fcc 100644 --- a/arrow/_version.py +++ b/arrow/_version.py @@ -1 +1 @@ -__version__ = "0.14.5" +__version__ = "0.15.0" From d7e083bef18713925992cea760613749c364893d Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 03:12:43 +0300 Subject: [PATCH 39/50] Fixed float timestamp on py27, removed warnings, changed parser error to inherit from value error --- arrow/factory.py | 26 -------------------------- arrow/parser.py | 3 +-- tests/parser_tests.py | 8 ++++++-- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/arrow/factory.py b/arrow/factory.py index 246ab5ce6..ef8e8972d 100644 --- a/arrow/factory.py +++ b/arrow/factory.py @@ -9,7 +9,6 @@ from __future__ import absolute_import import calendar -import warnings from datetime import date, datetime, tzinfo from time import struct_time @@ -20,21 +19,6 @@ from arrow.util import is_timestamp, isstr -class ArrowParseWarning(DeprecationWarning): - """Raised when arrow.get() is passed a string with no formats and matches incorrectly - on one of the default formats. - - e.g. - arrow.get('blabla2016') -> - arrow.get('13/4/2045') -> - - In version 0.15.0 this warning will become a ParserError. - """ - - -warnings.simplefilter("always", ArrowParseWarning) - - class ArrowFactory(object): """ A factory for generating :class:`Arrow ` objects. @@ -195,11 +179,6 @@ def get(self, *args, **kwargs): # (str) -> parse. elif isstr(arg): - warnings.warn( - "The .get() parsing method without a format string will parse more strictly in version 0.15.0." - "See https://github.com/crsmithdev/arrow/issues/612 for more details.", - ArrowParseWarning, - ) dt = parser.DateTimeParser(locale).parse_iso(arg) return self.type.fromdatetime(dt, tz) @@ -242,11 +221,6 @@ def get(self, *args, **kwargs): # (str, format) -> parse. elif isstr(arg_1) and (isstr(arg_2) or isinstance(arg_2, list)): - warnings.warn( - "The .get() parsing method with a format string will parse more strictly in version 0.15.0." - "See https://github.com/crsmithdev/arrow/issues/612 for more details.", - ArrowParseWarning, - ) dt = parser.DateTimeParser(locale).parse(args[0], args[1]) return self.type.fromdatetime(dt, tzinfo=tz) diff --git a/arrow/parser.py b/arrow/parser.py index 8a1bb181b..8223c3466 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -14,7 +14,7 @@ from backports.functools_lru_cache import lru_cache # pragma: no cover -class ParserError(RuntimeError): +class ParserError(ValueError): pass @@ -98,7 +98,6 @@ def __init__(self, locale="en_us", cache_size=0): # IDEA: break into multiple functions def parse_iso(self, datetime_string): # TODO: add a flag to normalize whitespace (useful in logs, ref issue #421) - has_space_divider = " " in datetime_string has_t_divider = "T" in datetime_string diff --git a/tests/parser_tests.py b/tests/parser_tests.py index f79d3cb29..917b69fab 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -202,11 +202,15 @@ def test_parse_timestamp(self): tz_utc = tz.tzutc() int_timestamp = int(time.time()) self.expected = datetime.fromtimestamp(int_timestamp, tz=tz_utc) - self.assertEqual(self.parser.parse(str(int_timestamp), "X"), self.expected) + self.assertEqual( + self.parser.parse("{:d}".format(int_timestamp), "X"), self.expected + ) float_timestamp = time.time() self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) - self.assertEqual(self.parser.parse(str(float_timestamp), "X"), self.expected) + self.assertEqual( + self.parser.parse("{:f}".format(float_timestamp), "X"), self.expected + ) # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will # break cases like "15 Jul 2000" and a format list (see issue #447) From 992038d6c67bd121c14184d4d9da39561d40026d Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 11:39:10 +0300 Subject: [PATCH 40/50] Added tests and cleaned up TODOs --- arrow/parser.py | 7 +------ tests/parser_tests.py | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 8223c3466..742c793f4 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -119,8 +119,6 @@ def parse_iso(self, datetime_string): has_time = has_space_divider or has_t_divider has_tz = False - # TODO: test basic format with timezone string without "+" - # TODO: add tests for all the new formats, especially basic format # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) # date formats (ISO-8601 and others) to test against @@ -151,7 +149,7 @@ def parse_iso(self, datetime_string): date_string, time_string = datetime_string.split("T", 1) time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE) - # TODO: is it a bug that we are just checking the timeparts for colons? this allows users to mix basic and extended like: 20130203 04:05:06.78912Z + # TODO: should we prevent mixing basic and extended formats? would need to ensure that dates, times, and timezones are in same format time_colon_count = time_parts[0].count(":") is_basic_time_format = time_colon_count == 0 @@ -159,9 +157,6 @@ def parse_iso(self, datetime_string): # use 'ZZ' token instead since tz offset is present in non-basic format if len(time_parts) == 2 and ":" in time_parts[1]: - # TODO: should we throw an error if someone mixes non-basic tz (e.g. 07:00) with a basic datetime string? - # I thought so at first, but then I thought it was too much error checking. - tz_format = "ZZ" time_components = self._TIME_RE.match(time_parts[0]) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 917b69fab..9c1da2edc 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -972,8 +972,8 @@ def test_iso8601_basic_format(self): ) self.assertEqual( - self.parser.parse_iso("20180517T105513.84"), - datetime(2018, 5, 17, 10, 55, 13, 840000), + self.parser.parse_iso("20180517T105513.843456"), + datetime(2018, 5, 17, 10, 55, 13, 843456), ) self.assertEqual( @@ -981,14 +981,47 @@ def test_iso8601_basic_format(self): datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzutc()), ) + self.assertEqual( + self.parser.parse_iso("20180517T105513.843456-0700"), + datetime(2018, 5, 17, 10, 55, 13, 843456, tzinfo=tz.tzoffset(None, -25200)), + ) + self.assertEqual( self.parser.parse_iso("20180517T105513-0700"), datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), ) + self.assertEqual( + self.parser.parse_iso("20180517T105513-07"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + # mixing formats--this may raise a ParserError in the future + self.assertEqual( + self.parser.parse_iso("2018-05-17T105513-0700"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T10:55:13-07:00"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513-07:00"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + # ordinal in basic format: YYYYDDDD self.assertEqual(self.parser.parse_iso("1998136"), datetime(1998, 5, 16)) + # timezone requires +- seperator + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T1055130700") + + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T10551307") + # too many digits in date with self.assertRaises(ParserError): self.parser.parse_iso("201860517T105513Z") From fca652290d9944652b1c97200e9eff12f559197e Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 15:16:47 +0300 Subject: [PATCH 41/50] Added ordinal tests and added new error type to fix exception bubbling issue with ordinal dates --- arrow/parser.py | 31 +++++++++++++++++++------------ tests/parser_tests.py | 31 ++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 742c793f4..266b439f7 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -18,6 +18,15 @@ class ParserError(ValueError): pass +# Allows for ParserErrors to be propagated from _build_datetime() +# when day_of_year errors occur. +# Before this, the ParserErrors were caught by the try/except in +# _parse_multiformat() and the appropriate error message was not +# transmitted to the user. +class ParserMatchError(ParserError): + pass + + class DateTimeParser(object): _FORMAT_RE = re.compile( @@ -104,14 +113,14 @@ def parse_iso(self, datetime_string): num_spaces = datetime_string.count(" ") if has_space_divider and num_spaces != 1: raise ParserError( - "Expected an ISO 8601-like string, but was given '{}' which contains multiple spaces. Try passing in a format string to resolve this.".format( + "Expected an ISO 8601-like string, but was given '{}', which contains multiple spaces. Try passing in a format string to resolve this.".format( datetime_string ) ) if has_t_divider and num_spaces > 0: raise ParserError( - "Expected an ISO 8601-like string, but was given '{}' which contains \"T\" separator and spaces. Try passing in a format string to resolve this.".format( + "Expected an ISO 8601-like string, but was given '{}', which contains a 'T' separator and spaces. Try passing in a format string to resolve this.".format( datetime_string ) ) @@ -214,7 +223,7 @@ def parse(self, datetime_string, fmt): match = fmt_pattern_re.search(datetime_string) if match is None: - raise ParserError( + raise ParserMatchError( "Failed to match '{}' when parsing '{}'".format(fmt, datetime_string) ) @@ -363,23 +372,23 @@ def _build_datetime(parts): timestamp = parts.get("timestamp") - if timestamp: + if timestamp is not None: tz_utc = tz.tzutc() return datetime.fromtimestamp(timestamp, tz=tz_utc) day_of_year = parts.get("day_of_year") - if day_of_year: + if day_of_year is not None: year = parts.get("year") month = parts.get("month") if year is None: raise ParserError( - "Year component is required with the DDD and DDDD tokens" + "Year component is required with the DDD and DDDD tokens." ) if month is not None: raise ParserError( - "Month component is not allowed with the DDD and DDDD tokens" + "Month component is not allowed with the DDD and DDDD tokens." ) date_string = "{}-{}".format(year, day_of_year) @@ -387,9 +396,7 @@ def _build_datetime(parts): dt = datetime.strptime(date_string, "%Y-%j") except ValueError: raise ParserError( - "Expected a valid day of year, but received '{}'".format( - day_of_year - ) + "The provided day of year '{}' is invalid.".format(day_of_year) ) parts["year"] = dt.year @@ -423,12 +430,12 @@ def _parse_multiformat(self, string, formats): try: _datetime = self.parse(string, fmt) break - except ParserError: + except ParserMatchError: pass if _datetime is None: raise ParserError( - "Could not match input '{}' to any of the formats provided: {}".format( + "Could not match input '{}' to any of the following formats: {}".format( string, ", ".join(formats) ) ) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 9c1da2edc..bd13b6eb0 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -9,7 +9,7 @@ from dateutil import tz from arrow import parser -from arrow.parser import DateTimeParser, ParserError +from arrow.parser import DateTimeParser, ParserError, ParserMatchError class DateTimeParserTests(Chai): @@ -22,7 +22,7 @@ def test_parse_multiformat(self): mock_datetime = self.mock() - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserMatchError) self.expect(self.parser.parse).args("str", "fmt_b").returns(mock_datetime) result = self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -31,8 +31,8 @@ def test_parse_multiformat(self): def test_parse_multiformat_all_fail(self): - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserMatchError) + self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserMatchError) with self.assertRaises(ParserError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -174,12 +174,12 @@ def test_parse_unrecognized_token(self): def test_parse_parse_no_match(self): - with self.assertRaises(parser.ParserError): + with self.assertRaises(ParserError): self.parser.parse("01-01", "YYYY-MM-DD") def test_parse_separators(self): - with self.assertRaises(parser.ParserError): + with self.assertRaises(ParserError): self.parser.parse("1403549231", "YYYY-MM-DD") def test_parse_numbers(self): @@ -499,6 +499,7 @@ def test_parse_YYYY_MM_DDDD(self): with self.assertRaises(ParserError): self.parser.parse("2015-01-009", "YYYY-MM-DDDD") + # year is required with the DDD and DDDD tokens def test_parse_DDD_only(self): with self.assertRaises(ParserError): self.parser.parse("5", "DDD") @@ -681,9 +682,21 @@ def test_YYYY_DDDD(self): with self.assertRaises(ParserError): self.parser.parse_iso("1998-456") - # datetime.strptime("2015-366", "%Y-%j") - # Changes year: datetime.datetime(2016, 1, 1, 0, 0) - self.assertEqual(self.parser.parse_iso("2015-366"), datetime(2016, 1, 1)) + # 2016 is a leap year, so Feb 29 exists (leap day) + self.assertEqual(self.parser.parse_iso("2016-059"), datetime(2016, 2, 28)) + self.assertEqual(self.parser.parse_iso("2016-060"), datetime(2016, 2, 29)) + self.assertEqual(self.parser.parse_iso("2016-061"), datetime(2016, 3, 1)) + + # 2017 is not a leap year, so Feb 29 does not exist + self.assertEqual(self.parser.parse_iso("2017-059"), datetime(2017, 2, 28)) + self.assertEqual(self.parser.parse_iso("2017-060"), datetime(2017, 3, 1)) + self.assertEqual(self.parser.parse_iso("2017-061"), datetime(2017, 3, 2)) + + # Since 2016 is a leap year, the 366th day falls in the same year + self.assertEqual(self.parser.parse_iso("2016-366"), datetime(2016, 12, 31)) + + # Since 2017 is not a leap year, the 366th day falls in the next year + self.assertEqual(self.parser.parse_iso("2017-366"), datetime(2018, 1, 1)) def test_YYYY_DDDD_HH_mm_ssZ(self): From d63fd39da2c160d05568eb5411501e9249ef0ca1 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 15:21:01 +0300 Subject: [PATCH 42/50] Consolidate and clean up errors for spacing --- arrow/parser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 266b439f7..0d7103d5a 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -111,16 +111,11 @@ def parse_iso(self, datetime_string): has_t_divider = "T" in datetime_string num_spaces = datetime_string.count(" ") - if has_space_divider and num_spaces != 1: + if (has_space_divider and num_spaces != 1) or ( + has_t_divider and num_spaces > 0 + ): raise ParserError( - "Expected an ISO 8601-like string, but was given '{}', which contains multiple spaces. Try passing in a format string to resolve this.".format( - datetime_string - ) - ) - - if has_t_divider and num_spaces > 0: - raise ParserError( - "Expected an ISO 8601-like string, but was given '{}', which contains a 'T' separator and spaces. Try passing in a format string to resolve this.".format( + "Expected an ISO 8601-like string, but was given '{}'. Try passing in a format string to resolve this.".format( datetime_string ) ) From e99837382b6763b77588693e6aacd370d06eb4f8 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 15:22:47 +0300 Subject: [PATCH 43/50] Consolidate and clean up errors for spacing --- arrow/parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 0d7103d5a..1ded17aa9 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -111,9 +111,7 @@ def parse_iso(self, datetime_string): has_t_divider = "T" in datetime_string num_spaces = datetime_string.count(" ") - if (has_space_divider and num_spaces != 1) or ( - has_t_divider and num_spaces > 0 - ): + if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0: raise ParserError( "Expected an ISO 8601-like string, but was given '{}'. Try passing in a format string to resolve this.".format( datetime_string From 9940542e3b521ea446f1a50a4e057b626fb3f7e1 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sat, 17 Aug 2019 15:33:55 +0300 Subject: [PATCH 44/50] Cleaned up a comment --- arrow/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 1ded17aa9..ea3f6c6f4 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -201,8 +201,8 @@ def parse_iso(self, datetime_string): formats = ["{}T{}".format(f, time_string) for f in formats] if has_time and has_tz: - # Add "Z" to format strings to indicate to _parse_token - # that a timezone needs to be parsed + # Add "Z" or "ZZ" to the format strings to indicate to + # _parse_token() that a timezone needs to be parsed formats = ["{}{}".format(f, tz_format) for f in formats] return self._parse_multiformat(datetime_string, formats) From 66d09f85de6491c0b38023d4d11de18c6a6420fb Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Mon, 19 Aug 2019 16:55:24 +0300 Subject: [PATCH 45/50] Reverted version increment; added tests to get coverage to 100% --- arrow/_version.py | 2 +- arrow/parser.py | 10 ++-------- tests/parser_tests.py | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/arrow/_version.py b/arrow/_version.py index 9da2f8fcc..141826d55 100644 --- a/arrow/_version.py +++ b/arrow/_version.py @@ -1 +1 @@ -__version__ = "0.15.0" +__version__ = "0.14.5" diff --git a/arrow/parser.py b/arrow/parser.py index ea3f6c6f4..3ce85f225 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -173,7 +173,6 @@ def parse_iso(self, datetime_string): ) has_tz = len(time_parts) == 2 - has_hours = hours is not None has_minutes = minutes is not None has_seconds = seconds is not None has_subseconds = subseconds is not None @@ -188,12 +187,8 @@ def parse_iso(self, datetime_string): time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep) elif has_minutes: time_string = "HH{time_sep}mm".format(time_sep=time_sep) - elif has_hours: - time_string = "HH" else: - raise ParserError( - "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." - ) + time_string = "HH" if has_space_divider: formats = ["{} {}".format(f, time_string) for f in formats] @@ -366,8 +361,7 @@ def _build_datetime(parts): timestamp = parts.get("timestamp") if timestamp is not None: - tz_utc = tz.tzutc() - return datetime.fromtimestamp(timestamp, tz=tz_utc) + return datetime.fromtimestamp(timestamp, tz=tz.tzutc()) day_of_year = parts.get("day_of_year") diff --git a/tests/parser_tests.py b/tests/parser_tests.py index bd13b6eb0..7381384c7 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -212,6 +212,18 @@ def test_parse_timestamp(self): self.parser.parse("{:f}".format(float_timestamp), "X"), self.expected ) + # test handling of ns timestamp (arrow will round to 6 digits regardless) + self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) + self.assertEqual( + self.parser.parse("{:f}123".format(float_timestamp), "X"), self.expected + ) + + # test ps timestamp (arrow will round to 6 digits regardless) + self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) + self.assertEqual( + self.parser.parse("{:f}123456".format(float_timestamp), "X"), self.expected + ) + # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will # break cases like "15 Jul 2000" and a format list (see issue #447) with self.assertRaises(ParserError): @@ -743,6 +755,19 @@ def test_YYYY_MM_DDTHH_mm(self): self.parser.parse_iso("2013-02-03T04:05"), datetime(2013, 2, 3, 4, 5) ) + def test_YYYY_MM_DDTHH(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03T04"), datetime(2013, 2, 3, 4) + ) + + def test_YYYY_MM_DDTHHZ(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03T04+01:00"), + datetime(2013, 2, 3, 4, tzinfo=tz.tzoffset(None, 3600)), + ) + def test_YYYY_MM_DDTHH_mm_ssZ(self): self.assertEqual( From 32fe0a57214b0852a7054050e95f6d186b521e4c Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Mon, 19 Aug 2019 17:47:37 +0300 Subject: [PATCH 46/50] Removed mixing of formatS --- tests/parser_tests.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 7381384c7..bc6b0e6ac 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -1034,22 +1034,6 @@ def test_iso8601_basic_format(self): datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), ) - # mixing formats--this may raise a ParserError in the future - self.assertEqual( - self.parser.parse_iso("2018-05-17T105513-0700"), - datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), - ) - - self.assertEqual( - self.parser.parse_iso("20180517T10:55:13-07:00"), - datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), - ) - - self.assertEqual( - self.parser.parse_iso("20180517T105513-07:00"), - datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), - ) - # ordinal in basic format: YYYYDDDD self.assertEqual(self.parser.parse_iso("1998136"), datetime(1998, 5, 16)) From 66e97fea193b6e3d53bceafcd5d4b237c54aea59 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 8 Sep 2019 11:15:41 -0400 Subject: [PATCH 47/50] Changed checking of basic time format --- arrow/parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 3ce85f225..952131fd1 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -151,10 +151,8 @@ def parse_iso(self, datetime_string): date_string, time_string = datetime_string.split("T", 1) time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE) - # TODO: should we prevent mixing basic and extended formats? would need to ensure that dates, times, and timezones are in same format - time_colon_count = time_parts[0].count(":") - is_basic_time_format = time_colon_count == 0 + is_basic_time_format = ":" not in time_parts[0] tz_format = "Z" # use 'ZZ' token instead since tz offset is present in non-basic format From 9aa2cb5138b150a60410a9505284517c4f4ef8d7 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 8 Sep 2019 11:47:50 -0400 Subject: [PATCH 48/50] Upgraded pre-commit packages --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 22aa58715..f8f133f43 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v1.22.1 + rev: v1.23.0 hooks: - id: pyupgrade - repo: https://github.com/pre-commit/pygrep-hooks @@ -33,7 +33,7 @@ repos: - id: python-no-eval - id: python-check-blanket-noqa - id: rst-backticks - - repo: https://github.com/python/black + - repo: https://github.com/psf/black rev: 19.3b0 hooks: - id: black From ca1bb57cb41a3b7b1a314e6c69f091d5fdac6d84 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 8 Sep 2019 15:17:33 -0400 Subject: [PATCH 49/50] Added tests for leading and trailing whitespace --- arrow/parser.py | 3 +-- tests/parser_tests.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 8e3f6563f..716bbe968 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -125,9 +125,8 @@ def parse_iso(self, datetime_string): has_time = has_space_divider or has_t_divider has_tz = False - # TODO: add tests for all the new formats, especially basic format - # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) # date formats (ISO-8601 and others) to test against + # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) formats = [ "YYYY-MM-DD", "YYYY-M-DD", diff --git a/tests/parser_tests.py b/tests/parser_tests.py index e020df7eb..d13433033 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -1002,6 +1002,42 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) + def test_parse_iso_with_leading_and_trailing_whitespace(self): + datetime_string = " 2016-11-15T06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = " 2016-11-15T06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = "2016-11-15T06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = "2016-11-15T 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # leading whitespace + datetime_string = " 2016-11-15 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # trailing whitespace + datetime_string = "2016-11-15 06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = " 2016-11-15 06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # two dividing spaces + datetime_string = "2016-11-15 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + def test_parse_iso_with_extra_words_at_start_and_end_invalid(self): test_inputs = [ "blah2016", From 3160c4c20c08ef5c0b59a4297edb9e32d32c2d60 Mon Sep 17 00:00:00 2001 From: Jad Chaar Date: Sun, 8 Sep 2019 15:48:43 -0400 Subject: [PATCH 50/50] Added test for comma subsecond separator and moved setting of tz_format down in file --- arrow/parser.py | 14 +++++++------- tests/parser_tests.py | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arrow/parser.py b/arrow/parser.py index 716bbe968..f1469a00b 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -155,13 +155,6 @@ def parse_iso(self, datetime_string): time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE) - is_basic_time_format = ":" not in time_parts[0] - tz_format = "Z" - - # use 'ZZ' token instead since tz offset is present in non-basic format - if len(time_parts) == 2 and ":" in time_parts[1]: - tz_format = "ZZ" - time_components = self._TIME_RE.match(time_parts[0]) if time_components is None: @@ -178,6 +171,13 @@ def parse_iso(self, datetime_string): has_seconds = seconds is not None has_subseconds = subseconds is not None + is_basic_time_format = ":" not in time_parts[0] + tz_format = "Z" + + # use 'ZZ' token instead since tz offset is present in non-basic format + if has_tz and ":" in time_parts[1]: + tz_format = "ZZ" + time_sep = "" if is_basic_time_format else ":" if has_subseconds: diff --git a/tests/parser_tests.py b/tests/parser_tests.py index d13433033..36bafcc9d 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -976,6 +976,12 @@ def test_parse_subsecond(self): self.parser.parse_iso("2013-01-01 12:30:45.987654"), self.expected ) + # use comma as subsecond separator + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45,987654"), self.expected + ) + def test_gnu_date(self): """ regression tests for parsing output from GNU date(1)