diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 22aa58715..f8f133f43 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v1.22.1 + rev: v1.23.0 hooks: - id: pyupgrade - repo: https://github.com/pre-commit/pygrep-hooks @@ -33,7 +33,7 @@ repos: - id: python-no-eval - id: python-check-blanket-noqa - id: rst-backticks - - repo: https://github.com/python/black + - repo: https://github.com/psf/black rev: 19.3b0 hooks: - id: black diff --git a/arrow/arrow.py b/arrow/arrow.py index 5ff96bddd..9ed179c15 100644 --- a/arrow/arrow.py +++ b/arrow/arrow.py @@ -1372,7 +1372,7 @@ def _get_iteration_params(cls, end, limit): if end is None: if limit is None: - raise Exception("one of 'end' or 'limit' is required") + raise ValueError("one of 'end' or 'limit' is required") return cls.max, limit diff --git a/arrow/factory.py b/arrow/factory.py index d1c233c06..f3f3b7fe6 100644 --- a/arrow/factory.py +++ b/arrow/factory.py @@ -9,7 +9,6 @@ from __future__ import absolute_import import calendar -import warnings from datetime import date, datetime from datetime import tzinfo as dt_tzinfo from time import struct_time @@ -21,18 +20,6 @@ from arrow.util import is_timestamp, isstr -class ArrowParseWarning(DeprecationWarning): - """Raised when arrow.get() is passed a string with no formats and matches incorrectly - on one of the default formats. - - e.g. - arrow.get('blabla2016') -> - arrow.get('13/4/2045') -> - - In version 0.15.0 this warning will become a ParserError. - """ - - class ArrowFactory(object): """ A factory for generating :class:`Arrow ` objects. 
@@ -73,7 +60,7 @@ def get(self, *args, **kwargs): >>> arrow.get(arw) - **One** ``str``, ``float``, or ``int``, convertible to a floating-point timestamp, to get + **One** ``float`` or ``int``, convertible to a floating-point timestamp, to get that timestamp in UTC:: >>> arrow.get(1367992474.293378) @@ -82,17 +69,16 @@ def get(self, *args, **kwargs): >>> arrow.get(1367992474) - >>> arrow.get('1367992474.293378') - - - >>> arrow.get('1367992474') - - **One** ISO-8601-formatted ``str``, to parse it:: >>> arrow.get('2013-09-29T01:26:43.830580') + **One** ISO-8601-formatted ``str``, in basic format, to parse it:: + + >>> arrow.get('20160413T133656.456289') + + **One** ``tzinfo``, to get the current time **converted** to that timezone:: >>> arrow.get(tz.tzlocal()) @@ -177,7 +163,7 @@ def get(self, *args, **kwargs): if arg is None: return self.type.utcnow() - # try (int, float, str(int), str(float)) -> utc, from timestamp. + # try (int, float) -> utc, from timestamp. if is_timestamp(arg): return self.type.utcfromtimestamp(arg) @@ -199,11 +185,6 @@ def get(self, *args, **kwargs): # (str) -> parse. elif isstr(arg): - warnings.warn( - "The .get() parsing method without a format string will parse more strictly in version 0.15.0." - "See https://github.com/crsmithdev/arrow/issues/612 for more details.", - ArrowParseWarning, - ) dt = parser.DateTimeParser(locale).parse_iso(arg) return self.type.fromdatetime(dt, tz) @@ -246,11 +227,6 @@ def get(self, *args, **kwargs): # (str, format) -> parse. elif isstr(arg_1) and (isstr(arg_2) or isinstance(arg_2, list)): - warnings.warn( - "The .get() parsing method with a format string will parse more strictly in version 0.15.0." 
- "See https://github.com/crsmithdev/arrow/issues/612 for more details.", - ArrowParseWarning, - ) dt = parser.DateTimeParser(locale).parse(args[0], args[1]) return self.type.fromdatetime(dt, tzinfo=tz) diff --git a/arrow/parser.py b/arrow/parser.py index 9930ab08c..f1469a00b 100644 --- a/arrow/parser.py +++ b/arrow/parser.py @@ -14,7 +14,16 @@ from backports.functools_lru_cache import lru_cache # pragma: no cover -class ParserError(RuntimeError): +class ParserError(ValueError): + pass + + +# Allows for ParserErrors to be propagated from _build_datetime() +# when day_of_year errors occur. +# Before this, the ParserErrors were caught by the try/except in +# _parse_multiformat() and the appropriate error message was not +# transmitted to the user. +class ParserMatchError(ParserError): pass @@ -25,18 +34,27 @@ class DateTimeParser(object): ) _ESCAPE_RE = re.compile(r"\[[^\[\]]*\]") - _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+") _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}") - _FOUR_DIGIT_RE = re.compile(r"\d{4}") + _ONE_OR_TWO_OR_THREE_DIGIT_RE = re.compile(r"\d{1,3}") + _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+") _TWO_DIGIT_RE = re.compile(r"\d{2}") - _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?") + _THREE_DIGIT_RE = re.compile(r"\d{3}") + _FOUR_DIGIT_RE = re.compile(r"\d{4}") + _TZ_Z_RE = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z") + _TZ_ZZ_RE = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z") _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+") + # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will + # break cases like "15 Jul 2000" and a format list (see issue #447) + _TIMESTAMP_RE = re.compile(r"^\d+\.?\d+$") + _TIME_RE = re.compile(r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$") _BASE_INPUT_RE_MAP = { "YYYY": _FOUR_DIGIT_RE, "YY": _TWO_DIGIT_RE, "MM": _TWO_DIGIT_RE, "M": _ONE_OR_TWO_DIGIT_RE, + "DDDD": _THREE_DIGIT_RE, + "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE, "DD": _TWO_DIGIT_RE, "D": _ONE_OR_TWO_DIGIT_RE, "HH": 
_TWO_DIGIT_RE, @@ -47,14 +65,13 @@ class DateTimeParser(object): "m": _ONE_OR_TWO_DIGIT_RE, "ss": _TWO_DIGIT_RE, "s": _ONE_OR_TWO_DIGIT_RE, - "X": re.compile(r"\d+"), + "X": _TIMESTAMP_RE, "ZZZ": _TZ_NAME_RE, - "ZZ": _TZ_RE, - "Z": _TZ_RE, + "ZZ": _TZ_ZZ_RE, + "Z": _TZ_Z_RE, "S": _ONE_OR_MORE_DIGIT_RE, } - MARKERS = ["YYYY", "MM", "DD"] SEPARATORS = ["-", "/", "."] def __init__(self, locale="en_us", cache_size=0): @@ -90,45 +107,124 @@ def __init__(self, locale="en_us", cache_size=0): self._generate_pattern_re ) - def parse_iso(self, string): + # TODO: since we support more than ISO-8601, we should rename this function + # IDEA: break into multiple functions + def parse_iso(self, datetime_string): + # TODO: add a flag to normalize whitespace (useful in logs, ref issue #421) + has_space_divider = " " in datetime_string + has_t_divider = "T" in datetime_string + + num_spaces = datetime_string.count(" ") + if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0: + raise ParserError( + "Expected an ISO 8601-like string, but was given '{}'. 
Try passing in a format string to resolve this.".format( + datetime_string + ) + ) - has_time = "T" in string or " " in string.strip() - space_divider = " " in string.strip() + has_time = has_space_divider or has_t_divider + has_tz = False + + # date formats (ISO-8601 and others) to test against + # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used) + formats = [ + "YYYY-MM-DD", + "YYYY-M-DD", + "YYYY-M-D", + "YYYY/MM/DD", + "YYYY/M/DD", + "YYYY/M/D", + "YYYY.MM.DD", + "YYYY.M.DD", + "YYYY.M.D", + "YYYYMMDD", + "YYYY-DDDD", + "YYYYDDDD", + "YYYY-MM", + "YYYY/MM", + "YYYY.MM", + "YYYY", + ] if has_time: - if space_divider: - date_string, time_string = string.split(" ", 1) + + if has_space_divider: + date_string, time_string = datetime_string.split(" ", 1) else: - date_string, time_string = string.split("T", 1) - time_parts = re.split("[+-]", time_string, 1) - has_tz = len(time_parts) > 1 - has_seconds = time_parts[0].count(":") > 1 - has_subseconds = re.search("[.,]", time_parts[0]) + date_string, time_string = datetime_string.split("T", 1) + + time_parts = re.split(r"[\+\-Z]", time_string, 1, re.IGNORECASE) + + time_components = self._TIME_RE.match(time_parts[0]) + + if time_components is None: + raise ParserError( + "Invalid time component provided. Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format." 
+ ) + + hours, minutes, seconds, subseconds_sep, subseconds = ( + time_components.groups() + ) + + has_tz = len(time_parts) == 2 + has_minutes = minutes is not None + has_seconds = seconds is not None + has_subseconds = subseconds is not None + + is_basic_time_format = ":" not in time_parts[0] + tz_format = "Z" + + # use 'ZZ' token instead since tz offset is present in non-basic format + if has_tz and ":" in time_parts[1]: + tz_format = "ZZ" + + time_sep = "" if is_basic_time_format else ":" if has_subseconds: - formats = ["YYYY-MM-DDTHH:mm:ss%sS" % has_subseconds.group()] + time_string = "HH{time_sep}mm{time_sep}ss{subseconds_sep}S".format( + time_sep=time_sep, subseconds_sep=subseconds_sep + ) elif has_seconds: - formats = ["YYYY-MM-DDTHH:mm:ss"] + time_string = "HH{time_sep}mm{time_sep}ss".format(time_sep=time_sep) + elif has_minutes: + time_string = "HH{time_sep}mm".format(time_sep=time_sep) else: - formats = ["YYYY-MM-DDTHH:mm"] - else: - has_tz = False - # generate required formats: YYYY-MM-DD, YYYY-MM-DD, YYYY - # using various separators: -, /, . 
- len_markers = len(self.MARKERS) - formats = [ - separator.join(self.MARKERS[: len_markers - i]) - for i in range(len_markers) - for separator in self.SEPARATORS - ] + time_string = "HH" + + if has_space_divider: + formats = ["{} {}".format(f, time_string) for f in formats] + else: + formats = ["{}T{}".format(f, time_string) for f in formats] if has_time and has_tz: - formats = [f + "Z" for f in formats] + # Add "Z" or "ZZ" to the format strings to indicate to + # _parse_token() that a timezone needs to be parsed + formats = ["{}{}".format(f, tz_format) for f in formats] + + return self._parse_multiformat(datetime_string, formats) - if space_divider: - formats = [item.replace("T", " ", 1) for item in formats] + def parse(self, datetime_string, fmt): - return self._parse_multiformat(string, formats) + if isinstance(fmt, list): + return self._parse_multiformat(datetime_string, fmt) + + fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) + + match = fmt_pattern_re.search(datetime_string) + if match is None: + raise ParserMatchError( + "Failed to match '{}' when parsing '{}'".format(fmt, datetime_string) + ) + + parts = {} + for token in fmt_tokens: + if token == "Do": + value = match.group("value") + else: + value = match.group(token) + self._parse_token(token, value, parts) + + return self._build_datetime(parts) def _generate_pattern_re(self, fmt): @@ -144,8 +240,11 @@ def _generate_pattern_re(self, fmt): # Extract the bracketed expressions to be reinserted later. escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt) + # Any number of S is the same as one. 
- escaped_fmt = re.sub("S+", "S", escaped_fmt) + # TODO: allow users to specify the number of digits to parse + escaped_fmt = re.sub(r"S+", "S", escaped_fmt) + escaped_data = re.findall(self._ESCAPE_RE, fmt) fmt_pattern = escaped_fmt @@ -170,44 +269,36 @@ def _generate_pattern_re(self, fmt): offset += len(input_pattern) - (m.end() - m.start()) final_fmt_pattern = "" - a = fmt_pattern.split(r"\#") - b = escaped_data - - # Due to the way Python splits, 'a' will always be longer - for i in range(len(a)): - final_fmt_pattern += a[i] - if i < len(b): - final_fmt_pattern += b[i][1:-1] - - return tokens, re.compile(final_fmt_pattern, flags=re.IGNORECASE) - - def parse(self, string, fmt): - - if isinstance(fmt, list): - return self._parse_multiformat(string, fmt) - - fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt) + split_fmt = fmt_pattern.split(r"\#") + + # Due to the way Python splits, 'split_fmt' will always be longer + for i in range(len(split_fmt)): + final_fmt_pattern += split_fmt[i] + if i < len(escaped_data): + final_fmt_pattern += escaped_data[i][1:-1] + + # Wrap final_fmt_pattern in a custom word boundary to strictly + # match the formatting pattern and filter out date and time formats + # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah, + # blah1998-09-12blah. The custom word boundary matches every character + # that is not a whitespace character to allow for searching for a date + # and time string in a natural language sentence. Therefore, searching + # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will + # work properly. + # Reference: https://stackoverflow.com/q/14232931/3820660 + starting_word_boundary = r"(? 
68 else 2000 + value @@ -218,6 +309,9 @@ def _parse_token(self, token, value, parts): elif token in ["MM", "M"]: parts["month"] = int(value) + elif token in ["DDDD", "DDD"]: + parts["day_of_year"] = int(value) + elif token in ["DD", "D"]: parts["day"] = int(value) @@ -236,7 +330,7 @@ def _parse_token(self, token, value, parts): elif token == "S": # We have the *most significant* digits of an arbitrary-precision integer. # We want the six most significant digits as an integer, rounded. - # FIXME: add nanosecond support somehow? + # IDEA: add nanosecond support somehow? Need datetime support for it first. value = value.ljust(7, str("0")) # floating-point (IEEE-754) defaults to half-to-even rounding @@ -251,7 +345,7 @@ def _parse_token(self, token, value, parts): parts["microsecond"] = int(value[:6]) + rounding elif token == "X": - parts["timestamp"] = int(value) + parts["timestamp"] = float(value) elif token in ["ZZZ", "ZZ", "Z"]: parts["tzinfo"] = TzinfoParser.parse(value) @@ -267,9 +361,35 @@ def _build_datetime(parts): timestamp = parts.get("timestamp") - if timestamp: - tz_utc = tz.tzutc() - return datetime.fromtimestamp(timestamp, tz=tz_utc) + if timestamp is not None: + return datetime.fromtimestamp(timestamp, tz=tz.tzutc()) + + day_of_year = parts.get("day_of_year") + + if day_of_year is not None: + year = parts.get("year") + month = parts.get("month") + if year is None: + raise ParserError( + "Year component is required with the DDD and DDDD tokens." + ) + + if month is not None: + raise ParserError( + "Month component is not allowed with the DDD and DDDD tokens." 
+ ) + + date_string = "{}-{}".format(year, day_of_year) + try: + dt = datetime.strptime(date_string, "%Y-%j") + except ValueError: + raise ParserError( + "The provided day of year '{}' is invalid.".format(day_of_year) + ) + + parts["year"] = dt.year + parts["month"] = dt.month + parts["day"] = dt.day am_pm = parts.get("am_pm") hour = parts.get("hour", 0) @@ -311,32 +431,18 @@ def _parse_multiformat(self, string, formats): try: _datetime = self.parse(string, fmt) break - except ParserError: + except ParserMatchError: pass if _datetime is None: raise ParserError( - "Could not match input to any of {} on '{}'".format(formats, string) + "Could not match input '{}' to any of the following formats: {}".format( + string, ", ".join(formats) + ) ) return _datetime - @staticmethod - def _map_lookup(input_map, key): - - try: - return input_map[key] - except KeyError: - raise ParserError('Could not match "{}" to {}'.format(key, input_map)) - - @staticmethod - def _try_timestamp(string): - - try: - return float(string) - except Exception: - return None - # generates a capture group of choices separated by an OR operator @staticmethod def _generate_choice_re(choices, flags=0): @@ -344,23 +450,23 @@ def _generate_choice_re(choices, flags=0): class TzinfoParser(object): - - _TZINFO_RE = re.compile(r"([+\-])?(\d\d):?(\d\d)?") + # TODO: test against full timezone DB + _TZINFO_RE = re.compile(r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$") @classmethod - def parse(cls, string): + def parse(cls, tzinfo_string): tzinfo = None - if string == "local": + if tzinfo_string == "local": tzinfo = tz.tzlocal() - elif string in ["utc", "UTC"]: + elif tzinfo_string in ["utc", "UTC", "Z"]: tzinfo = tz.tzutc() else: - iso_match = cls._TZINFO_RE.match(string) + iso_match = cls._TZINFO_RE.match(tzinfo_string) if iso_match: sign, hours, minutes = iso_match.groups() @@ -374,9 +480,11 @@ def parse(cls, string): tzinfo = tz.tzoffset(None, seconds) else: - tzinfo = tz.gettz(string) + tzinfo = 
tz.gettz(tzinfo_string) if tzinfo is None: - raise ParserError('Could not parse timezone expression "{}"'.format(string)) + raise ParserError( + 'Could not parse timezone expression "{}"'.format(tzinfo_string) + ) return tzinfo diff --git a/arrow/util.py b/arrow/util.py index 03132f7ed..8a379a4ef 100644 --- a/arrow/util.py +++ b/arrow/util.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import sys -import warnings +from datetime import datetime def total_seconds(td): # pragma: no cover @@ -10,12 +9,12 @@ def total_seconds(td): # pragma: no cover def is_timestamp(value): - if type(value) == bool: + if isinstance(value, bool): return False try: - float(value) + datetime.fromtimestamp(value) return True - except Exception: + except TypeError: return False @@ -34,62 +33,4 @@ def isstr(s): return isinstance(s, str) -class list_to_iter_shim(list): - """ A temporary shim for functions that currently return a list but that will, after a - deprecation period, return an iteratator. - """ - - def __init__(self, iterable=(), **kwargs): - """ Equivalent to list(iterable). warn_text will be emitted on all non-iterator operations. 
- """ - self._warn_text = ( - kwargs.pop("warn_text", None) - or "this object will be converted to an iterator in a future release" - ) - self._iter_count = 0 - list.__init__(self, iterable, **kwargs) - - def _warn(self): - warnings.warn(self._warn_text, DeprecationWarning) - - def __iter__(self): - self._iter_count += 1 - if self._iter_count > 1: - self._warn() - return list.__iter__(self) - - def _wrap_method(name): - list_func = getattr(list, name) - - def wrapper(self, *args, **kwargs): - self._warn() - return list_func(self, *args, **kwargs) - - return wrapper - - __contains__ = _wrap_method("__contains__") - __add__ = _wrap_method("__add__") - __mul__ = _wrap_method("__mul__") - __getitem__ = _wrap_method("__getitem__") - # Ideally, we would throw warnings from __len__, but list(x) calls len(x) - index = _wrap_method("index") - count = _wrap_method("count") - __setitem__ = _wrap_method("__setitem__") - __delitem__ = _wrap_method("__delitem__") - append = _wrap_method("append") - if sys.version_info.major >= 3: # pragma: no cover - clear = _wrap_method("clear") - copy = _wrap_method("copy") - extend = _wrap_method("extend") - __iadd__ = _wrap_method("__iadd__") - __imul__ = _wrap_method("__imul__") - insert = _wrap_method("insert") - pop = _wrap_method("pop") - remove = _wrap_method("remove") - reverse = _wrap_method("reverse") - sort = _wrap_method("sort") - - del _wrap_method - - -__all__ = ["total_seconds", "is_timestamp", "isstr", "list_to_iter_shim"] +__all__ = ["total_seconds", "is_timestamp", "isstr"] diff --git a/docs/index.rst b/docs/index.rst index 1f9cd1a47..78fcea57c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,22 +26,16 @@ Get 'now' easily: >>> arrow.now('US/Pacific') -Create from timestamps (ints or floats, or strings that convert to a float): +Create from timestamps (:code:`int` or :code:`float`): .. 
code-block:: python >>> arrow.get(1367900664) - >>> arrow.get('1367900664') - - >>> arrow.get(1367900664.152325) - >>> arrow.get('1367900664.152325') - - Use a naive or timezone-aware datetime, or flexibly specify a timezone: .. code-block:: python @@ -327,9 +321,9 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ | |M |1, 2, 3 ... 11, 12 | +--------------------------------+--------------+-------------------------------------------+ -|**Day of Year** |DDDD [#t5]_ |001, 002, 003 ... 364, 365 | +|**Day of Year** |DDDD |001, 002, 003 ... 364, 365 | +--------------------------------+--------------+-------------------------------------------+ -| |DDD [#t5]_ |1, 2, 3 ... 4, 5 | +| |DDD |1, 2, 3 ... 364, 365 | +--------------------------------+--------------+-------------------------------------------+ |**Day of Month** |DD |01, 02, 03 ... 30, 31 | +--------------------------------+--------------+-------------------------------------------+ @@ -367,11 +361,11 @@ Use the following tokens in parsing and formatting. Note that they're not the s +--------------------------------+--------------+-------------------------------------------+ |**Timezone** |ZZZ |Asia/Baku, Europe/Warsaw, GMT ... [#t4]_ | +--------------------------------+--------------+-------------------------------------------+ -| |ZZ |-07:00, -06:00 ... +06:00, +07:00 | +| |ZZ |-07:00, -06:00 ... +06:00, +07:00, +08, Z | +--------------------------------+--------------+-------------------------------------------+ -| |Z |-0700, -0600 ... +0600, +0700 | +| |Z |-0700, -0600 ... +0600, +0700, +08, Z | +--------------------------------+--------------+-------------------------------------------+ -|**Timestamp** |X |1381685817 | +|**Timestamp** |X |1381685817, 1381685817.915482 ... [#t5]_ | +--------------------------------+--------------+-------------------------------------------+ .. 
rubric:: Footnotes @@ -380,7 +374,7 @@ Use the following tokens in parsing and formatting. Note that they're not the s .. [#t2] localization support only for formatting .. [#t3] the result is truncated to microseconds, with `half-to-even rounding `_. .. [#t4] timezone names from `tz database `_ provided via dateutil package -.. [#t5] support for the DDD and DDDD tokens will be added in a future release +.. [#t5] this token cannot be used for parsing timestamps out of natural language strings due to compatibility reasons Escaping Formats ~~~~~~~~~~~~~~~~ diff --git a/tests/arrow_tests.py b/tests/arrow_tests.py index ef6928765..f2436550e 100644 --- a/tests/arrow_tests.py +++ b/tests/arrow_tests.py @@ -5,7 +5,6 @@ import pickle import sys import time -import warnings from datetime import date, datetime, timedelta import pytz @@ -1826,69 +1825,5 @@ def test_get_iteration_params(self): ) self.assertEqual(arrow.Arrow._get_iteration_params(100, 120), (100, 120)) - with self.assertRaises(Exception): + with self.assertRaises(ValueError): arrow.Arrow._get_iteration_params(None, None) - - def test_list_to_iter_shim(self): - def newshim(): - return util.list_to_iter_shim(range(5), warn_text="testing") - - # Iterating over a shim once should not throw a warning - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - iter(newshim()) - list(newshim()) - for _ in newshim(): - pass - len(newshim()) # ...because it's called by `list(x)` - - self.assertEqual([], w) - - # Iterating over a shim twice (or more) should throw a warning - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - shim = newshim() - - for _ in shim: - pass - for _ in shim: - pass - - self.assertEqual(1, len(w)) - self.assertEqual(w[0].category, DeprecationWarning) - self.assertEqual("testing", w[0].message.args[0]) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - shim = newshim() - - 0 in shim - shim + [] - shim 
* 1 - shim[0] - shim.index(0) - shim.count(0) - - shim[0:0] = [] # doesn't warn on py2 - del shim[0:0] # doesn't warn on py2 - newshim().append(6) - if sys.version_info.major >= 3: # pragma: no cover - newshim().clear() - shim.copy() - shim.extend([]) - shim += [] - shim *= 1 - newshim().insert(0, 6) - shim.pop(-1) - newshim().remove(0) - newshim().reverse() - newshim().sort() - - if sys.version_info.major >= 3: # pragma: no cover - self.assertEqual(19, len(w)) - else: # pragma: no cover - self.assertEqual(15, len(w)) - for warn in w: - self.assertEqual(warn.category, DeprecationWarning) - self.assertEqual("testing", warn.message.args[0]) diff --git a/tests/factory_tests.py b/tests/factory_tests.py index f2ec3add1..0e89ffb01 100644 --- a/tests/factory_tests.py +++ b/tests/factory_tests.py @@ -26,7 +26,7 @@ def test_no_args(self): def test_timestamp_one_arg_no_arg(self): - no_arg = self.factory.get("1406430900").timestamp + no_arg = self.factory.get(1406430900).timestamp one_arg = self.factory.get("1406430900", "X").timestamp self.assertEqual(no_arg, one_arg) @@ -46,20 +46,28 @@ def test_struct_time(self): def test_one_arg_timestamp(self): - timestamp = 12345 - timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) + int_timestamp = int(time.time()) + timestamp_dt = datetime.utcfromtimestamp(int_timestamp).replace( + tzinfo=tz.tzutc() + ) + + self.assertEqual(self.factory.get(int_timestamp), timestamp_dt) + + with self.assertRaises(ParserError): + self.factory.get(str(int_timestamp)) - self.assertEqual(self.factory.get(timestamp), timestamp_dt) - self.assertEqual(self.factory.get(str(timestamp)), timestamp_dt) + float_timestamp = time.time() + timestamp_dt = datetime.utcfromtimestamp(float_timestamp).replace( + tzinfo=tz.tzutc() + ) - timestamp = 123.45 - timestamp_dt = datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz.tzutc()) + self.assertEqual(self.factory.get(float_timestamp), timestamp_dt) - 
self.assertEqual(self.factory.get(timestamp), timestamp_dt) - self.assertEqual(self.factory.get(str(timestamp)), timestamp_dt) + with self.assertRaises(ParserError): + self.factory.get(str(float_timestamp)) - # Issue 216 - timestamp = "99999999999999999999999999" + # Regression test for issue #216 + timestamp = 99999999999999999999999999 # Python 3 raises `OverflowError`, Python 2 raises `ValueError` with self.assertRaises((OverflowError, ValueError)): self.factory.get(timestamp) diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 095fa5040..36bafcc9d 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -9,7 +9,7 @@ from dateutil import tz from arrow import parser -from arrow.parser import DateTimeParser, ParserError +from arrow.parser import DateTimeParser, ParserError, ParserMatchError class DateTimeParserTests(Chai): @@ -22,7 +22,7 @@ def test_parse_multiformat(self): mock_datetime = self.mock() - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserMatchError) self.expect(self.parser.parse).args("str", "fmt_b").returns(mock_datetime) result = self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -31,8 +31,8 @@ def test_parse_multiformat(self): def test_parse_multiformat_all_fail(self): - self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserError) - self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserError) + self.expect(self.parser.parse).args("str", "fmt_a").raises(ParserMatchError) + self.expect(self.parser.parse).args("str", "fmt_b").raises(ParserMatchError) with self.assertRaises(ParserError): self.parser._parse_multiformat("str", ["fmt_a", "fmt_b"]) @@ -111,6 +111,41 @@ def test_parser_multiple_line_caching(self): for _ in range(100): self.parser._generate_pattern_re("fmt_a") + def test_YY_and_YYYY_format_list(self): + + self.assertEqual( + self.parser.parse("15/01/19", ["DD/MM/YY", "DD/MM/YYYY"]), + datetime(2019, 
1, 15), + ) + + # Regression test for issue #580 + self.assertEqual( + self.parser.parse("15/01/2019", ["DD/MM/YY", "DD/MM/YYYY"]), + datetime(2019, 1, 15), + ) + + self.assertEqual( + self.parser.parse( + "15/01/2019T04:05:06.789120Z", + ["D/M/YYThh:mm:ss.SZ", "D/M/YYYYThh:mm:ss.SZ"], + ), + datetime(2019, 1, 15, 4, 5, 6, 789120, tzinfo=tz.tzutc()), + ) + + # regression test for issue #447 + def test_timestamp_format_list(self): + # should not match on the "X" token + self.assertEqual( + self.parser.parse( + "15 Jul 2000", + ["MM/DD/YYYY", "YYYY-MM-DD", "X", "DD-MMMM-YYYY", "D MMM YYYY"], + ), + datetime(2000, 7, 15), + ) + + with self.assertRaises(ParserError): + self.parser.parse("15 Jul", "X") + class DateTimeParserParseTests(Chai): def setUp(self): @@ -139,12 +174,12 @@ def test_parse_unrecognized_token(self): def test_parse_parse_no_match(self): - with self.assertRaises(parser.ParserError): + with self.assertRaises(ParserError): self.parser.parse("01-01", "YYYY-MM-DD") def test_parse_separators(self): - with self.assertRaises(parser.ParserError): + with self.assertRaises(ParserError): self.parser.parse("1403549231", "YYYY-MM-DD") def test_parse_numbers(self): @@ -165,9 +200,43 @@ def test_parse_year_two_digit(self): def test_parse_timestamp(self): tz_utc = tz.tzutc() - timestamp = int(time.time()) - self.expected = datetime.fromtimestamp(timestamp, tz=tz_utc) - self.assertEqual(self.parser.parse(str(timestamp), "X"), self.expected) + int_timestamp = int(time.time()) + self.expected = datetime.fromtimestamp(int_timestamp, tz=tz_utc) + self.assertEqual( + self.parser.parse("{:d}".format(int_timestamp), "X"), self.expected + ) + + float_timestamp = time.time() + self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) + self.assertEqual( + self.parser.parse("{:f}".format(float_timestamp), "X"), self.expected + ) + + # test handling of ns timestamp (arrow will round to 6 digits regardless) + self.expected = datetime.fromtimestamp(float_timestamp, 
tz=tz_utc) + self.assertEqual( + self.parser.parse("{:f}123".format(float_timestamp), "X"), self.expected + ) + + # test ps timestamp (arrow will round to 6 digits regardless) + self.expected = datetime.fromtimestamp(float_timestamp, tz=tz_utc) + self.assertEqual( + self.parser.parse("{:f}123456".format(float_timestamp), "X"), self.expected + ) + + # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will + # break cases like "15 Jul 2000" and a format list (see issue #447) + with self.assertRaises(ParserError): + natural_lang_string = "Meet me at {} at the restaurant.".format( + float_timestamp + ) + self.parser.parse(natural_lang_string, "X") + + with self.assertRaises(ParserError): + self.parser.parse("1565982019.", "X") + + with self.assertRaises(ParserError): + self.parser.parse(".1565982019", "X") def test_parse_names(self): @@ -211,9 +280,9 @@ def test_parse_tz_zz(self): def test_parse_tz_name_zzz(self): for tz_name in ( # best solution would be to test on every available tz name from - # the tz database but it is actualy tricky to retrieve them from + # the tz database but it is actually tricky to retrieve them from # dateutil so here is short list that should match all - # naming patterns/conventions in used tz databaze + # naming patterns/conventions in used tz database "Africa/Tripoli", "America/Port_of_Spain", "Australia/LHI", @@ -234,50 +303,46 @@ def test_parse_tz_name_zzz(self): # note that offsets are not timezones with self.assertRaises(ParserError): - self.parser.parse("2013-01-01 +1000", "YYYY-MM-DD ZZZ") + self.parser.parse("2013-01-01 12:30:45.9+1000", "YYYY-MM-DDZZZ") - def test_parse_subsecond(self): + with self.assertRaises(ParserError): + self.parser.parse("2013-01-01 12:30:45.9+10:00", "YYYY-MM-DDZZZ") + + with self.assertRaises(ParserError): + self.parser.parse("2013-01-01 12:30:45.9-10", "YYYY-MM-DDZZZ") + def test_parse_subsecond(self): + # TODO: make both test_parse_subsecond functions in 
Parse and ParseISO + # tests use the same expected objects (use pytest fixtures) self.expected = datetime(2013, 1, 1, 12, 30, 45, 900000) self.assertEqual( self.parser.parse("2013-01-01 12:30:45.9", "YYYY-MM-DD HH:mm:ss.S"), self.expected, ) - self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.9"), self.expected) self.expected = datetime(2013, 1, 1, 12, 30, 45, 980000) self.assertEqual( self.parser.parse("2013-01-01 12:30:45.98", "YYYY-MM-DD HH:mm:ss.SS"), self.expected, ) - self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.98"), self.expected) self.expected = datetime(2013, 1, 1, 12, 30, 45, 987000) self.assertEqual( self.parser.parse("2013-01-01 12:30:45.987", "YYYY-MM-DD HH:mm:ss.SSS"), self.expected, ) - self.assertEqual( - self.parser.parse_iso("2013-01-01 12:30:45.987"), self.expected - ) self.expected = datetime(2013, 1, 1, 12, 30, 45, 987600) self.assertEqual( self.parser.parse("2013-01-01 12:30:45.9876", "YYYY-MM-DD HH:mm:ss.SSSS"), self.expected, ) - self.assertEqual( - self.parser.parse_iso("2013-01-01 12:30:45.9876"), self.expected - ) self.expected = datetime(2013, 1, 1, 12, 30, 45, 987650) self.assertEqual( self.parser.parse("2013-01-01 12:30:45.98765", "YYYY-MM-DD HH:mm:ss.SSSSS"), self.expected, ) - self.assertEqual( - self.parser.parse_iso("2013-01-01 12:30:45.98765"), self.expected - ) self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) self.assertEqual( @@ -286,68 +351,200 @@ def test_parse_subsecond(self): ), self.expected, ) - self.assertEqual( - self.parser.parse_iso("2013-01-01 12:30:45.987654"), self.expected - ) def test_parse_subsecond_rounding(self): self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) - format = "YYYY-MM-DD HH:mm:ss.S" + datetime_format = "YYYY-MM-DD HH:mm:ss.S" # round up string = "2013-01-01 12:30:45.9876539" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) 
self.assertEqual(self.parser.parse_iso(string), self.expected) # round down string = "2013-01-01 12:30:45.98765432" - self.assertEqual(self.parser.parse(string, format), self.expected) - # import pudb; pudb.set_trace() + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) # round half-up string = "2013-01-01 12:30:45.987653521" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) # round half-down string = "2013-01-01 12:30:45.9876545210" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) - # overflow (zero out the subseconds and increment the seconds) - # regression tests for issue #636 + # overflow (zero out the subseconds and increment the seconds) + # regression tests for issue #636 + def test_parse_subsecond_rounding_overflow(self): + datetime_format = "YYYY-MM-DD HH:mm:ss.S" + self.expected = datetime(2013, 1, 1, 12, 30, 46) string = "2013-01-01 12:30:45.9999995" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) self.expected = datetime(2013, 1, 1, 12, 31, 0) string = "2013-01-01 12:30:59.9999999" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) self.expected = datetime(2013, 1, 2, 0, 0, 0) string = "2013-01-01 23:59:59.9999999" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, 
datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) # 6 digits should remain unrounded self.expected = datetime(2013, 1, 1, 12, 30, 45, 999999) string = "2013-01-01 12:30:45.999999" - self.assertEqual(self.parser.parse(string, format), self.expected) + self.assertEqual(self.parser.parse(string, datetime_format), self.expected) self.assertEqual(self.parser.parse_iso(string), self.expected) - def test_map_lookup_keyerror(self): + # Regression tests for issue #560 + def test_parse_long_year(self): + with self.assertRaises(ParserError): + self.parser.parse("09 January 123456789101112", "DD MMMM YYYY") - with self.assertRaises(parser.ParserError): - parser.DateTimeParser._map_lookup({"a": "1"}, "b") + with self.assertRaises(ParserError): + self.parser.parse("123456789101112 09 January", "YYYY DD MMMM") + + with self.assertRaises(ParserError): + self.parser.parse("68096653015/01/19", "YY/M/DD") - def test_try_timestamp(self): + def test_parse_with_extra_words_at_start_and_end_invalid(self): + input_format_pairs = [ + ("blah2016", "YYYY"), + ("blah2016blah", "YYYY"), + ("2016blah", "YYYY"), + ("2016-05blah", "YYYY-MM"), + ("2016-05-16blah", "YYYY-MM-DD"), + ("2016-05-16T04:05:06.789120blah", "YYYY-MM-DDThh:mm:ss.S"), + ("2016-05-16T04:05:06.789120ZblahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120Zblah", "YYYY-MM-DDThh:mm:ss.SZ"), + ("2016-05-16T04:05:06.789120blahZ", "YYYY-MM-DDThh:mm:ss.SZ"), + ] - self.assertEqual(parser.DateTimeParser._try_timestamp("1.1"), 1.1) - self.assertEqual(parser.DateTimeParser._try_timestamp("1"), 1) - self.assertEqual(parser.DateTimeParser._try_timestamp("abc"), None) + for pair in input_format_pairs: + with self.assertRaises(ParserError): + self.parser.parse(pair[0], pair[1]) + + def test_parse_with_extra_words_at_start_and_end_valid(self): + # Spaces surrounding the parsable date are ok because we + # allow the parsing of natural language input + self.assertEqual( + 
self.parser.parse("blah 2016 blah", "YYYY"), datetime(2016, 1, 1) + ) + + self.assertEqual(self.parser.parse("blah 2016", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual(self.parser.parse("2016 blah", "YYYY"), datetime(2016, 1, 1)) + + # test one additional space along with space divider + self.assertEqual( + self.parser.parse( + "blah 2016-05-16 04:05:06.789120", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "2016-05-16 04:05:06.789120 blah", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + # test one additional space along with T divider + self.assertEqual( + self.parser.parse( + "blah 2016-05-16T04:05:06.789120", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "2016-05-16T04:05:06.789120 blah", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16T04:05:06.789120 at the restaurant.", + "YYYY-MM-DDThh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + "Meet me at 2016-05-16 04:05:06.789120 at the restaurant.", + "YYYY-MM-DD hh:mm:ss.S", + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + def test_parse_with_leading_and_trailing_whitespace(self): + self.assertEqual(self.parser.parse(" 2016", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual(self.parser.parse("2016 ", "YYYY"), datetime(2016, 1, 1)) + + self.assertEqual( + self.parser.parse(" 2016 ", "YYYY"), datetime(2016, 1, 1) + ) + + self.assertEqual( + self.parser.parse( + " 2016-05-16 04:05:06.789120 ", "YYYY-MM-DD hh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + self.assertEqual( + self.parser.parse( + " 2016-05-16T04:05:06.789120 ", "YYYY-MM-DDThh:mm:ss.S" + ), + datetime(2016, 5, 16, 4, 5, 6, 789120), + ) + + def test_parse_YYYY_DDDD(self): + self.assertEqual( + 
self.parser.parse("1998-136", "YYYY-DDDD"), datetime(1998, 5, 16) + ) + + self.assertEqual( + self.parser.parse("1998-006", "YYYY-DDDD"), datetime(1998, 1, 6) + ) + + with self.assertRaises(ParserError): + self.parser.parse("1998-456", "YYYY-DDDD") + + def test_parse_YYYY_DDD(self): + self.assertEqual(self.parser.parse("1998-6", "YYYY-DDD"), datetime(1998, 1, 6)) + + self.assertEqual( + self.parser.parse("1998-136", "YYYY-DDD"), datetime(1998, 5, 16) + ) + + with self.assertRaises(ParserError): + self.parser.parse("1998-756", "YYYY-DDD") + + # month cannot be passed with DDD and DDDD tokens + def test_parse_YYYY_MM_DDDD(self): + with self.assertRaises(ParserError): + self.parser.parse("2015-01-009", "YYYY-MM-DDDD") + + # year is required with the DDD and DDDD tokens + def test_parse_DDD_only(self): + with self.assertRaises(ParserError): + self.parser.parse("5", "DDD") + + def test_parse_DDDD_only(self): + with self.assertRaises(ParserError): + self.parser.parse("145", "DDDD") class DateTimeParserRegexTests(Chai): @@ -393,7 +590,7 @@ def test_format_subsecond(self): def test_format_tz(self): - self.assertEqual(self.format_regex.findall("ZZ-Z"), ["ZZ", "Z"]) + self.assertEqual(self.format_regex.findall("ZZZ-ZZ-Z"), ["ZZZ", "ZZ", "Z"]) def test_format_am_pm(self): @@ -403,6 +600,14 @@ def test_format_timestamp(self): self.assertEqual(self.format_regex.findall("X"), ["X"]) + def test_escape(self): + + escape_regex = parser.DateTimeParser._ESCAPE_RE + + self.assertEqual( + escape_regex.findall("2018-03-09 8 [h] 40 [hello]"), ["[h]", "[hello]"] + ) + def test_month_names(self): p = parser.DateTimeParser("en_us") @@ -423,15 +628,78 @@ def test_month_abbreviations(self): def test_digits(self): + self.assertEqual( + parser.DateTimeParser._ONE_OR_TWO_DIGIT_RE.findall("4-56"), ["4", "56"] + ) + self.assertEqual( + parser.DateTimeParser._ONE_OR_TWO_OR_THREE_DIGIT_RE.findall("4-56-789"), + ["4", "56", "789"], + ) + self.assertEqual( + 
parser.DateTimeParser._ONE_OR_MORE_DIGIT_RE.findall("4-56-789-1234-12345"), + ["4", "56", "789", "1234", "12345"], + ) self.assertEqual( parser.DateTimeParser._TWO_DIGIT_RE.findall("12-3-45"), ["12", "45"] ) + self.assertEqual( + parser.DateTimeParser._THREE_DIGIT_RE.findall("123-4-56"), ["123"] + ) self.assertEqual( parser.DateTimeParser._FOUR_DIGIT_RE.findall("1234-56"), ["1234"] ) + + def test_tz(self): + tz_z_re = parser.DateTimeParser._TZ_Z_RE + self.assertEqual(tz_z_re.findall("-0700"), [("-", "07", "00")]) + self.assertEqual(tz_z_re.findall("+07"), [("+", "07", "")]) + self.assertTrue(tz_z_re.search("15/01/2019T04:05:06.789120Z") is not None) + self.assertTrue(tz_z_re.search("15/01/2019T04:05:06.789120") is None) + + tz_zz_re = parser.DateTimeParser._TZ_ZZ_RE + self.assertEqual(tz_zz_re.findall("-07:00"), [("-", "07", "00")]) + self.assertEqual(tz_zz_re.findall("+07"), [("+", "07", "")]) + self.assertTrue(tz_zz_re.search("15/01/2019T04:05:06.789120Z") is not None) + self.assertTrue(tz_zz_re.search("15/01/2019T04:05:06.789120") is None) + + tz_name_re = parser.DateTimeParser._TZ_NAME_RE + self.assertEqual(tz_name_re.findall("Europe/Warsaw"), ["Europe/Warsaw"]) + self.assertEqual(tz_name_re.findall("GMT"), ["GMT"]) + + def test_timestamp(self): + timestamp_re = parser.DateTimeParser._TIMESTAMP_RE self.assertEqual( - parser.DateTimeParser._ONE_OR_TWO_DIGIT_RE.findall("4-56"), ["4", "56"] + timestamp_re.findall("1565707550.452729"), ["1565707550.452729"] ) + self.assertEqual(timestamp_re.findall("1565707550"), ["1565707550"]) + self.assertEqual(timestamp_re.findall("1565707550."), []) + self.assertEqual(timestamp_re.findall(".1565707550"), []) + + def test_time(self): + time_re = parser.DateTimeParser._TIME_RE + time_seperators = [":", ""] + + for sep in time_seperators: + self.assertEqual(time_re.findall("12"), [("12", "", "", "", "")]) + self.assertEqual( + time_re.findall("12{sep}35".format(sep=sep)), [("12", "35", "", "", "")] + ) + self.assertEqual( + 
time_re.findall("12{sep}35{sep}46".format(sep=sep)), + [("12", "35", "46", "", "")], + ) + self.assertEqual( + time_re.findall("12{sep}35{sep}46.952313".format(sep=sep)), + [("12", "35", "46", ".", "952313")], + ) + self.assertEqual( + time_re.findall("12{sep}35{sep}46,952313".format(sep=sep)), + [("12", "35", "46", ",", "952313")], + ) + + self.assertEqual(time_re.findall("12:"), []) + self.assertEqual(time_re.findall("12:35:46."), []) + self.assertEqual(time_re.findall("12:35:46,"), []) class DateTimeParserISOTests(Chai): @@ -444,6 +712,46 @@ def test_YYYY(self): self.assertEqual(self.parser.parse_iso("2013"), datetime(2013, 1, 1)) + def test_YYYY_DDDD(self): + self.assertEqual(self.parser.parse_iso("1998-136"), datetime(1998, 5, 16)) + + self.assertEqual(self.parser.parse_iso("1998-006"), datetime(1998, 1, 6)) + + with self.assertRaises(ParserError): + self.parser.parse_iso("1998-456") + + # 2016 is a leap year, so Feb 29 exists (leap day) + self.assertEqual(self.parser.parse_iso("2016-059"), datetime(2016, 2, 28)) + self.assertEqual(self.parser.parse_iso("2016-060"), datetime(2016, 2, 29)) + self.assertEqual(self.parser.parse_iso("2016-061"), datetime(2016, 3, 1)) + + # 2017 is not a leap year, so Feb 29 does not exist + self.assertEqual(self.parser.parse_iso("2017-059"), datetime(2017, 2, 28)) + self.assertEqual(self.parser.parse_iso("2017-060"), datetime(2017, 3, 1)) + self.assertEqual(self.parser.parse_iso("2017-061"), datetime(2017, 3, 2)) + + # Since 2016 is a leap year, the 366th day falls in the same year + self.assertEqual(self.parser.parse_iso("2016-366"), datetime(2016, 12, 31)) + + # Since 2017 is not a leap year, the 366th day falls in the next year + self.assertEqual(self.parser.parse_iso("2017-366"), datetime(2018, 1, 1)) + + def test_YYYY_DDDD_HH_mm_ssZ(self): + + self.assertEqual( + self.parser.parse_iso("2013-036 04:05:06+01:00"), + datetime(2013, 2, 5, 4, 5, 6, tzinfo=tz.tzoffset(None, 3600)), + ) + + self.assertEqual( + 
self.parser.parse_iso("2013-036 04:05:06Z"), + datetime(2013, 2, 5, 4, 5, 6, tzinfo=tz.tzutc()), + ) + + def test_YYYY_MM_DDDD(self): + with self.assertRaises(ParserError): + self.parser.parse_iso("2014-05-125") + def test_YYYY_MM(self): for separator in DateTimeParser.SEPARATORS: @@ -473,6 +781,19 @@ def test_YYYY_MM_DDTHH_mm(self): self.parser.parse_iso("2013-02-03T04:05"), datetime(2013, 2, 3, 4, 5) ) + def test_YYYY_MM_DDTHH(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03T04"), datetime(2013, 2, 3, 4) + ) + + def test_YYYY_MM_DDTHHZ(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03T04+01:00"), + datetime(2013, 2, 3, 4, tzinfo=tz.tzoffset(None, 3600)), + ) + def test_YYYY_MM_DDTHH_mm_ssZ(self): self.assertEqual( @@ -499,6 +820,23 @@ def test_YYYY_MM_DD_HH_mm(self): self.parser.parse_iso("2013-02-03 04:05"), datetime(2013, 2, 3, 4, 5) ) + def test_YYYY_MM_DD_HH(self): + + self.assertEqual( + self.parser.parse_iso("2013-02-03 04"), datetime(2013, 2, 3, 4) + ) + + def test_invalid_time(self): + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03 044") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03 04:05:06.") + def test_YYYY_MM_DD_HH_mm_ssZ(self): self.assertEqual( @@ -581,10 +919,67 @@ def test_YYYY_MM_DDTHH_mm_ss_SZ(self): datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzoffset(None, 3600)), ) - # Properly parse string with Z timezone self.assertEqual( - self.parser.parse_iso("2013-02-03T04:05:06.78912Z"), - datetime(2013, 2, 3, 4, 5, 6, 789120), + self.parser.parse_iso("2013-02-03 04:05:06.78912Z"), + datetime(2013, 2, 3, 4, 5, 6, 789120, tzinfo=tz.tzutc()), + ) + + def test_invalid_Z(self): + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912zz") + + with 
self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912Zz") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912ZZ") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912+Z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912-Z") + + with self.assertRaises(ParserError): + self.parser.parse_iso("2013-02-03T04:05:06.78912 Z") + + def test_parse_subsecond(self): + # TODO: make both test_parse_subsecond functions in Parse and ParseISO + # tests use the same expected objects (use pytest fixtures) + self.expected = datetime(2013, 1, 1, 12, 30, 45, 900000) + self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.9"), self.expected) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 980000) + self.assertEqual(self.parser.parse_iso("2013-01-01 12:30:45.98"), self.expected) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987000) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.987"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987600) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.9876"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987650) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.98765"), self.expected + ) + + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45.987654"), self.expected + ) + + # use comma as subsecond separator + self.expected = datetime(2013, 1, 1, 12, 30, 45, 987654) + self.assertEqual( + self.parser.parse_iso("2013-01-01 12:30:45,987654"), self.expected ) def test_gnu_date(self): @@ -613,6 +1008,118 @@ def test_isoformat(self): self.assertEqual(self.parser.parse_iso(dt.isoformat()), dt) + def test_parse_iso_with_leading_and_trailing_whitespace(self): + datetime_string = " 2016-11-15T06:37:19.123456" + with 
self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = " 2016-11-15T06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = "2016-11-15T06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = "2016-11-15T 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # leading whitespace + datetime_string = " 2016-11-15 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # trailing whitespace + datetime_string = "2016-11-15 06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + datetime_string = " 2016-11-15 06:37:19.123456 " + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + # two dividing spaces + datetime_string = "2016-11-15 06:37:19.123456" + with self.assertRaises(ParserError): + self.parser.parse_iso(datetime_string) + + def test_parse_iso_with_extra_words_at_start_and_end_invalid(self): + test_inputs = [ + "blah2016", + "blah2016blah", + "blah 2016 blah", + "blah 2016", + "2016 blah", + "blah 2016-05-16 04:05:06.789120", + "2016-05-16 04:05:06.789120 blah", + "blah 2016-05-16T04:05:06.789120", + "2016-05-16T04:05:06.789120 blah", + "2016blah", + "2016-05blah", + "2016-05-16blah", + "2016-05-16T04:05:06.789120blah", + "2016-05-16T04:05:06.789120ZblahZ", + "2016-05-16T04:05:06.789120Zblah", + "2016-05-16T04:05:06.789120blahZ", + "Meet me at 2016-05-16T04:05:06.789120 at the restaurant.", + "Meet me at 2016-05-16 04:05:06.789120 at the restaurant.", + ] + + for ti in test_inputs: + with self.assertRaises(ParserError): + self.parser.parse_iso(ti) + + def test_iso8601_basic_format(self): + self.assertEqual(self.parser.parse_iso("20180517"), datetime(2018, 5, 17)) + + self.assertEqual( + self.parser.parse_iso("20180517T10"), 
datetime(2018, 5, 17, 10) + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513.843456"), + datetime(2018, 5, 17, 10, 55, 13, 843456), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513Z"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzutc()), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513.843456-0700"), + datetime(2018, 5, 17, 10, 55, 13, 843456, tzinfo=tz.tzoffset(None, -25200)), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513-0700"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + self.assertEqual( + self.parser.parse_iso("20180517T105513-07"), + datetime(2018, 5, 17, 10, 55, 13, tzinfo=tz.tzoffset(None, -25200)), + ) + + # ordinal in basic format: YYYYDDDD + self.assertEqual(self.parser.parse_iso("1998136"), datetime(1998, 5, 16)) + + # timezone requires +- separator + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T1055130700") + + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T10551307") + + # too many digits in date + with self.assertRaises(ParserError): + self.parser.parse_iso("201860517T105513Z") + + # too many digits in time + with self.assertRaises(ParserError): + self.parser.parse_iso("20180517T1055213Z") + class TzinfoParserTests(Chai): def setUp(self): @@ -632,9 +1139,20 @@ def test_parse_utc(self): def test_parse_iso(self): self.assertEqual(self.parser.parse("01:00"), tz.tzoffset(None, 3600)) + self.assertEqual( + self.parser.parse("11:35"), tz.tzoffset(None, 11 * 3600 + 2100) + ) self.assertEqual(self.parser.parse("+01:00"), tz.tzoffset(None, 3600)) self.assertEqual(self.parser.parse("-01:00"), tz.tzoffset(None, -3600)) + self.assertEqual(self.parser.parse("0100"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("+0100"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("-0100"), tz.tzoffset(None, -3600)) + + self.assertEqual(self.parser.parse("01"), tz.tzoffset(None, 3600)) +
self.assertEqual(self.parser.parse("+01"), tz.tzoffset(None, 3600)) + self.assertEqual(self.parser.parse("-01"), tz.tzoffset(None, -3600)) + def test_parse_str(self): self.assertEqual(self.parser.parse("US/Pacific"), tz.gettz("US/Pacific")) @@ -750,7 +1268,7 @@ def test_localized_meridians_capitalized(self): parser_.parse("2013-01-01 5 DU", "YYYY-MM-DD h A"), datetime(2013, 1, 1, 17) ) - # regression check for https://github.com/crsmithdev/arrow/issues/607 + # regression test for issue #607 def test_es_meridians(self): parser_ = parser.DateTimeParser("es") @@ -759,6 +1277,18 @@ def test_es_meridians(self): datetime(2019, 6, 30, 20, 0), ) + with self.assertRaises(ParserError): + parser_.parse( + "Junio 30, 2019 - 08:00 pasdfasdfm", "MMMM DD, YYYY - hh:mm a" + ) + + def test_fr_meridians(self): + parser_ = parser.DateTimeParser("fr") + + # the French locale always uses a 24 hour clock, so it does not support meridians + with self.assertRaises(ParserError): + parser_.parse("Janvier 30, 2019 - 08:00 pm", "MMMM DD, YYYY - hh:mm a") + class DateTimeParserMonthOrdinalDayTests(Chai): def setUp(self): diff --git a/tests/util_tests.py b/tests/util_tests.py new file mode 100644 index 000000000..721697732 --- /dev/null +++ b/tests/util_tests.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +import time + +from chai import Chai + +from arrow import util + + +class UtilTests(Chai): + def test_is_timestamp(self): + timestamp_float = time.time() + timestamp_int = int(timestamp_float) + + self.assertTrue(util.is_timestamp(timestamp_int)) + self.assertTrue(util.is_timestamp(timestamp_float)) + + self.assertFalse(util.is_timestamp(str(timestamp_int))) + self.assertFalse(util.is_timestamp(str(timestamp_float))) + self.assertFalse(util.is_timestamp(True)) + self.assertFalse(util.is_timestamp(False)) + + full_datetime = "2019-06-23T13:12:42" + self.assertFalse(util.is_timestamp(full_datetime)) + + overflow_timestamp_float = 99999999999999999999999999.99999999999999999999999999 + with 
self.assertRaises((OverflowError, ValueError)): + util.is_timestamp(overflow_timestamp_float) + + overflow_timestamp_int = int(overflow_timestamp_float) + with self.assertRaises((OverflowError, ValueError)): + util.is_timestamp(overflow_timestamp_int)