diff --git a/.gitignore b/.gitignore index afa82848f..669c9f718 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.py[cod] +.python-version # C extensions *.so @@ -47,6 +48,7 @@ docs/_build # Editors *.swp .idea +.vscode/ # Other raw_data diff --git a/dateparser/conf.py b/dateparser/conf.py index 4bc6b281a..3a7c41f89 100644 --- a/dateparser/conf.py +++ b/dateparser/conf.py @@ -120,7 +120,7 @@ def _check_require_part(setting_name, setting_value): def _check_parsers(setting_name, setting_value): """Returns `True` if the provided list of parsers contains valid values""" existing_parsers = [ - 'timestamp', 'relative-time', 'custom-formats', 'absolute-time', 'no-spaces-time' + 'timestamp', 'relative-time', 'custom-formats', 'absolute-time', 'no-spaces-time', 'negative-timestamp' ] # FIXME: Extract the list of existing parsers from another place (#798) unknown_parsers = set(setting_value) - set(existing_parsers) if unknown_parsers: diff --git a/dateparser/date.py b/dateparser/date.py index ac8283ba2..60a6d26a4 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -40,6 +40,7 @@ RE_SANITIZE_APOSTROPHE = re.compile('|'.join(APOSTROPHE_LOOK_ALIKE_CHARS)) RE_SEARCH_TIMESTAMP = re.compile(r'^(\d{10})(\d{3})?(\d{3})?(?![^.])') +RE_SEARCH_NEGATIVE_TIMESTAMP = re.compile(r'^([-]\d{10})(\d{3})?(\d{3})?(?![^.])') def sanitize_spaces(date_string): @@ -112,8 +113,12 @@ def sanitize_date(date_string): return date_string -def get_date_from_timestamp(date_string, settings): - match = RE_SEARCH_TIMESTAMP.search(date_string) +def get_date_from_timestamp(date_string, settings, negative=False): + if negative: + match = RE_SEARCH_NEGATIVE_TIMESTAMP.search(date_string) + else: + match = RE_SEARCH_TIMESTAMP.search(date_string) + if match: seconds = int(match.group(1)) millis = int(match.group(2) or 0) @@ -166,6 +171,7 @@ def __init__(self, locale, date_string, date_formats, settings=None): self._translated_date_with_formatting = None self._parsers = { 'timestamp': 
self._try_timestamp, + 'negative-timestamp': self._try_negative_timestamp, 'relative-time': self._try_freshness_parser, 'custom-formats': self._try_given_formats, 'absolute-time': self._try_absolute_parser, @@ -185,12 +191,18 @@ def _parse(self): else: return None - def _try_timestamp(self): + def _try_timestamp_parser(self, negative=False): return DateData( - date_obj=get_date_from_timestamp(self.date_string, self._settings), + date_obj=get_date_from_timestamp(self.date_string, self._settings, negative=negative), period='time' if self._settings.RETURN_TIME_AS_PERIOD else 'day', ) + def _try_timestamp(self): + return self._try_timestamp_parser() + + def _try_negative_timestamp(self): + return self._try_timestamp_parser(negative=True) + def _try_freshness_parser(self): try: return freshness_date_parser.get_date_data(self._get_translated_date(), self._settings) diff --git a/docs/settings.rst b/docs/settings.rst index 4cd80d880..337b8e713 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -144,7 +144,7 @@ Language Detection Default Languages +++++++++++++++++ -``DEFAULT_LANGUAGES``: It is a ``list`` of language codes in ISO 639 that will be used as default +``DEFAULT_LANGUAGES``: It is a ``list`` of language codes in ISO 639 that will be used as default languages for parsing when language detection fails. eg. ["en", "fr"]: >>> from dateparser import parse @@ -181,6 +181,9 @@ The following parsers exist: followed by additional digits or a period (``.``), those first 10 digits are interpreted as `Unix time `_. +- ``'negative-timestamp'``: ``'timestamp'`` for negative timestamps. For + example, parses ``-1436745600000`` as ``1924-06-22T00:00:00``. + - ``'relative-time'``: Parses dates and times expressed in relation to the current date and time (e.g. “1 day ago”, “in 2 weeks”). 
diff --git a/tests/test_date.py b/tests/test_date.py index 7ea636663..debc629e9 100644 --- a/tests/test_date.py +++ b/tests/test_date.py @@ -726,6 +726,34 @@ def test_timestamp_in_microseconds(self): datetime.fromtimestamp(1570308760).replace(microsecond=263111) ) + @parameterized.expand([ + param( + input_timestamp='-1570308760', + negative=True, + result=datetime.fromtimestamp(-1570308760) + ), + param( + input_timestamp='-1570308760', + negative=False, + result=None + ), + param( + input_timestamp='1570308760', + negative=True, + result=None + ), + param( + input_timestamp='1570308760', + negative=False, + result=datetime.fromtimestamp(1570308760) + ) + ]) + def test_timestamp_with_negative(self, input_timestamp, negative, result): + self.assertEqual( + date.get_date_from_timestamp(input_timestamp, None, negative=negative), + result + ) + @parameterized.expand([ param(date_string='15703087602631'), param(date_string='157030876026xx'), diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py index ead18292b..b9d7bf076 100644 --- a/tests/test_date_parser.py +++ b/tests/test_date_parser.py @@ -645,6 +645,18 @@ def test_parse_timestamp(self, date_string, expected): self.then_date_obj_exactly_is(expected) self.then_period_is('day') + @parameterized.expand([ + param('-1484823450', expected=datetime(1922, 12, 13, 13, 2, 30)), + param('-1436745600000', expected=datetime(1924, 6, 22, 0, 0)), + param('-1015673450000001', expected=datetime(1937, 10, 25, 12, 29, 10, 1)) + ]) + def test_parse_negative_timestamp(self, date_string, expected): + self.given_local_tz_offset(0) + self.given_parser(settings={'TO_TIMEZONE': 'UTC', 'PARSERS': ['negative-timestamp']}) + self.when_date_is_parsed(date_string) + self.then_date_obj_exactly_is(expected) + self.then_period_is('day') + @parameterized.expand([ # Epoch timestamps. param('1484823450', expected=datetime(2017, 1, 19, 10, 57, 30)),