diff --git a/dateparser/date.py b/dateparser/date.py
index 09bb88ac9..1b68b4a6c 100644
--- a/dateparser/date.py
+++ b/dateparser/date.py
@@ -4,6 +4,7 @@
 from datetime import datetime, timedelta
 
 import regex as re
+from tzlocal import get_localzone
 from dateutil.relativedelta import relativedelta
 
 from dateparser.date_parser import date_parser
@@ -13,7 +14,8 @@
 from dateparser.parser import _parse_absolute, _parse_nospaces
 from dateparser.timezone_parser import pop_tz_offset_from_string
 from dateparser.utils import apply_timezone_from_settings, \
-    set_correct_day_from_settings
+    set_correct_day_from_settings, \
+    get_timezone_from_tz_string
 from dateparser.custom_language_detection.language_mapping import map_languages
 
 APOSTROPHE_LOOK_ALIKE_CHARS = [
@@ -117,14 +119,26 @@ def get_date_from_timestamp(date_string, settings, negative=False):
     if negative:
         match = RE_SEARCH_NEGATIVE_TIMESTAMP.search(date_string)
     else:
-        match = RE_SEARCH_TIMESTAMP.search(date_string)
+        match = RE_SEARCH_TIMESTAMP.search(date_string)
 
     if match:
+        if (settings is None or
+                settings.TIMEZONE is None or
+                'local' in settings.TIMEZONE.lower()):
+            # If the timezone in settings is unset, or it's 'local', use the
+            # local timezone
+            timezone = get_localzone()
+        else:
+            # Otherwise, use the timezone given in settings
+            timezone = get_timezone_from_tz_string(settings.TIMEZONE)
+
         seconds = int(match.group(1))
         millis = int(match.group(2) or 0)
         micros = int(match.group(3) or 0)
-        date_obj = datetime.fromtimestamp(seconds)
-        date_obj = date_obj.replace(microsecond=millis * 1000 + micros)
+        date_obj = (datetime
+                    .fromtimestamp(seconds, timezone)
+                    .replace(microsecond=millis * 1000 + micros, tzinfo=None)
+                    )
         date_obj = apply_timezone_from_settings(date_obj, settings)
         return date_obj
 
diff --git a/dateparser/utils/__init__.py b/dateparser/utils/__init__.py
index 70b2fae48..b3ae557e7 100644
--- a/dateparser/utils/__init__.py
+++ b/dateparser/utils/__init__.py
@@ -65,22 +65,31 @@ def _get_missing_parts(fmt):
     return missing
 
 
-def localize_timezone(date_time, tz_string):
-    if date_time.tzinfo:
-        return date_time
-
-    tz = None
-
+def get_timezone_from_tz_string(tz_string):
+    """
+    Return a tzinfo object for ``tz_string``.
+
+    ``timezone(tz_string)`` is tried first. If it raises
+    ``UnknownTimeZoneError``, ``tz_string`` is matched against the patterns
+    in ``_tz_offsets`` and the first hit is returned as a ``StaticTzInfo``;
+    the original error is re-raised when nothing matches.
+    """
     try:
-        tz = timezone(tz_string)
+        return timezone(tz_string)
     except UnknownTimeZoneError as e:
         for name, info in _tz_offsets:
             if info['regex'].search(' %s' % tz_string):
-                tz = StaticTzInfo(name, info['offset'])
-                break
+                return StaticTzInfo(name, info['offset'])
         else:
             raise e
 
+
+def localize_timezone(date_time, tz_string):
+    if date_time.tzinfo:
+        return date_time
+
+    tz = get_timezone_from_tz_string(tz_string)
+
     if hasattr(tz, 'localize'):
         date_time = tz.localize(date_time)
     else:
diff --git a/tests/test_date.py b/tests/test_date.py
index debc629e9..98958ee91 100644
--- a/tests/test_date.py
+++ b/tests/test_date.py
@@ -1,9 +1,12 @@
 #!/usr/bin/env python
 
+import os
 import unittest
 from collections import OrderedDict
 from copy import copy
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone as dttz
+from itertools import product
+from time import tzset
 from unittest.mock import Mock, patch
 
 from parameterized import parameterized, param
@@ -713,6 +716,38 @@ def then_date_object_is_invalid(self):
 
 
 class TestTimestampParser(BaseTestCase):
+    def given_parser(self, **params):
+        self.parser = date.DateDataParser(**params)
+
+    def given_tzstr(self, tzstr):
+        # Save the existing value
+        self.old_tzstr = os.environ.get('TZ')
+
+        # Overwrite the value, or remove it
+        if tzstr is not None:
+            os.environ['TZ'] = tzstr
+        elif 'TZ' in os.environ:
+            del os.environ['TZ']
+
+        # Call tzset
+        tzset()
+
+    def reset_tzstr(self):
+        # If we never set it with given_tzstr, don't bother resetting it
+        if not hasattr(self, 'old_tzstr'):
+            return
+
+        # Restore the old value, or remove it if null
+        if self.old_tzstr is not None:
+            os.environ['TZ'] = self.old_tzstr
+        elif 'TZ' in os.environ:
+            del os.environ['TZ']
+
+        # Remove the local attribute
+        del self.old_tzstr
+
+        # Restore the old timezone behavior
+        tzset()
 
     def test_timestamp_in_milliseconds(self):
         self.assertEqual(
@@ -720,6 +755,36 @@ def test_timestamp_in_milliseconds(self):
             datetime.fromtimestamp(1570308760).replace(microsecond=263000)
         )
 
+    @parameterized.expand(
+        product(
+            ['1570308760'],
+            ['EDT', 'EST', 'PDT', 'PST', 'UTC', 'local'],
+            [None, 'EDT', 'EST', 'PDT', 'PST', 'UTC'],
+            ['EST5EDT4', 'UTC0', 'PST8PDT7', None],
+        )
+    )
+    def test_timestamp_with_different_timestr(self, timestamp, timezone, to_timezone, tzstr):
+        settings = {
+            'RETURN_AS_TIMEZONE_AWARE': True,
+            'TIMEZONE': timezone,
+        }
+
+        # TODO: confirm whether TO_TIMEZONE may be None/False; omit it when unset
+        if to_timezone is not None:
+            settings['TO_TIMEZONE'] = to_timezone
+
+        self.given_parser(settings=settings)
+
+        # Register the restore before touching TZ so the environment is
+        # put back even when the assertion below fails
+        self.addCleanup(self.reset_tzstr)
+        self.given_tzstr(tzstr)
+
+        self.assertEqual(
+            self.parser.get_date_data(timestamp)['date_obj'],
+            datetime.fromtimestamp(int(timestamp), dttz.utc)
+        )
+
     def test_timestamp_in_microseconds(self):
         self.assertEqual(
             date.get_date_from_timestamp('1570308760263111', None),