Skip to content

Commit

Permalink
Parse UNIX timestamps consistently regardless of timezones (#954)
Browse files Browse the repository at this point in the history
  • Loading branch information
onlynone committed Sep 20, 2022
1 parent bd49404 commit 28cadc1
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 14 deletions.
22 changes: 18 additions & 4 deletions dateparser/date.py
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timedelta

import regex as re
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta

from dateparser.date_parser import date_parser
Expand All @@ -13,7 +14,8 @@
from dateparser.parser import _parse_absolute, _parse_nospaces
from dateparser.timezone_parser import pop_tz_offset_from_string
from dateparser.utils import apply_timezone_from_settings, \
set_correct_day_from_settings
set_correct_day_from_settings, \
get_timezone_from_tz_string
from dateparser.custom_language_detection.language_mapping import map_languages

APOSTROPHE_LOOK_ALIKE_CHARS = [
Expand Down Expand Up @@ -117,14 +119,26 @@ def get_date_from_timestamp(date_string, settings, negative=False):
if negative:
match = RE_SEARCH_NEGATIVE_TIMESTAMP.search(date_string)
else:
match = RE_SEARCH_TIMESTAMP.search(date_string)
match = RE_SEARCH_TIMESTAMP.search(date_string)

if match:
if (settings is None or
settings.TIMEZONE is None or
'local' in settings.TIMEZONE.lower()):
# If the timezone in settings is unset, or it's 'local', use the
# local timezone
timezone = get_localzone()
else:
# Otherwise, use the timezone given in settings
timezone = get_timezone_from_tz_string(settings.TIMEZONE)

seconds = int(match.group(1))
millis = int(match.group(2) or 0)
micros = int(match.group(3) or 0)
date_obj = datetime.fromtimestamp(seconds)
date_obj = date_obj.replace(microsecond=millis * 1000 + micros)
date_obj = (datetime
.fromtimestamp(seconds, timezone)
.replace(microsecond=millis * 1000 + micros, tzinfo=None)
)
date_obj = apply_timezone_from_settings(date_obj, settings)
return date_obj

Expand Down
19 changes: 10 additions & 9 deletions dateparser/utils/__init__.py
Expand Up @@ -65,22 +65,23 @@ def _get_missing_parts(fmt):
return missing


def localize_timezone(date_time, tz_string):
if date_time.tzinfo:
return date_time

tz = None

def get_timezone_from_tz_string(tz_string):
    """Resolve a timezone string into a tzinfo object.

    The string is first handed to ``timezone()``.  If that raises
    ``UnknownTimeZoneError``, the entries of ``_tz_offsets`` are scanned and
    the first one whose regex matches the string yields a ``StaticTzInfo``
    built from that entry's name and offset.

    :param tz_string: timezone name or abbreviation to resolve.
    :return: the object returned by ``timezone()``, or a ``StaticTzInfo``.
    :raises UnknownTimeZoneError: when neither lookup recognises the string.
    """
    try:
        return timezone(tz_string)
    except UnknownTimeZoneError:
        for name, info in _tz_offsets:
            # The string is prefixed with a space before matching, exactly as
            # the lookup did before this helper was split out.
            # NOTE(review): presumably the patterns expect a leading
            # separator -- confirm against the _tz_offsets definitions.
            if info['regex'].search(' %s' % tz_string):
                return StaticTzInfo(name, info['offset'])
        # No fallback matched: surface the original lookup failure.
        raise


def localize_timezone(date_time, tz_string):
if date_time.tzinfo:
return date_time

tz = get_timezone_from_tz_string(tz_string)

if hasattr(tz, 'localize'):
date_time = tz.localize(date_time)
else:
Expand Down
66 changes: 65 additions & 1 deletion tests/test_date.py
@@ -1,9 +1,12 @@
#!/usr/bin/env python

import os
import unittest
from collections import OrderedDict
from copy import copy
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone as dttz
from itertools import product
from time import tzset

from unittest.mock import Mock, patch
from parameterized import parameterized, param
Expand Down Expand Up @@ -713,13 +716,74 @@ def then_date_object_is_invalid(self):


class TestTimestampParser(BaseTestCase):
def given_parser(self, **params):
    """Create a ``DateDataParser`` from ``params`` and keep it on ``self.parser``."""
    parser = date.DateDataParser(**params)
    self.parser = parser

def given_tzstr(self, tzstr):
    """Switch the process ``TZ`` environment variable to ``tzstr``.

    The current ``TZ`` value (``None`` when it is unset) is stored on
    ``self.old_tzstr``.  ``TZ`` is then set to ``tzstr``, or removed from
    the environment when ``tzstr`` is ``None``, and ``tzset()`` is called.
    """
    # Remember what was there before we touch anything.
    self.old_tzstr = os.environ.get('TZ')

    if tzstr is None:
        # No timezone requested: make sure the variable is absent.
        os.environ.pop('TZ', None)
    else:
        os.environ['TZ'] = tzstr

    # Make the C library re-read TZ.
    tzset()

def reset_tzstr(self):
    """Undo a previous ``given_tzstr`` call.

    Does nothing when ``self.old_tzstr`` was never set.  Otherwise ``TZ`` is
    restored to the saved value (or removed when the saved value is
    ``None``), the ``old_tzstr`` attribute is deleted, and ``tzset()`` is
    called.
    """
    try:
        previous = self.old_tzstr
    except AttributeError:
        # given_tzstr was never called, so there is nothing to restore.
        return

    if previous is None:
        # TZ was originally unset: drop whatever is there now.
        os.environ.pop('TZ', None)
    else:
        os.environ['TZ'] = previous

    # Forget the saved value so a second reset becomes a no-op.
    del self.old_tzstr

    # Make the C library re-read TZ.
    tzset()

def test_timestamp_in_milliseconds(self):
    # '1570308760263' is expected to parse as 1570308760 seconds plus
    # 263000 microseconds; with no settings the result is compared
    # against the naive local-time conversion of those seconds.
    parsed = date.get_date_from_timestamp('1570308760263', None)
    expected = datetime.fromtimestamp(1570308760).replace(microsecond=263000)
    self.assertEqual(parsed, expected)

@parameterized.expand(
    product(
        ['1570308760'],
        ['EDT', 'EST', 'PDT', 'PST', 'UTC', 'local'],
        [None, 'EDT', 'EST', 'PDT', 'PST', 'UTC'],
        ['EST5EDT4', 'UTC0', 'PST8PDT7', None],
    )
)
def test_timestamp_with_different_timestr(self, timestamp, timezone, to_timezone, tzstr):
    # Every combination of the TIMEZONE setting, the optional TO_TIMEZONE
    # setting and the process TZ environment value must produce the same
    # instant for a given timestamp.
    settings = {
        'RETURN_AS_TIMEZONE_AWARE': True,
        'TIMEZONE': timezone,
    }

    # NOTE(review): is TO_TIMEZONE supposed to be allowed to be False, or
    # None? Until that is settled the key is only set when a value is given.
    if to_timezone is not None:
        settings['TO_TIMEZONE'] = to_timezone

    self.given_parser(settings=settings)

    # Restore TZ even when parsing raises or the assertion fails, so a
    # failing case cannot leak its timezone into the tests that follow.
    # reset_tzstr is a no-op if given_tzstr never stored the old value.
    try:
        self.given_tzstr(tzstr)

        self.assertEqual(
            self.parser.get_date_data(timestamp)['date_obj'],
            datetime.fromtimestamp(int(timestamp), dttz.utc)
        )
    finally:
        self.reset_tzstr()

def test_timestamp_in_microseconds(self):
self.assertEqual(
date.get_date_from_timestamp('1570308760263111', None),
Expand Down

0 comments on commit 28cadc1

Please sign in to comment.