Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse unix timestamps consistently regardless of timezones #954

Merged
merged 4 commits into from Sep 20, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 12 additions & 3 deletions dateparser/date.py
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timedelta

import regex as re
from tzlocal import get_localzone
from dateutil.relativedelta import relativedelta

from dateparser.date_parser import date_parser
Expand All @@ -13,7 +14,8 @@
from dateparser.parser import _parse_absolute, _parse_nospaces
from dateparser.timezone_parser import pop_tz_offset_from_string
from dateparser.utils import apply_timezone_from_settings, \
set_correct_day_from_settings
set_correct_day_from_settings, \
get_timezone_from_tz_string
from dateparser.custom_language_detection.language_mapping import map_languages

APOSTROPHE_LOOK_ALIKE_CHARS = [
Expand Down Expand Up @@ -120,11 +122,18 @@ def get_date_from_timestamp(date_string, settings, negative=False):
match = RE_SEARCH_TIMESTAMP.search(date_string)

if match:
if settings is not None and settings.TIMEZONE is not None and 'local' not in settings.TIMEZONE.lower():
Gallaecio marked this conversation as resolved.
Show resolved Hide resolved
onlynone marked this conversation as resolved.
Show resolved Hide resolved
local_timezone = get_timezone_from_tz_string(settings.TIMEZONE)
else:
local_timezone = get_localzone()
onlynone marked this conversation as resolved.
Show resolved Hide resolved

seconds = int(match.group(1))
millis = int(match.group(2) or 0)
micros = int(match.group(3) or 0)
date_obj = datetime.fromtimestamp(seconds)
date_obj = date_obj.replace(microsecond=millis * 1000 + micros)
date_obj = (datetime
.fromtimestamp(seconds, local_timezone)
.replace(microsecond=millis * 1000 + micros, tzinfo=None)
)
date_obj = apply_timezone_from_settings(date_obj, settings)
return date_obj

Expand Down
19 changes: 10 additions & 9 deletions dateparser/utils/__init__.py
Expand Up @@ -65,22 +65,23 @@ def _get_missing_parts(fmt):
return missing


def localize_timezone(date_time, tz_string):
if date_time.tzinfo:
return date_time

tz = None

def get_timezone_from_tz_string(tz_string):
try:
tz = timezone(tz_string)
return timezone(tz_string)
except UnknownTimeZoneError as e:
for name, info in _tz_offsets:
if info['regex'].search(' %s' % tz_string):
tz = StaticTzInfo(name, info['offset'])
break
return StaticTzInfo(name, info['offset'])
else:
raise e


def localize_timezone(date_time, tz_string):
if date_time.tzinfo:
return date_time

tz = get_timezone_from_tz_string(tz_string)

if hasattr(tz, 'localize'):
date_time = tz.localize(date_time)
else:
Expand Down
66 changes: 65 additions & 1 deletion tests/test_date.py
@@ -1,9 +1,12 @@
#!/usr/bin/env python

import os
import unittest
from collections import OrderedDict
from copy import copy
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone as dttz
from itertools import product
from time import tzset

from unittest.mock import Mock, patch
from parameterized import parameterized, param
Expand Down Expand Up @@ -713,13 +716,74 @@ def then_date_object_is_invalid(self):


class TestTimestampParser(BaseTestCase):
def given_parser(self, **params):
    """Build the ``DateDataParser`` under test, forwarding *params* to it."""
    parser = date.DateDataParser(**params)
    self.parser = parser

def given_tzstr(self, tzstr):
    """Point the process-wide ``TZ`` environment variable at *tzstr*.

    The value found in the environment before the first call is kept in
    ``self.old_tzstr`` so that ``reset_tzstr`` can put it back.

    :param tzstr: value to store in ``TZ`` (for example ``'EST5EDT4'``),
        or ``None`` to remove ``TZ`` from the environment.
    """
    # Remember the pre-existing value only once: a repeated call must not
    # replace the saved original with a value this helper set itself.
    if not hasattr(self, 'old_tzstr'):
        self.old_tzstr = os.environ.get('TZ')

    # Overwrite the value, or remove it
    if tzstr is not None:
        os.environ['TZ'] = tzstr
    else:
        os.environ.pop('TZ', None)

    # Apply the changed TZ to the time conversion rules
    tzset()

def reset_tzstr(self):
    """Undo ``given_tzstr`` by restoring the saved ``TZ`` value.

    Does nothing when no value was saved, i.e. when ``self.old_tzstr``
    is not set.
    """
    if hasattr(self, 'old_tzstr'):
        previous = self.old_tzstr

        # A saved value of None means TZ was absent before it was changed.
        if previous is None:
            os.environ.pop('TZ', None)
        else:
            os.environ['TZ'] = previous

        # Forget the saved value so a second reset is a no-op.
        del self.old_tzstr

        # Apply the restored TZ to the time conversion rules
        tzset()

def test_timestamp_in_milliseconds(self):
    # The three digits after the ten-digit seconds part are milliseconds,
    # so '...263' must come back as 263000 microseconds.
    parsed = date.get_date_from_timestamp('1570308760263', None)
    expected = datetime.fromtimestamp(1570308760).replace(microsecond=263000)
    self.assertEqual(parsed, expected)

@parameterized.expand(
    product(
        ['1570308760'],
        ['EDT', 'EST', 'PDT', 'PST', 'UTC', 'local'],
        [None, 'EDT', 'EST', 'PDT', 'PST', 'UTC'],
        ['EST5EDT4', 'UTC0', 'PST8PDT7', None],
    )
)
def test_timestamp_with_different_timestr(self, timestamp, timezone, to_timezone, tzstr):
    # For every combination of the TIMEZONE setting, the optional
    # TO_TIMEZONE setting and the TZ environment variable, the parsed
    # date must equal the UTC-aware datetime for the same timestamp.
    settings = {
        'RETURN_AS_TIMEZONE_AWARE': True,
        'TIMEZONE': timezone,
    }

    # TO_TIMEZONE is only passed when a target timezone is given.
    # NOTE(review): it is unclear whether the setting may be False or
    # None instead of being left out - confirm.
    if to_timezone is not None:
        settings['TO_TIMEZONE'] = to_timezone

    self.given_parser(settings=settings)

    self.given_tzstr(tzstr)
    try:
        self.assertEqual(
            self.parser.get_date_data(timestamp)['date_obj'],
            datetime.fromtimestamp(int(timestamp), dttz.utc)
        )
    finally:
        # Restore TZ even when the assertion fails, so one failing case
        # cannot change the timezone seen by the tests that run after it.
        self.reset_tzstr()

def test_timestamp_in_microseconds(self):
self.assertEqual(
date.get_date_from_timestamp('1570308760263111', None),
Expand Down