forked from man-group/arctic
/
test_toplevel.py
182 lines (153 loc) · 9.99 KB
/
test_toplevel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Prefer the standard-library mock; fall back to the standalone ``mock``
# package only when ``unittest.mock`` cannot be imported. Catching
# ImportError specifically (instead of a bare ``except``) avoids masking
# unrelated failures such as KeyboardInterrupt or SystemExit.
try:
    from unittest.mock import Mock, patch, MagicMock, create_autospec, sentinel, call
except ImportError:
    from mock import Mock, patch, MagicMock, create_autospec, sentinel, call
from datetime import datetime as dt
import numpy as np
import pandas as pd
import pytest
from dateutil.rrule import rrule, DAILY
from mockextras import when
from pandas.util.testing import assert_frame_equal
from arctic.date import DateRange, mktz
from arctic.exceptions import OverlappingDataException
from arctic.exceptions import UnhandledDtypeException
from arctic.tickstore.tickstore import TickStore
from arctic.tickstore.toplevel import TopLevelTickStore, TickStoreLibrary
utc = mktz('UTC')
def test_raise_exception_if_daterange_is_not_provided():
    """Passing ``None`` to ``_get_library_metadata`` must raise with a clear message."""
    top_level_store = TopLevelTickStore(Mock())

    with pytest.raises(Exception) as exc_info:
        top_level_store._get_library_metadata(None)

    # Checked after the ``with`` block: code following the raising call
    # inside the block would never execute.
    raised_message = str(exc_info.value)
    assert "A date range must be provided" in raised_message
def test_raise_exception_if_date_range_does_not_contain_start_date():
    """A DateRange whose start is ``None`` must be rejected by ``_get_library_metadata``."""
    top_level_store = TopLevelTickStore(Mock())
    open_start_range = DateRange(start=None, end=dt(2011, 1, 1))

    with pytest.raises(Exception) as exc_info:
        top_level_store._get_library_metadata(open_start_range)

    expected_message = "The date range {0} must contain a start and end date".format(open_start_range)
    assert expected_message in str(exc_info.value)
def test_raise_exception_if_date_range_does_not_contain_end_date():
    """A DateRange whose end is ``None`` must be rejected by ``_get_library_metadata``."""
    top_level_store = TopLevelTickStore(Mock())
    open_end_range = DateRange(start=dt(2011, 1, 1), end=None)

    with pytest.raises(Exception) as exc_info:
        top_level_store._get_library_metadata(open_end_range)

    expected_message = "The date range {0} must contain a start and end date".format(open_end_range)
    assert expected_message in str(exc_info.value)
def test_raise_exception_if_date_range_does_not_contain_start_and_end_date():
    """A DateRange with neither start nor end must be rejected by ``_get_library_metadata``."""
    top_level_store = TopLevelTickStore(Mock())
    unbounded_range = DateRange(start=None, end=None)

    with pytest.raises(Exception) as exc_info:
        top_level_store._get_library_metadata(unbounded_range)

    expected_message = "The date range {0} must contain a start and end date".format(unbounded_range)
    assert expected_message in str(exc_info.value)
def test_raise_exception_and_log_an_error_if_an_invalid_library_name_is_added():
    """``add`` must raise and log an error when the library lookup fails."""
    # Any item lookup on the mocked arctic instance blows up.
    failing_arctic_lib = MagicMock()
    failing_arctic_lib.arctic.__getitem__.side_effect = Exception()
    top_level_store = TopLevelTickStore(failing_arctic_lib)

    with patch("arctic.tickstore.toplevel.logger") as mock_logger, pytest.raises(Exception):
        top_level_store.add(None, "blah")

    mock_logger.error.assert_called_once_with("Could not load library")
def test_raise_exception_if_date_range_overlaps():
    """``add`` must raise OverlappingDataException when metadata already exists for the range."""
    store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock())
    # A non-empty metadata result is what signals an overlap to ``add``.
    store._get_library_metadata.return_value = [TickStoreLibrary('lib1', None)]

    with pytest.raises(OverlappingDataException) as exc_info:
        TopLevelTickStore.add(
            store,
            DateRange(start=dt(2010, 1, 1), end=dt(2011, 1, 1, 23, 59, 59, 999000)),
            "blah",
        )

    assert "There are libraries that overlap with the date range:" in str(exc_info.value)
@pytest.mark.parametrize(
    'start, end, expected_start, expected_end',
    [
        # UTC-aware bounds: expected to be stored unchanged.
        (dt(2010, 1, 1, tzinfo=mktz('UTC')),
         dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC')),
         dt(2010, 1, 1, tzinfo=mktz('UTC')),
         dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC'))),
        # Naive bounds: expected to be stored as the same wall-clock time in UTC.
        (dt(2010, 1, 1),
         dt(2010, 12, 31, 23, 59, 59, 999000),
         dt(2010, 1, 1, tzinfo=mktz('UTC')),
         dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC'))),
        # New York bounds: expected to be stored as the UTC values listed.
        (dt(2009, 12, 31, 19, tzinfo=mktz('America/New_York')),
         dt(2010, 12, 31, 18, 59, 59, 999000, tzinfo=mktz('America/New_York')),
         dt(2010, 1, 1, tzinfo=mktz('UTC')),
         dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC'))),
    ])
def test_add_library_to_colllection_if_date_range_is_on_UTC_or_naive_day_boundaries(start, end, expected_start, expected_end):
    """``add`` must upsert the library record with the expected UTC start/end."""
    store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
    store._get_library_metadata.return_value = []  # nothing overlaps

    TopLevelTickStore.add(store, DateRange(start=start, end=end), "blah")

    expected_filter = {'library_name': "blah"}
    expected_update = {'$set': {'start': expected_start, 'end': expected_end}}
    store._collection.update_one.assert_called_once_with(expected_filter, expected_update, upsert=True)
@pytest.mark.parametrize(('start', 'end'),
                         [(dt(2010, 1, 1, 2, tzinfo=mktz('UTC')), dt(2011, 1, 1, tzinfo=mktz('UTC'))),
                          (dt(2010, 1, 1, tzinfo=mktz('UTC')), dt(2011, 1, 1, 2, tzinfo=mktz('UTC'))),
                          (dt(2010, 1, 1, 2, tzinfo=mktz('UTC')), dt(2011, 1, 1, 2, tzinfo=mktz('UTC'))),
                          (dt(2010, 1, 1, 2), dt(2011, 1, 1)),
                          (dt(2010, 1, 1), dt(2011, 1, 1, 2)),
                          (dt(2010, 1, 1, 2), dt(2011, 1, 1, 2)),
                          (dt(2009, 12, 31, 21, 10, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, tzinfo=mktz('America/New_York'))),
                          (dt(2009, 12, 31, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, tzinfo=mktz('America/New_York'))),
                          (dt(2009, 12, 31, 21, 10, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, 9, 21, tzinfo=mktz('America/New_York')))
                          ])
def test_raise_error_add_library_is_called_with_a_date_range_not_on_day_boundaries(start, end):
    """``add`` must raise AssertionError for ranges that are off UTC day boundaries.

    Each parametrized case has a start and/or end that is not aligned to a
    UTC day boundary (UTC-aware, naive and America/New_York inputs).
    """
    # Mock setup stays outside ``pytest.raises`` so that only the call under
    # test can satisfy the expected AssertionError; a failure while building
    # the mocks surfaces as a genuine test error instead.
    store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
    store._get_library_metadata.return_value = []  # no overlapping libraries
    date_range = DateRange(start=start, end=end)

    with pytest.raises(AssertionError) as e:
        TopLevelTickStore.add(store, date_range, "blah")

    assert "Date range should fall on UTC day boundaries" in str(e.value)
@pytest.mark.parametrize(('start', 'end', 'expected_start_index', 'expected_end_index'),
                         [(dt(2010, 1, 1), dt(2010, 1, 5), 0, 3),
                          (dt(2010, 1, 1), dt(2010, 1, 6), 0, 3),
                          (dt(2010, 1, 1, 1), dt(2010, 1, 6), 1, 3),
                          (dt(2010, 1, 1, 1), dt(2010, 1, 4, 2), 1, 2),
                          (dt(2009, 1, 1), dt(2010, 1, 5), 0, 3),
                          ])
def test_slice_pandas_dataframe(start, end, expected_start_index, expected_end_index):
    """``_slice`` on a DataFrame must return the rows between ``start`` and ``end``.

    The frame has five rows spaced two days apart starting 2010-01-01; the
    expected result is the positional slice
    ``data.iloc[expected_start_index:expected_end_index]``.
    """
    top_level_tick_store = TopLevelTickStore(Mock())
    dates = pd.date_range('20100101', periods=5, freq='2D')
    data = pd.DataFrame(np.random.randn(5, 4), index=dates, columns=list('ABCD'))
    expected = data.iloc[expected_start_index:expected_end_index]

    result = top_level_tick_store._slice(data, start, end)

    # assert_frame_equal raises with its own diff on mismatch. The original
    # trailing ``, '{}\n{}'.format(...)`` only built a throwaway tuple and
    # never acted as a failure message, so it has been dropped.
    assert_frame_equal(expected, result)
@pytest.mark.parametrize(
    'start, end, expected_start_index, expected_end_index',
    [
        (dt(2010, 1, 1, tzinfo=utc), dt(2010, 1, 5, tzinfo=utc), 0, 3),
        (dt(2010, 1, 1, tzinfo=utc), dt(2010, 1, 6, tzinfo=utc), 0, 3),
        (dt(2010, 1, 1, 1, tzinfo=utc), dt(2010, 1, 6, tzinfo=utc), 1, 3),
        (dt(2010, 1, 1, 1, tzinfo=utc), dt(2010, 1, 4, 2, tzinfo=utc), 1, 2),
        (dt(2009, 1, 1, tzinfo=utc), dt(2010, 1, 5, tzinfo=utc), 0, 3),
    ])
def test_slice_list_of_dicts(start, end, expected_start_index, expected_end_index):
    """``_slice`` on a list of tick dicts must return the entries between ``start`` and ``end``."""
    store = TopLevelTickStore(Mock())
    # Five UTC timestamps, two days apart, starting 2010-01-01.
    timestamps = list(rrule(DAILY, count=5, dtstart=dt(2010, 1, 1, tzinfo=utc), interval=2))
    ticks = [{'index': stamp, 'A': position} for position, stamp in enumerate(timestamps)]

    sliced = store._slice(ticks, start, end)

    assert ticks[expected_start_index:expected_end_index] == sliced
def test_write_pandas_data_to_right_libraries():
    """``write`` must hand each underlying library the slice matching its date range."""
    store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
    store._collection.find.return_value = [
        {'library_name': sentinel.libname1, 'start': sentinel.st1, 'end': sentinel.end1},
        {'library_name': sentinel.libname2, 'start': sentinel.st2, 'end': sentinel.end2},
    ]

    # Stand-in slices of different lengths so the two libraries are distinguishable.
    first_slice, second_slice = range(2), range(4)
    when(store._slice).called_with(sentinel.data, sentinel.st1, sentinel.end1).then(first_slice)
    when(store._slice).called_with(sentinel.data, sentinel.st2, sentinel.end2).then(second_slice)

    first_lib, second_lib = Mock(), Mock()
    library_lookup = store._arctic_lib.arctic.__getitem__
    when(library_lookup).called_with(sentinel.libname1).then(first_lib)
    when(library_lookup).called_with(sentinel.libname2).then(second_lib)

    # to_dt is patched to return the sentinels in the order listed here.
    converted_bounds = [sentinel.st1, sentinel.end1, sentinel.st2, sentinel.end2]
    with patch("arctic.tickstore.toplevel.to_dt", side_effect=converted_bounds):
        TopLevelTickStore.write(store, 'blah', sentinel.data)

    first_lib.write.assert_called_once_with('blah', first_slice)
    second_lib.write.assert_called_once_with('blah', second_slice)
def test_read():
    """``read`` must query every library for its intersecting range and concat the results."""
    store = create_autospec(TopLevelTickStore)
    tsl = TickStoreLibrary(create_autospec(TickStore), create_autospec(DateRange))
    # The same library entry is returned twice, so two reads are expected.
    store._get_libraries.return_value = [tsl, tsl]
    requested_range = create_autospec(DateRange)

    with patch('pandas.concat') as concat:
        result = TopLevelTickStore.read(store, sentinel.symbol, requested_range,
                                        columns=sentinel.include_columns,
                                        include_images=sentinel.include_images)

    library_result = tsl.library.read.return_value
    assert concat.call_args_list == [call([library_result, library_result])]
    assert result == concat.return_value

    expected_read = call(sentinel.symbol, tsl.date_range.intersection.return_value,
                         sentinel.include_columns, include_images=sentinel.include_images)
    assert tsl.library.read.call_args_list == [expected_read, expected_read]
def test_slice_raises():
    """``_slice`` must raise UnhandledDtypeException for an unsupported data type (a str here)."""
    store = TopLevelTickStore(Mock())

    with pytest.raises(UnhandledDtypeException) as exc_info:
        store._slice("abc", 1, 2)

    raised_message = str(exc_info.value)
    assert "Can't persist type" in raised_message