-
Notifications
You must be signed in to change notification settings - Fork 673
/
test_econdb.py
110 lines (95 loc) · 3.8 KB
/
test_econdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import numpy as np
import pandas as pd
from pandas import testing as tm
import pytest
from pandas_datareader import data as web
# Module-level pytest hook: the ``stable`` marker is applied to every test
# collected from this file, so the suite can be selected with ``-m stable``.
pytestmark = pytest.mark.stable
class TestEcondb(object):
    """Checks of the ``"econdb"`` data source exposed via ``web.DataReader``.

    Every test issues a live ``DataReader`` request and asserts on the
    returned frame, so the outcome depends on the remote service's data.
    """

    def test_infer_start_end_from_symbols(self):
        """Date range given only inside the query string is honoured.

        No ``start``/``end`` arguments are passed; the ``from=``/``to=``
        parameters embedded in the query are the only date bounds, and the
        first/last index entries must fall in 2010 and 2018 respectively.
        """
        df = web.DataReader(
            (
                "dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)"
                "&h=TIME&from=2010-02-01&to=2018-10-01&GEO=[AL,AT,BE,BA,"
                "BG,HR,CY,CZ,DK,EE,EA19,FI,FR,DE,EL,HU,IS,IE,IT,XK,LV,LT,"
                "LU,MT,ME,NL,MK,NO,PL,PT,RO,RS,SK,SI,ES,SE,CH,TR,UK]"
                "&NA_ITEM=[B1GQ]&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]"
            ),
            "econdb",
        )
        assert df.index[0].year == 2010
        assert df.index[-1].year == 2018

    @pytest.mark.xfail(reason="Dataset does not exist on Econdb")
    def test_get_cdh_e_fos(self):
        """Compare the CDH_E_FOS dataset against a fixed 2x4 frame.

        Marked ``xfail`` because the dataset is reported as missing on Econdb.
        """
        # EUROSTAT
        # Employed doctorate holders in non managerial and non professional
        # occupations by fields of science (%)
        df = web.DataReader(
            "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL",
            "econdb",
            start=pd.Timestamp("2005-01-01"),
            end=pd.Timestamp("2010-01-01"),
        )
        assert isinstance(df, pd.DataFrame)
        assert df.shape == (2, 4)

        # the levels are not returned consistently for econdb, so the expected
        # columns are rebuilt from whatever levels/names the response carries
        names = list(df.columns.names)
        levels = [lvl.values.tolist() for lvl in df.columns.levels]
        exp_col = pd.MultiIndex.from_product(levels, names=names)
        exp_idx = pd.DatetimeIndex(["2006-01-01", "2009-01-01"], name="TIME_PERIOD")
        values = np.array([[25.49, np.nan, 39.05, np.nan], [20.38, 25.1, 27.77, 38.1]])
        expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
        tm.assert_frame_equal(df, expected)

    def test_get_tourism(self):
        """Check OECD inbound-tourism arrivals for Japan and the United States.

        Values are compared positionally first (column 0 / column 1), then by
        label after swapping column levels 2 and 1.
        """
        # OECD
        # TOURISM_INBOUND
        df = web.DataReader(
            "dataset=OE_TOURISM_INBOUND&COUNTRY=JPN,USA&VARIABLE=INB_ARRIVALS_TOTAL",
            "econdb",
            start=pd.Timestamp("2008-01-01"),
            end=pd.Timestamp("2012-01-01"),
        )
        df = df.astype(float)
        jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
        us = np.array(
            [175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
        )
        # "YS" (year start) replaces the deprecated "AS" alias; both produce
        # the same January-1st dates for 2008 through 2012.
        index = pd.date_range("2008-01-01", "2012-01-01", freq="YS", name="TIME_PERIOD")

        # check the values coming back are equal
        np.testing.assert_array_equal(df.values[:, 0], jp)
        np.testing.assert_array_equal(df.values[:, 1], us)

        # sometimes the country and variable columns are swapped
        df = df.swaplevel(2, 1, axis=1)
        for label, values in [("Japan", jp), ("United States", us)]:
            expected = pd.Series(
                values, index=index, name="Total international arrivals"
            )
            # drop the freq so the index comparison does not depend on it
            expected.index.freq = None
            tm.assert_series_equal(
                df[label]["Tourism demand surveys"]["Total international arrivals"],
                expected,
            )

    def test_bls(self):
        """Check a single BLS CPI observation (May 2010) fetched by ticker."""
        # BLS
        # CPI
        df = web.DataReader(
            "ticker=BLS_CU.CUSR0000SA0.M.US",
            "econdb",
            start=pd.Timestamp("2010-01-01"),
            end=pd.Timestamp("2013-01-27"),
        )
        # .iloc[0] selects the first column of the row by position; plain
        # ``[0]`` on a label-indexed Series is deprecated positional access.
        assert df.loc["2010-05-01"].iloc[0] == 217.3

    def test_australia_gdp(self):
        """Check one cell of the ABS_GDP dataset selected by a 3-level column key."""
        df = web.DataReader(
            "dataset=ABS_GDP&to=2019-09-01&from=1959-09-01&h=TIME&v=Indicator", "econdb"
        )
        assert (
            df.loc[
                "2017-10-01",
                (
                    "GDP per capita: Current prices - National Accounts",
                    "Seasonally Adjusted",
                    "AUD",
                ),
            ]
            == 18329
        )