Skip to content

Commit

Permalink
Merge pull request #1102 from ranaroussi/fix/price-tz-and-events
Browse files Browse the repository at this point in the history
Various fixes to price data
  • Loading branch information
ValueRaider committed Oct 21, 2022
2 parents f20aa9a + 40424b7 commit 303e0ea
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 39 deletions.
41 changes: 34 additions & 7 deletions tests/prices.py
Expand Up @@ -12,6 +12,7 @@ def setUp(self):
def tearDown(self):
    # No per-test cleanup is required; the hook is kept so the
    # TestCase scaffold stays explicit and symmetric with setUp().
    pass


def test_duplicatingDaily(self):
tkrs = []
tkrs.append("IMP.JO")
Expand Down Expand Up @@ -43,6 +44,7 @@ def test_duplicatingDaily(self):
if not test_run:
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")


def test_duplicatingWeekly(self):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
Expand All @@ -68,14 +70,27 @@ def test_duplicatingWeekly(self):
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")


def test_intraDayWithEvents(self):
    # TASE (Tel Aviv) dividends are released pre-market, which doesn't merge
    # nicely with intra-day price rows — verify the dividend is still present
    # in the 15m table instead of being silently dropped.
    tkr = "ICL.TA"
    # Alternative TASE tickers that exhibit the same pre-market dividend timing:
    # tkr = "ESLT.TA"
    # tkr = "ONE.TA"
    # tkr = "MGDL.TA"

    # First locate the most recent dividend via daily data; if there is none
    # in the last 60 days the scenario can't be exercised, so skip.
    start_d = _dt.date.today() - _dt.timedelta(days=60)
    end_d = None
    df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True)
    df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
    if df_daily_divs.shape[0] == 0:
        self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")

    # Re-fetch just the dividend day at 15m resolution; the dividend row must
    # survive the intra-day merge even though its timestamp is pre-market.
    last_div_date = df_daily_divs.index[-1]
    start_d = last_div_date.date()
    end_d = last_div_date.date() + _dt.timedelta(days=1)
    df = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", actions=True)
    self.assertTrue((df["Dividends"] != 0.0).any())


def test_dailyWithEvents(self):
# Reproduce issue #521
Expand Down Expand Up @@ -108,6 +123,7 @@ def test_dailyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
Expand Down Expand Up @@ -139,6 +155,7 @@ def test_weeklyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_monthlyWithEvents(self):
tkr1 = "QQQ"
tkr2 = "GDX"
Expand Down Expand Up @@ -169,5 +186,15 @@ def test_monthlyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_tz_dst_ambiguous(self):
    # Reproduce issue #1100: fetching TASE daily data spanning the Israel
    # DST fall-back date used to raise AmbiguousTimeError when localizing
    # midnight timestamps (midnight occurs twice when clocks roll back).
    try:
        yf.Ticker("ESLT.TA").history(start="2002-10-06", end="2002-10-09", interval="1d")
    except _tz.exceptions.AmbiguousTimeError as e:
        # Chain the original exception so the underlying DST traceback
        # is preserved for debugging instead of being discarded.
        raise Exception("Ambiguous DST issue not resolved") from e


# Allow running this test module directly (python tests/prices.py)
# in addition to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()
60 changes: 36 additions & 24 deletions yfinance/base.py
Expand Up @@ -290,11 +290,20 @@ def history(self, period="1mo", interval="1d",
"chart"]["result"][0]["meta"]["priceHint"])
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)

if not keepna:
quotes.dropna(inplace=True)

# actions
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
if start is not None:
startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start))
if dividends is not None:
dividends = dividends[dividends.index>=startDt]
if splits is not None:
splits = splits[splits.index>=startDt]
if end is not None:
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
if dividends is not None:
dividends = dividends[dividends.index<endDt]
if splits is not None:
splits = splits[splits.index<endDt]

tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

Expand All @@ -305,27 +314,27 @@ def history(self, period="1mo", interval="1d",
splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange)
dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange)
if params["interval"] in ["1d","1w","1wk","1mo","3mo"]:
# Converting datetime->date should improve merge performance
quotes.index = _pd.to_datetime(quotes.index.date)
splits.index = _pd.to_datetime(splits.index.date)
dividends.index = _pd.to_datetime(dividends.index.date)
# Converting datetime->date should improve merge performance.
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True)
splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True)
dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True)

# combine
df = quotes
if actions:
df = df.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0
df = quotes.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0

df = utils.fix_Yahoo_dst_issue(df, params["interval"])

Expand All @@ -337,10 +346,13 @@ def history(self, period="1mo", interval="1d",
df.index.name = "Date"

# duplicates and missing rows cleanup
df.dropna(how='all', inplace=True)
df = df[~df.index.duplicated(keep='first')]

self._history = df.copy()
if not actions:
df = df.drop(columns=["Dividends", "Stock Splits"])
if not keepna:
mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])

return df

Expand Down
27 changes: 19 additions & 8 deletions yfinance/utils.py
Expand Up @@ -352,6 +352,7 @@ def _reindex_events(df, new_index, data_col_name):
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
elif interval == "3mo":
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
new_index = new_index.tz_localize(df.index.tz)
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)

Expand Down Expand Up @@ -386,13 +387,19 @@ def _reindex_events(df, new_index, data_col_name):
dt_sub_i = last_main_dt ; fixed = True
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk" and last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk":
if last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)):
# With some specific start dates (e.g. around early Jan), Yahoo
# messes up start-of-week, is Saturday not Monday. So check
# if same week another way
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
dt_sub_i = last_main_dt ; fixed = True
else:
elif interval.endswith('m') or interval.endswith('h'):
td = _pd.to_timedelta(interval)
if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
dt_sub_i = last_main_dt ; fixed = True
Expand All @@ -405,11 +412,15 @@ def _reindex_events(df, new_index, data_col_name):
if data_lost:
## Not always possible to match events with trading, e.g. when released pre-market.
## So have to append to bottom with nan prices.
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
## But should only be impossible with intra-day price data.
if interval.endswith('m') or interval.endswith('h'):
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
else:
raise Exception("Lost data during merge despite all attempts to align data (see above)")

return df

Expand Down

0 comments on commit 303e0ea

Please sign in to comment.