diff --git a/tests/prices.py b/tests/prices.py index 92144f9b2..2ddee2fa8 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -2,6 +2,9 @@ import unittest +import datetime as _dt +import pytz as _tz + class TestPriceHistory(unittest.TestCase): def setUp(self): pass @@ -9,6 +12,62 @@ def setUp(self): def tearDown(self): pass + def test_duplicatingDaily(self): + tkrs = [] + tkrs.append("IMP.JO") + tkrs.append("BHG.JO") + tkrs.append("SSW.JO") + tkrs.append("BP.L") + tkrs.append("INTC") + test_run = False + for tkr in tkrs: + dat = yf.Ticker(tkr) + tz = dat._get_ticker_tz() + + dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow()) + dt = dt_utc.astimezone(_tz.timezone(tz)) + if dt.time() < _dt.time(17,0): + continue + test_run = True + + df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1d") + + dt0 = df.index[-2] + dt1 = df.index[-1] + try: + self.assertNotEqual(dt0, dt1) + except: + print("Ticker = ", tkr) + raise + + if not test_run: + self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close") + + def test_duplicatingWeekly(self): + tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD'] + test_run = False + for tkr in tkrs: + dat = yf.Ticker(tkr) + tz = dat._get_ticker_tz() + + dt = _tz.timezone(tz).localize(_dt.datetime.now()) + if dt.date().weekday() not in [1,2,3,4]: + continue + test_run = True + + df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1wk") + dt0 = df.index[-2] + dt1 = df.index[-1] + try: + self.assertNotEqual(dt0.week, dt1.week) + except: + print("Ticker={}: Last two rows within same week:".format(tkr)) + print(df.iloc[df.shape[0]-2:]) + raise + + if not test_run: + self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend") + def test_intraDayWithEvents(self): # Dividend release pre-market, doesn't merge nicely with intra-day data, so # check still present diff --git a/yfinance/base.py b/yfinance/base.py index 4694a1add..9d758243f 
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
    """Merge Yahoo's separate live-price row back into its interval row.

    Yahoo bug: while the market is open today, Yahoo can return today's live
    data as an extra row separate from the rest of the current week/month
    interval in the row above. Seems to depend on the exchange (e.g. crypto
    is unaffected).

    Fixes applied:
    - For "1wk"/"1mo"/"3mo": if the last two rows fall in the same interval,
      aggregate them into the second-to-last row (max High, min Low, latest
      Close, summed Volume; likewise for any Adj* columns) and drop the last.
    - For "1d": the last two rows can duplicate the same day; keep the final
      row (its volume is slightly greater and matches the website) and drop
      the second-to-last.

    Parameters
    ----------
    quotes : pd.DataFrame
        OHLCV price table with a naive (UTC-valued) DatetimeIndex; the index
        is localized to UTC here before converting to exchange time.
    interval : str
        Yahoo interval string, e.g. "1d", "1wk", "1mo", "3mo".
    tz_exchange : str
        Exchange timezone name used to decide interval membership.

    Returns
    -------
    pd.DataFrame
        'quotes' with any duplicate final row merged or dropped.
    """
    n = quotes.shape[0]
    if n <= 1:
        return quotes

    # Compare the last two timestamps in exchange-local time.
    dt1 = quotes.index[n - 1].tz_localize("UTC").tz_convert(tz_exchange)
    dt2 = quotes.index[n - 2].tz_localize("UTC").tz_convert(tz_exchange)

    if interval in ["1wk", "1mo", "3mo"]:
        if interval == "1wk":
            last_rows_same_interval = dt1.year == dt2.year and dt1.week == dt2.week
        elif interval == "1mo":
            # Bug fix: also compare year, consistent with the "1wk"/"3mo"
            # branches - same month of different years is NOT the same interval.
            last_rows_same_interval = dt1.year == dt2.year and dt1.month == dt2.month
        else:  # "3mo"
            last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter

        if last_rows_same_interval:
            # Last two rows are within the same interval: merge the live
            # row (n-1) down into the interval row (n-2), then drop it.
            idx2 = quotes.index[n - 2]
            if _np.isnan(quotes.loc[idx2, "Open"]):
                quotes.loc[idx2, "Open"] = quotes["Open"].iloc[n - 1]
            # nanmax()/nanmin() ignore NaNs, so a missing side is harmless.
            quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"].iloc[n - 1], quotes["High"].iloc[n - 2]])
            quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"].iloc[n - 1], quotes["Low"].iloc[n - 2]])
            quotes.loc[idx2, "Close"] = quotes["Close"].iloc[n - 1]
            if "Adj High" in quotes.columns:
                quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"].iloc[n - 1], quotes["Adj High"].iloc[n - 2]])
            if "Adj Low" in quotes.columns:
                quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"].iloc[n - 1], quotes["Adj Low"].iloc[n - 2]])
            if "Adj Close" in quotes.columns:
                quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"].iloc[n - 1]
            quotes.loc[idx2, "Volume"] += quotes["Volume"].iloc[n - 1]
            quotes = quotes.drop(quotes.index[n - 1])

    elif interval == "1d":
        # Similar bug in daily data, except most data is simply duplicated
        # - the exception is volume, *slightly* greater on the final row
        # (and matching the website), so keep the last row.
        if dt1.date() == dt2.date():
            quotes = quotes.drop(quotes.index[n - 2])

    return quotes