From 5c0b2bbaa3b452b7bc628897c609044436ae6a9f Mon Sep 17 00:00:00 2001
From: ValueRaider
Date: Sun, 2 Oct 2022 18:26:05 +0100
Subject: [PATCH 1/2] Fix weekly/monthly prices across 2 rows

---
 yfinance/base.py  |  2 ++
 yfinance/utils.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/yfinance/base.py b/yfinance/base.py
index 6885d374b..5ec9421e7 100644
--- a/yfinance/base.py
+++ b/yfinance/base.py
@@ -288,6 +288,8 @@ def history(self, period="1mo", interval="1d",
 
         tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
 
+        quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
+
         # combine
         df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
         df["Dividends"].fillna(0, inplace=True)
diff --git a/yfinance/utils.py b/yfinance/utils.py
index d611575c3..be775ac4b 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -252,6 +252,52 @@ def parse_actions(data):
     return dividends, splits
 
 
+def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
+    # Yahoo bug fix. If the market is open today then Yahoo normally returns
+    # today's data as a separate row, split from the rest of the week/month interval in the row above.
+    # Seems to depend on the exchange, e.g. crypto is unaffected.
+    # Fix = merge the two rows together
+    n = quotes.shape[0]
+    if n > 1:
+        dt1 = quotes.index[n-1].tz_localize("UTC").tz_convert(tz_exchange)
+        dt2 = quotes.index[n-2].tz_localize("UTC").tz_convert(tz_exchange)
+        if interval in ["1wk", "1mo", "3mo"]:
+            if interval == "1wk":
+                last_rows_same_interval = dt1.year==dt2.year and dt1.week==dt2.week
+            elif interval == "1mo":
+                last_rows_same_interval = dt1.month==dt2.month
+            elif interval == "3mo":
+                last_rows_same_interval = dt1.year==dt2.year and dt1.quarter==dt2.quarter
+            if last_rows_same_interval:
+                # Last two rows are within same interval
+                idx1 = quotes.index[n-1]
+                idx2 = quotes.index[n-2]
+                if _np.isnan(quotes.loc[idx2,"Open"]):
+                    quotes.loc[idx2,"Open"] = quotes["Open"][n-1]
+                # Note: nanmax() & nanmin() ignore NaNs
+                quotes.loc[idx2,"High"] = _np.nanmax([quotes["High"][n-1], quotes["High"][n-2]])
+                quotes.loc[idx2,"Low"] = _np.nanmin([quotes["Low"][n-1], quotes["Low"][n-2]])
+                quotes.loc[idx2,"Close"] = quotes["Close"][n-1]
+                if "Adj High" in quotes.columns:
+                    quotes.loc[idx2,"Adj High"] = _np.nanmax([quotes["Adj High"][n-1], quotes["Adj High"][n-2]])
+                if "Adj Low" in quotes.columns:
+                    quotes.loc[idx2,"Adj Low"] = _np.nanmin([quotes["Adj Low"][n-1], quotes["Adj Low"][n-2]])
+                if "Adj Close" in quotes.columns:
+                    quotes.loc[idx2,"Adj Close"] = quotes["Adj Close"][n-1]
+                quotes.loc[idx2,"Volume"] += quotes["Volume"][n-1]
+                quotes = quotes.iloc[0:n-1]
+                n = quotes.shape[0]
+
+        # Similar bug in daily data except most data is simply duplicated
+        # - exception is volume, *slightly* different on final row (and matches website)
+        elif interval=="1d":
+            if dt1.date() == dt2.date():
+                # Last two rows are on same day. Drop second-to-last row
+                quotes = quotes.drop(quotes.index[n-2])
+                n = quotes.shape[0]
+    return quotes
+
+
 def fix_Yahoo_dst_issue(df, interval):
     if interval in ["1d","1w","1wk"]:
         # These intervals should start at time 00:00. But for some combinations of date and timezone,
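To make the weekly/monthly merge above easier to follow, here is a minimal standalone sketch of the same idea, outside the patch. The two-row DataFrame, its dates, and its values are made up for illustration: both rows belong to one "1wk" interval, so the first row keeps its Open, takes the max High and min Low, takes the last Close, sums the Volumes, and the live row is dropped.

import numpy as _np
import pandas as _pd

# Hypothetical rows Yahoo might return for the same week: the second row is
# today's live trading, returned separately from the rest of the week.
quotes = _pd.DataFrame({
    "Open":   [100.0, 103.0],
    "High":   [105.0, 107.0],
    "Low":    [ 99.0, 102.0],
    "Close":  [103.0, 106.0],
    "Volume": [1000,   250]},
    index=_pd.to_datetime(["2022-10-10", "2022-10-14"]))

idx_keep = quotes.index[-2]  # row representing the whole week
idx_live = quotes.index[-1]  # separate live row to fold in
if _np.isnan(quotes.loc[idx_keep, "Open"]):
    quotes.loc[idx_keep, "Open"] = quotes.loc[idx_live, "Open"]
quotes.loc[idx_keep, "High"] = _np.nanmax(quotes["High"].iloc[-2:])
quotes.loc[idx_keep, "Low"] = _np.nanmin(quotes["Low"].iloc[-2:])
quotes.loc[idx_keep, "Close"] = quotes.loc[idx_live, "Close"]
quotes.loc[idx_keep, "Volume"] += quotes.loc[idx_live, "Volume"]
quotes = quotes.drop(idx_live)

print(quotes)  # single row: Open=100.0, High=107.0, Low=99.0, Close=106.0, Volume=1250

The patch itself differs only in how it decides the two rows share an interval (ISO week, month, or quarter, after converting the index to the exchange timezone) and in also handling the Adj High/Adj Low/Adj Close columns when present.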
From c679551faacf032a76cac990be0a5521d81dbcea Mon Sep 17 00:00:00 2001
From: ValueRaider
Date: Fri, 14 Oct 2022 23:15:13 +0100
Subject: [PATCH 2/2] Add unittest for duplication fix

---
 tests/__init__.py |  1 +
 tests/context.py  |  9 ++++++
 tests/prices.py   | 73 +++++++++++++++++++++++++++++++++++++++++++++++
 yfinance/utils.py |  9 +++---
 4 files changed, 87 insertions(+), 5 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/context.py
 create mode 100644 tests/prices.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 000000000..4265cc3e6
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python
diff --git a/tests/context.py b/tests/context.py
new file mode 100644
index 000000000..fe647f890
--- /dev/null
+++ b/tests/context.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+_parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+_src_dp = _parent_dp
+sys.path.insert(0, _src_dp)
+
+import yfinance
diff --git a/tests/prices.py b/tests/prices.py
new file mode 100644
index 000000000..b5c9d223c
--- /dev/null
+++ b/tests/prices.py
@@ -0,0 +1,73 @@
+from .context import yfinance as yf
+
+import unittest
+
+import datetime as _dt
+import pytz as _tz
+
+class TestPriceHistory(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_duplicatingDaily(self):
+        tkrs = []
+        tkrs.append("IMP.JO")
+        tkrs.append("BHG.JO")
+        tkrs.append("SSW.JO")
+        tkrs.append("BP.L")
+        tkrs.append("INTC")
+        test_run = False
+        for tkr in tkrs:
+            dat = yf.Ticker(tkr)
+            tz = dat._get_ticker_tz()
+
+            dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
+            dt = dt_utc.astimezone(_tz.timezone(tz))
+            if dt.time() < _dt.time(17,0):
+                continue
+            test_run = True
+
+            df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1d")
+
+            dt0 = df.index[-2]
+            dt1 = df.index[-1]
+            try:
+                self.assertNotEqual(dt0, dt1)
+            except:
+                print("Ticker = ", tkr)
+                raise
+
+        if not test_run:
+            self.skipTest("Skipping test_duplicatingDaily() because it is only expected to fail just after market close")
+
+    def test_duplicatingWeekly(self):
+        tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
+        test_run = False
+        for tkr in tkrs:
+            dat = yf.Ticker(tkr)
+            tz = dat._get_ticker_tz()
+
+            dt = _tz.timezone(tz).localize(_dt.datetime.now())
+            if dt.date().weekday() not in [1,2,3,4]:
+                continue
+            test_run = True
+
+            df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1wk")
+            dt0 = df.index[-2]
+            dt1 = df.index[-1]
+            try:
+                self.assertNotEqual(dt0.week, dt1.week)
+            except:
+                print("Ticker={}: Last two rows within same week:".format(tkr))
+                print(df.iloc[df.shape[0]-2:])
+                raise
+
+        if not test_run:
+            self.skipTest("Skipping test_duplicatingWeekly() because it cannot fail on a Monday or weekend")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/yfinance/utils.py b/yfinance/utils.py
index be775ac4b..2b79445cb 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -285,16 +285,15 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
                 if "Adj Close" in quotes.columns:
                     quotes.loc[idx2,"Adj Close"] = quotes["Adj Close"][n-1]
                 quotes.loc[idx2,"Volume"] += quotes["Volume"][n-1]
-                quotes = quotes.iloc[0:n-1]
-                n = quotes.shape[0]
-
+                quotes = quotes.drop(quotes.index[n-1])
+
         # Similar bug in daily data except most data is simply duplicated
-        # - exception is volume, *slightly* different on final row (and matches website)
+        # - exception is volume, *slightly* greater on final row (and matches website)
         elif interval=="1d":
             if dt1.date() == dt2.date():
                 # Last two rows are on same day. Drop second-to-last row
                 quotes = quotes.drop(quotes.index[n-2])
-                n = quotes.shape[0]
+
     return quotes
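Assuming the new files land under tests/ as shown above, the suite would presumably be run from the repository root with the standard unittest runner, e.g. "python -m unittest tests.prices -v", or programmatically:

# Rough programmatic equivalent of "python -m unittest tests.prices -v",
# run from the repository root.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName("tests.prices")
unittest.TextTestRunner(verbosity=2).run(suite)

Both tests call Yahoo live, so they need network access, and as the skip messages note they only exercise the fix at certain times: shortly after market close for daily data, and Tuesday to Friday for weekly data.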