From 69dfe325ae93dafaee741bd4d363cfce706ac0ee Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 12:54:48 +0100 Subject: [PATCH 1/7] Add tz to daily price data --- yfinance/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 9d758243f..9264db3da 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -308,10 +308,12 @@ def history(self, period="1mo", interval="1d", splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange) dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange) if params["interval"] in ["1d","1w","1wk","1mo","3mo"]: - # Converting datetime->date should improve merge performance - quotes.index = _pd.to_datetime(quotes.index.date) - splits.index = _pd.to_datetime(splits.index.date) - dividends.index = _pd.to_datetime(dividends.index.date) + # Converting datetime->date should improve merge performance. + # If localizing a midnight during DST transition hour when clocks roll back, + # meaning clock hits midnight twice, then use the 1st, i.e. the DST occurrence (ambiguous=True) + quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True) + splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True) + dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True) # combine df = quotes From 0f1472859181623cb078f23ad2d6c35bc6b2498f Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 15:26:45 +0100 Subject: [PATCH 2/7] Add test 'test_tz_dst_ambiguous' --- tests/prices.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/prices.py b/tests/prices.py index 2ddee2fa8..e56097c4e 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -169,5 +169,15 @@ def test_monthlyWithEvents(self): print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2)) raise + + def test_tz_dst_ambiguous(self): + # Reproduce issue #1100 + + try: 
+ yf.Ticker("ESLT.TA").history(start="2002-10-06", end="2002-10-09", interval="1d") + except _tz.exceptions.AmbiguousTimeError: + raise Exception("Ambiguous DST issue not resolved") + + if __name__ == '__main__': unittest.main() From e842a9d6573256ce72cbed9598ba97420c617f28 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 15:26:59 +0100 Subject: [PATCH 3/7] Event-merge fixes: intra-day, weekly, lost tz, 'test_intraDayWithEvents' --- tests/prices.py | 28 +++++++++++++++++++++------- yfinance/utils.py | 30 ++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index e56097c4e..764906ceb 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -12,6 +12,7 @@ def setUp(self): def tearDown(self): pass + def test_duplicatingDaily(self): tkrs = [] tkrs.append("IMP.JO") @@ -43,6 +44,7 @@ def test_duplicatingDaily(self): if not test_run: self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close") + def test_duplicatingWeekly(self): tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD'] test_run = False @@ -68,14 +70,24 @@ def test_duplicatingWeekly(self): if not test_run: self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend") + def test_intraDayWithEvents(self): - # Dividend release pre-market, doesn't merge nicely with intra-day data, so - # check still present - tkr = "ESLT.TA" - start_d = "2022-10-06" - end_d = "2022-10-07" - df1 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", prepost=True, actions=True) - self.assertTrue((df1["Dividends"]!=0.0).any()) + # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present + + tkr = "ICL.TA" + start_d = _dt.date.today() - _dt.timedelta(days=365) + end_d = None + df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True) + df_daily_divs = 
df_daily["Dividends"][df_daily["Dividends"]!=0] + if df_daily_divs.shape[0]==0: + self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 12 months") + + last_div_date = df_daily_divs.index[-1] + start_d = last_div_date.date() + end_d = last_div_date.date() + _dt.timedelta(days=1) + df = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", actions=True) + self.assertTrue((df["Dividends"]!=0.0).any()) + def test_dailyWithEvents(self): # Reproduce issue #521 @@ -108,6 +120,7 @@ def test_dailyWithEvents(self): print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2)) raise + def test_weeklyWithEvents(self): # Reproduce issue #521 tkr1 = "QQQ" @@ -139,6 +152,7 @@ def test_weeklyWithEvents(self): print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2)) raise + def test_monthlyWithEvents(self): tkr1 = "QQQ" tkr2 = "GDX" diff --git a/yfinance/utils.py b/yfinance/utils.py index 5ae434da2..776f2d30e 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -349,6 +349,7 @@ def _reindex_events(df, new_index, data_col_name): new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp() elif interval == "3mo": new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp() + new_index = new_index.tz_localize(df.index.tz) df_sub = _reindex_events(df_sub, new_index, data_col) df = df_main.join(df_sub) @@ -383,13 +384,22 @@ def _reindex_events(df, new_index, data_col_name): dt_sub_i = last_main_dt ; fixed = True elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter: dt_sub_i = last_main_dt ; fixed = True - elif interval == "1wk" and last_main_dt.week == dt_sub_i.week: - dt_sub_i = last_main_dt ; fixed = True + elif interval == "1wk": + if last_main_dt.week == dt_sub_i.week: + dt_sub_i = last_main_dt ; fixed = True + elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)): + # With some specific 
start dates (e.g. around early Jan), Yahoo + # messes up start-of-week, is Saturday not Monday. So check + # if same week another way + dt_sub_i = last_main_dt ; fixed = True + + if fixed: + print("Mapped {} -> {}".format(df_sub.index[i], dt_sub_i)) elif interval == "1d" and last_main_dt.day == dt_sub_i.day: dt_sub_i = last_main_dt ; fixed = True elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour: dt_sub_i = last_main_dt ; fixed = True - else: + elif interval.endswith('m') or interval.endswith('h'): td = _pd.to_timedelta(interval) if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td): dt_sub_i = last_main_dt ; fixed = True @@ -402,11 +412,15 @@ def _reindex_events(df, new_index, data_col_name): if data_lost: ## Not always possible to match events with trading, e.g. when released pre-market. ## So have to append to bottom with nan prices. - f_missing = ~df_sub.index.isin(df.index) - df_sub_missing = df_sub[f_missing] - keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns) - df_sub_missing[list(keys)] = _np.nan - df = _pd.concat([df, df_sub_missing], sort=True) + ## But should only be impossible with intra-day price data. 
+ if interval.endswith('m') or interval.endswith('h'): + f_missing = ~df_sub.index.isin(df.index) + df_sub_missing = df_sub[f_missing] + keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns) + df_sub_missing[list(keys)] = _np.nan + df = _pd.concat([df, df_sub_missing], sort=True) + else: + raise Exception("Lost data during merge despite all attempts to align data (see above)") return df From 841b485b1dad11d694412cf44fe600bdf8e159da Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 15:37:51 +0100 Subject: [PATCH 4/7] Drop out-of-date-range events --- yfinance/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/yfinance/base.py b/yfinance/base.py index 9264db3da..fde56060a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -298,6 +298,18 @@ def history(self, period="1mo", interval="1d", # actions dividends, splits = utils.parse_actions(data["chart"]["result"][0]) + if start is not None: + startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start)) + if dividends is not None: + dividends = dividends[dividends.index>=startDt] + if splits is not None: + splits = splits[splits.index>=startDt] + if end is not None: + endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end)) + if dividends is not None: + dividends = dividends[dividends.index Date: Fri, 21 Oct 2022 15:44:36 +0100 Subject: [PATCH 5/7] Fix Ticker.dividends property --- yfinance/base.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index fde56060a..0ad37c178 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -328,21 +328,19 @@ def history(self, period="1mo", interval="1d", dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True) # combine - df = quotes - if actions: - df = df.sort_index() - if dividends.shape[0] > 0: - df = utils.safe_merge_dfs(df, 
dividends, interval) - if "Dividends" in df.columns: - df.loc[df["Dividends"].isna(),"Dividends"] = 0 - else: - df["Dividends"] = 0.0 - if splits.shape[0] > 0: - df = utils.safe_merge_dfs(df, splits, interval) - if "Stock Splits" in df.columns: - df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0 - else: - df["Stock Splits"] = 0.0 + df = quotes.sort_index() + if dividends.shape[0] > 0: + df = utils.safe_merge_dfs(df, dividends, interval) + if "Dividends" in df.columns: + df.loc[df["Dividends"].isna(),"Dividends"] = 0 + else: + df["Dividends"] = 0.0 + if splits.shape[0] > 0: + df = utils.safe_merge_dfs(df, splits, interval) + if "Stock Splits" in df.columns: + df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0 + else: + df["Stock Splits"] = 0.0 df = utils.fix_Yahoo_dst_issue(df, params["interval"]) @@ -359,6 +357,9 @@ def history(self, period="1mo", interval="1d", self._history = df.copy() + if not actions: + df = df.drop(["Dividends", "Stock Splits"], axis=1) + return df # ------------------------ From b018f917a90c430eb659a8ace44d58b716787605 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 17:21:19 +0100 Subject: [PATCH 6/7] Port in: 'Fix when Yahoo returns price=NaNs on dividend day' --- yfinance/base.py | 11 ++++------- yfinance/utils.py | 3 --- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 0ad37c178..223eff9c0 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -293,9 +293,6 @@ def history(self, period="1mo", interval="1d", "chart"]["result"][0]["meta"]["priceHint"]) quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64) - if not keepna: - quotes.dropna(inplace=True) - # actions dividends, splits = utils.parse_actions(data["chart"]["result"][0]) if start is not None: @@ -352,13 +349,13 @@ def history(self, period="1mo", interval="1d", df.index.name = "Date" # duplicates and missing rows cleanup - df.dropna(how='all', inplace=True) df = 
df[~df.index.duplicated(keep='first')] - self._history = df.copy() - if not actions: - df = df.drop(["Dividends", "Stock Splits"], axis=1) + df = df.drop(columns=["Dividends", "Stock Splits"]) + if not keepna: + mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1) + df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero]) return df diff --git a/yfinance/utils.py b/yfinance/utils.py index 776f2d30e..2cea68f03 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -392,9 +392,6 @@ def _reindex_events(df, new_index, data_col_name): # messes up start-of-week, is Saturday not Monday. So check # if same week another way dt_sub_i = last_main_dt ; fixed = True - - if fixed: - print("Mapped {} -> {}".format(df_sub.index[i], dt_sub_i)) elif interval == "1d" and last_main_dt.day == dt_sub_i.day: dt_sub_i = last_main_dt ; fixed = True elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour: From 40424b71a631ed5ee21df22ff27344f2d4748da1 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 21 Oct 2022 17:26:15 +0100 Subject: [PATCH 7/7] Fix test 'test_intraDayWithEvents' --- tests/prices.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index 764906ceb..a338350e3 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -75,12 +75,15 @@ def test_intraDayWithEvents(self): # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present tkr = "ICL.TA" - start_d = _dt.date.today() - _dt.timedelta(days=365) + # tkr = "ESLT.TA" + # tkr = "ONE.TA" + # tkr = "MGDL.TA" + start_d = _dt.date.today() - _dt.timedelta(days=60) end_d = None df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True) df_daily_divs = df_daily["Dividends"][df_daily["Dividends"]!=0] if df_daily_divs.shape[0]==0: - self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 12 months") + self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no 
dividend in last 60 days") last_div_date = df_daily_divs.index[-1] start_d = last_div_date.date()