diff --git a/tests/prices.py b/tests/prices.py
index 2ddee2fa8..a338350e3 100644
--- a/tests/prices.py
+++ b/tests/prices.py
@@ -12,6 +12,7 @@ def setUp(self):
     def tearDown(self):
         pass
 
+
     def test_duplicatingDaily(self):
         tkrs = []
         tkrs.append("IMP.JO")
@@ -43,6 +44,7 @@ def test_duplicatingDaily(self):
         if not test_run:
             self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")
 
+
     def test_duplicatingWeekly(self):
         tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
         test_run = False
@@ -68,14 +70,27 @@ def test_duplicatingWeekly(self):
         if not test_run:
             self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
 
+
     def test_intraDayWithEvents(self):
-        # Dividend release pre-market, doesn't merge nicely with intra-day data, so
-        # check still present
-        tkr = "ESLT.TA"
-        start_d = "2022-10-06"
-        end_d = "2022-10-07"
-        df1 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", prepost=True, actions=True)
-        self.assertTrue((df1["Dividends"]!=0.0).any())
+        # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
+
+        tkr = "ICL.TA"
+        # tkr = "ESLT.TA"
+        # tkr = "ONE.TA"
+        # tkr = "MGDL.TA"
+        start_d = _dt.date.today() - _dt.timedelta(days=60)
+        end_d = None
+        df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True)
+        df_daily_divs = df_daily["Dividends"][df_daily["Dividends"]!=0]
+        if df_daily_divs.shape[0]==0:
+            self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
+
+        last_div_date = df_daily_divs.index[-1]
+        start_d = last_div_date.date()
+        end_d = last_div_date.date() + _dt.timedelta(days=1)
+        df = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", actions=True)
+        self.assertTrue((df["Dividends"]!=0.0).any())
+
 
     def test_dailyWithEvents(self):
         # Reproduce issue #521
@@ -108,6 +123,7 @@ def test_dailyWithEvents(self):
                 print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
                 raise
 
+
     def test_weeklyWithEvents(self):
         # Reproduce issue #521
         tkr1 = "QQQ"
@@ -139,6 +155,7 @@ def test_weeklyWithEvents(self):
                 print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
                 raise
 
+
    def test_monthlyWithEvents(self):
         tkr1 = "QQQ"
         tkr2 = "GDX"
@@ -169,5 +186,15 @@ def test_monthlyWithEvents(self):
                 print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
                 raise
 
+
+    def test_tz_dst_ambiguous(self):
+        # Reproduce issue #1100
+
+        try:
+            yf.Ticker("ESLT.TA").history(start="2002-10-06", end="2002-10-09", interval="1d")
+        except _tz.exceptions.AmbiguousTimeError:
+            raise Exception("Ambiguous DST issue not resolved")
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/yfinance/base.py b/yfinance/base.py
index 17b666021..d8e8ecfb6 100644
--- a/yfinance/base.py
+++ b/yfinance/base.py
@@ -290,11 +290,20 @@ def history(self, period="1mo", interval="1d",
                                    "chart"]["result"][0]["meta"]["priceHint"])
         quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
 
-        if not keepna:
-            quotes.dropna(inplace=True)
-
         # actions
         dividends, splits = utils.parse_actions(data["chart"]["result"][0])
+        if start is not None:
+            startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start))
+            if dividends is not None:
+                dividends = dividends[dividends.index>=startDt]
+            if splits is not None:
+                splits = splits[splits.index>=startDt]
+        if end is not None:
+            endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
+            if dividends is not None:
+                dividends = dividends[dividends.index<endDt]
+            if splits is not None:
+                splits = splits[splits.index<endDt]
@@ ... @@ def history(self, period="1mo", interval="1d",
-        # Converting datetime->date should improve merge performance
-        quotes.index = _pd.to_datetime(quotes.index.date)
-        splits.index = _pd.to_datetime(splits.index.date)
-        dividends.index = _pd.to_datetime(dividends.index.date)
+        # Converting datetime->date should improve merge performance.
+        # If localizing a midnight during DST transition hour when clocks roll back,
+        # meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
+        quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True)
+        splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True)
+        dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True)
 
         # combine
-        df = quotes
-        if actions:
-            df = df.sort_index()
-            if dividends.shape[0] > 0:
-                df = utils.safe_merge_dfs(df, dividends, interval)
-                if "Dividends" in df.columns:
-                    df.loc[df["Dividends"].isna(),"Dividends"] = 0
-                else:
-                    df["Dividends"] = 0.0
-            if splits.shape[0] > 0:
-                df = utils.safe_merge_dfs(df, splits, interval)
-                if "Stock Splits" in df.columns:
-                    df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
-                else:
-                    df["Stock Splits"] = 0.0
+        df = quotes.sort_index()
+        if dividends.shape[0] > 0:
+            df = utils.safe_merge_dfs(df, dividends, interval)
+            if "Dividends" in df.columns:
+                df.loc[df["Dividends"].isna(),"Dividends"] = 0
+            else:
+                df["Dividends"] = 0.0
+        if splits.shape[0] > 0:
+            df = utils.safe_merge_dfs(df, splits, interval)
+            if "Stock Splits" in df.columns:
+                df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
+            else:
+                df["Stock Splits"] = 0.0
 
         df = utils.fix_Yahoo_dst_issue(df, params["interval"])
 
@@ -337,10 +346,13 @@ def history(self, period="1mo", interval="1d",
 
         df.index.name = "Date"
 
         # duplicates and missing rows cleanup
-        df.dropna(how='all', inplace=True)
         df = df[~df.index.duplicated(keep='first')]
-
         self._history = df.copy()
 
+        if not actions:
+            df = df.drop(columns=["Dividends", "Stock Splits"])
+        if not keepna:
+            mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
+            df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
         return df
diff --git a/yfinance/utils.py b/yfinance/utils.py
index 7dcd770c6..64426603c 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -352,6 +352,7 @@ def _reindex_events(df, new_index, data_col_name):
             new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
         elif interval == "3mo":
             new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
+        new_index = new_index.tz_localize(df.index.tz)
         df_sub = _reindex_events(df_sub, new_index, data_col)
         df = df_main.join(df_sub)
 
@@ -386,13 +387,19 @@ def _reindex_events(df, new_index, data_col_name):
                 dt_sub_i = last_main_dt ; fixed = True
             elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
                 dt_sub_i = last_main_dt ; fixed = True
-            elif interval == "1wk" and last_main_dt.week == dt_sub_i.week:
-                dt_sub_i = last_main_dt ; fixed = True
+            elif interval == "1wk":
+                if last_main_dt.week == dt_sub_i.week:
+                    dt_sub_i = last_main_dt ; fixed = True
+                elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)):
+                    # With some specific start dates (e.g. around early Jan), Yahoo
+                    # messes up start-of-week, is Saturday not Monday. So check
+                    # if same week another way
+                    dt_sub_i = last_main_dt ; fixed = True
             elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
                 dt_sub_i = last_main_dt ; fixed = True
             elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
                 dt_sub_i = last_main_dt ; fixed = True
-            else:
+            elif interval.endswith('m') or interval.endswith('h'):
                 td = _pd.to_timedelta(interval)
                 if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
                     dt_sub_i = last_main_dt ; fixed = True
@@ -405,11 +412,15 @@ def _reindex_events(df, new_index, data_col_name):
     if data_lost:
         ## Not always possible to match events with trading, e.g. when released pre-market.
         ## So have to append to bottom with nan prices.
-        f_missing = ~df_sub.index.isin(df.index)
-        df_sub_missing = df_sub[f_missing]
-        keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
-        df_sub_missing[list(keys)] = _np.nan
-        df = _pd.concat([df, df_sub_missing], sort=True)
+        ## But should only be impossible with intra-day price data.
+        if interval.endswith('m') or interval.endswith('h'):
+            f_missing = ~df_sub.index.isin(df.index)
+            df_sub_missing = df_sub[f_missing]
+            keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
+            df_sub_missing[list(keys)] = _np.nan
+            df = _pd.concat([df, df_sub_missing], sort=True)
+        else:
+            raise Exception("Lost data during merge despite all attempts to align data (see above)")
 
     return df
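
The sketches below are illustrative notes, not part of the diff. First, the pandas behaviour behind the tz_localize(tz_exchange, ambiguous=True) change for issue #1100: when clocks roll back overnight, midnight can occur twice on the wall clock, and a plain localize then raises. This minimal sketch assumes pandas with pytz timezone data, and uses Asia/Jerusalem on 2002-10-07, the same window as the new test_tz_dst_ambiguous():

    import pandas as pd

    # Midnight 2002-10-07 occurred twice in Israel (clocks rolled back overnight),
    # so localizing a date-only index without help raises AmbiguousTimeError:
    idx = pd.to_datetime(["2002-10-06", "2002-10-07", "2002-10-08"])
    try:
        idx.tz_localize("Asia/Jerusalem")
    except Exception as e:
        print(type(e).__name__)  # AmbiguousTimeError

    # ambiguous=True picks one of the two possible offsets instead of raising,
    # which is what history() now relies on when localizing the quote,
    # dividend and split indexes:
    print(idx.tz_localize("Asia/Jerusalem", ambiguous=True))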
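Second, the new keepna handling at the end of history(): the old dropna() on quotes also destroyed event rows that were appended with NaN prices, so rows are now dropped only when every column is NaN or zero. A toy frame (made-up values, not real Yahoo output) showing the exact mask the patch adds:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "Open":  [10.0, np.nan, 11.0],
        "Close": [10.5, 0.0,    11.2],
    })

    # Drop rows where every value is NaN or zero (the middle row here);
    # rows with a genuine value in any column survive.
    mask_nan_or_zero = (df.isna() | (df == 0)).all(axis=1)
    df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
    print(df)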
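Third, the 1wk alignment fallback in safe_merge_dfs(): around New Year, Yahoo can start a weekly bar on a Saturday, so comparing ISO week numbers fails even though the event belongs to that bar. An illustration with made-up timestamps:

    import datetime as _datetime
    import pandas as pd

    last_main_dt = pd.Timestamp("2022-01-01")  # Saturday bar start: ISO week 52 of 2021
    dt_sub_i = pd.Timestamp("2022-01-03")      # Monday event: ISO week 1 of 2022

    # The old check compared week numbers, which misses this case:
    print(last_main_dt.week == dt_sub_i.week)  # False

    # The fallback treats the event as belonging to the bar if it falls
    # within the 7 days after the bar start:
    print((dt_sub_i >= last_main_dt) and
          (dt_sub_i - last_main_dt < _datetime.timedelta(weeks=1)))  # True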