From b48212e420108afe105c5c55aa8493a531bca4b6 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Wed, 14 Dec 2022 21:16:16 +0000 Subject: [PATCH 1/2] Repair-100x now tolerates zeroes --- yfinance/base.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 4721b9d4b..b3eb72a12 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -591,7 +591,7 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): if not idx in df_new.index: # Yahoo didn't return finer-grain data for this interval, # so probably no trading happened. - print("no fine data") + # print("no fine data") continue df_new_row = df_new.loc[idx] @@ -646,10 +646,15 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): data_cols = ["High", "Open", "Low", "Close"] # Order important, separate High from Low data_cols = [c for c in data_cols if c in df2.columns] + f_zeroes = (df2[data_cols]==0).any(axis=1) + if f_zeroes.any(): + df2_zeroes = df2[f_zeroes] + df2 = df2[~f_zeroes] + else: + df2_zeroes = None + if df2.shape[0] <= 1: + return df median = _ndimage.median_filter(df2[data_cols].values, size=(3, 3), mode="wrap") - - if (median == 0).any(): - raise Exception("median contains zeroes, why?") ratio = df2[data_cols].values / median ratio_rounded = (ratio / 20).round() * 20 # round ratio to nearest 20 f = ratio_rounded == 100 @@ -715,6 +720,9 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): if fj.any(): c = data_cols[j] df2.loc[fj, c] = df.loc[fj, c] + if df2_zeroes is not None: + df2 = _pd.concat([df2, df2_zeroes]).sort_index() + df2.index = _pd.to_datetime(df2.index) return df2 From a13bf0cd6c665e15ec16b1d601762c68cd11797e Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Thu, 15 Dec 2022 13:49:13 +0000 Subject: [PATCH 2/2] Hide divide-by-0 warnings --- yfinance/base.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index b3eb72a12..07962398b 100644 --- 
a/yfinance/base.py +++ b/yfinance/base.py @@ -559,12 +559,21 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): # Calibrate! Check whether 'df_fine' has different split-adjustment. # If different, then adjust to match 'df' df_block_calib = df_block[price_cols] - calib_filter = df_block_calib.to_numpy() != tag + calib_filter = (df_block_calib != tag).to_numpy() if not calib_filter.any(): # Can't calibrate so don't attempt repair continue df_new_calib = df_new[df_new.index.isin(df_block_calib.index)][price_cols] - ratios = (df_block_calib[price_cols].to_numpy() / df_new_calib[price_cols].to_numpy())[calib_filter] + # Avoid divide-by-zero warnings printing: + df_new_calib = df_new_calib.to_numpy() + df_block_calib = df_block_calib.to_numpy() + for j in range(len(price_cols)): + c = price_cols[j] + f = ~calib_filter[:,j] + if f.any(): + df_block_calib[f,j] = 1 + df_new_calib[f,j] = 1 + ratios = (df_block_calib / df_new_calib)[calib_filter] ratio = _np.mean(ratios) # ratio_rcp = round(1.0 / ratio, 1) @@ -1175,4 +1184,4 @@ def get_history_metadata(self) -> dict: if self._history_metadata is None: raise RuntimeError("Metadata was never retrieved so far, " "call history() to retrieve it") - return self._history_metadata \ No newline at end of file + return self._history_metadata