Skip to content

Commit

Permalink
Merge pull request #1102 from ranaroussi/fix/price-tz-and-events
Browse files Browse the repository at this point in the history
Various fixes to price data
  • Loading branch information
ValueRaider committed Oct 21, 2022
2 parents f20aa9a + 40424b7 commit 303e0ea
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 39 deletions.
41 changes: 34 additions & 7 deletions tests/prices.py
Expand Up @@ -12,6 +12,7 @@ def setUp(self):
def tearDown(self):
    # No per-test cleanup is required; the hook is kept so the
    # TestCase scaffold stays explicit and symmetric with setUp().
    pass


def test_duplicatingDaily(self):
tkrs = []
tkrs.append("IMP.JO")
Expand Down Expand Up @@ -43,6 +44,7 @@ def test_duplicatingDaily(self):
if not test_run:
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")


def test_duplicatingWeekly(self):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
Expand All @@ -68,14 +70,27 @@ def test_duplicatingWeekly(self):
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")


def test_intraDayWithEvents(self):
    # TASE (Tel Aviv) dividends are released pre-market, which doesn't merge
    # nicely with intra-day price rows — verify the dividend is still present
    # in the 15m table instead of being silently dropped.
    tkr = "ICL.TA"
    # Alternative TASE tickers that exhibit the same pre-market dividend timing:
    # tkr = "ESLT.TA"
    # tkr = "ONE.TA"
    # tkr = "MGDL.TA"

    # First locate the most recent dividend via daily data; if there is none
    # in the last 60 days the scenario can't be exercised, so skip.
    start_d = _dt.date.today() - _dt.timedelta(days=60)
    end_d = None
    df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True)
    df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
    if df_daily_divs.shape[0] == 0:
        self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")

    # Re-fetch just the dividend day at 15m resolution; the dividend row must
    # survive the intra-day merge even though its timestamp is pre-market.
    last_div_date = df_daily_divs.index[-1]
    start_d = last_div_date.date()
    end_d = last_div_date.date() + _dt.timedelta(days=1)
    df = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", actions=True)
    self.assertTrue((df["Dividends"] != 0.0).any())


def test_dailyWithEvents(self):
# Reproduce issue #521
Expand Down Expand Up @@ -108,6 +123,7 @@ def test_dailyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
Expand Down Expand Up @@ -139,6 +155,7 @@ def test_weeklyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_monthlyWithEvents(self):
tkr1 = "QQQ"
tkr2 = "GDX"
Expand Down Expand Up @@ -169,5 +186,15 @@ def test_monthlyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_tz_dst_ambiguous(self):
    # Reproduce issue #1100: fetching TASE daily data spanning the Israel
    # DST fall-back date used to raise AmbiguousTimeError when localizing
    # midnight timestamps (midnight occurs twice when clocks roll back).
    try:
        yf.Ticker("ESLT.TA").history(start="2002-10-06", end="2002-10-09", interval="1d")
    except _tz.exceptions.AmbiguousTimeError as e:
        # Chain the original exception so the underlying DST traceback
        # is preserved for debugging instead of being discarded.
        raise Exception("Ambiguous DST issue not resolved") from e


# Allow running this test module directly (python tests/prices.py)
# in addition to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()
60 changes: 36 additions & 24 deletions yfinance/base.py
Expand Up @@ -290,11 +290,20 @@ def history(self, period="1mo", interval="1d",
"chart"]["result"][0]["meta"]["priceHint"])
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)

if not keepna:
quotes.dropna(inplace=True)

# actions
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
if start is not None:
startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start))
if dividends is not None:
dividends = dividends[dividends.index>=startDt]
if splits is not None:
splits = splits[splits.index>=startDt]
if end is not None:
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
if dividends is not None:
dividends = dividends[dividends.index<endDt]
if splits is not None:
splits = splits[splits.index<endDt]

tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

Expand All @@ -305,27 +314,27 @@ def history(self, period="1mo", interval="1d",
splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange)
dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange)
if params["interval"] in ["1d","1w","1wk","1mo","3mo"]:
# Converting datetime->date should improve merge performance
quotes.index = _pd.to_datetime(quotes.index.date)
splits.index = _pd.to_datetime(splits.index.date)
dividends.index = _pd.to_datetime(dividends.index.date)
# Converting datetime->date should improve merge performance.
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True)
splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True)
dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True)

# combine
df = quotes
if actions:
df = df.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0
df = quotes.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0

df = utils.fix_Yahoo_dst_issue(df, params["interval"])

Expand All @@ -337,10 +346,13 @@ def history(self, period="1mo", interval="1d",
df.index.name = "Date"

# duplicates and missing rows cleanup
df.dropna(how='all', inplace=True)
df = df[~df.index.duplicated(keep='first')]

self._history = df.copy()
if not actions:
df = df.drop(columns=["Dividends", "Stock Splits"])
if not keepna:
mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])

return df

Expand Down
27 changes: 19 additions & 8 deletions yfinance/utils.py
Expand Up @@ -352,6 +352,7 @@ def _reindex_events(df, new_index, data_col_name):
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
elif interval == "3mo":
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
new_index = new_index.tz_localize(df.index.tz)
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)

Expand Down Expand Up @@ -386,13 +387,19 @@ def _reindex_events(df, new_index, data_col_name):
dt_sub_i = last_main_dt ; fixed = True
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk" and last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk":
if last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)):
# With some specific start dates (e.g. around early Jan), Yahoo
# messes up start-of-week, is Saturday not Monday. So check
# if same week another way
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
dt_sub_i = last_main_dt ; fixed = True
else:
elif interval.endswith('m') or interval.endswith('h'):
td = _pd.to_timedelta(interval)
if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
dt_sub_i = last_main_dt ; fixed = True
Expand All @@ -405,11 +412,15 @@ def _reindex_events(df, new_index, data_col_name):
if data_lost:
## Not always possible to match events with trading, e.g. when released pre-market.
## So have to append to bottom with nan prices.
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
## But should only be impossible with intra-day price data.
if interval.endswith('m') or interval.endswith('h'):
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
else:
raise Exception("Lost data during merge despite all attempts to align data (see above)")

return df

Expand Down

0 comments on commit 303e0ea

Please sign in to comment.