Skip to content

Commit

Permalink
Merge branch 'dev' into fix/weekly-prices
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed Oct 14, 2022
2 parents c679551 + fdf52ac commit 5fdf246
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 21 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
Change Log
===========

0.1.77
------
- Fix user experience bug #1078

0.1.75
------
- Fixed datetime-related issues: #1048
- Add 'keepna' argument #1032
- Speedup Ticker() creation #1042
- Improve a bugfix #1033

0.1.74
------
- Fixed bug introduced in 0.1.73 (sorry :/)
Expand Down
100 changes: 100 additions & 0 deletions tests/prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,106 @@ def test_duplicatingWeekly(self):
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")

def test_intraDayWithEvents(self):
    """A dividend released pre-market doesn't merge nicely with intra-day
    prices, so verify it still appears in the returned table."""
    ticker = "ESLT.TA"
    df = yf.Ticker(ticker).history(start="2022-10-06", end="2022-10-07",
                                   interval="15m", prepost=True, actions=True)
    # At least one row must carry a non-zero dividend.
    self.assertTrue((df["Dividends"] != 0.0).any())

def test_dailyWithEvents(self):
    """Reproduce issue #521: the daily price index must be identical across
    tickers and must not change when dividends/splits are merged in."""
    tkr1 = "QQQ"
    tkr2 = "GDX"
    start_d = "2014-12-29"
    end_d = "2020-11-29"

    def _check_same_index(df_a, df_b, label_a, label_b):
        # On failure, print the mismatching dates before re-raising to aid
        # debugging. Catch only AssertionError — a bare 'except:' would also
        # swallow KeyboardInterrupt/SystemExit.
        try:
            self.assertTrue(df_a.index.equals(df_b.index))
        except AssertionError:
            print("{} missing these dates: {}".format(label_a, df_b.index.difference(df_a.index)))
            print("{} missing these dates: {}".format(label_b, df_a.index.difference(df_b.index)))
            raise

    df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
    df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
    _check_same_index(df1, df2, tkr1, tkr2)

    # Test that index same with and without events:
    for tkr in [tkr1, tkr2]:
        df1 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True)
        df2 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=False)
        _check_same_index(df1, df2, tkr + "-with-events", tkr + "-without-events")

def test_weeklyWithEvents(self):
    """Reproduce issue #521: the weekly price index must be identical across
    tickers and must not change when dividends/splits are merged in."""
    tkr1 = "QQQ"
    tkr2 = "GDX"
    start_d = "2014-12-29"
    end_d = "2020-11-29"

    def _check_same_index(df_a, df_b, label_a, label_b):
        # On failure, print the mismatching dates before re-raising to aid
        # debugging. Catch only AssertionError — a bare 'except:' would also
        # swallow KeyboardInterrupt/SystemExit.
        try:
            self.assertTrue(df_a.index.equals(df_b.index))
        except AssertionError:
            print("{} missing these dates: {}".format(label_a, df_b.index.difference(df_a.index)))
            print("{} missing these dates: {}".format(label_b, df_a.index.difference(df_b.index)))
            raise

    df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
    df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
    _check_same_index(df1, df2, tkr1, tkr2)

    # Test that index same with and without events:
    for tkr in [tkr1, tkr2]:
        df1 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1wk", actions=True)
        df2 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1wk", actions=False)
        _check_same_index(df1, df2, tkr + "-with-events", tkr + "-without-events")

def test_monthlyWithEvents(self):
    """The monthly price index must be identical across tickers and must not
    change when dividends/splits are merged in."""
    tkr1 = "QQQ"
    tkr2 = "GDX"
    start_d = "2014-12-29"
    end_d = "2020-11-29"

    def _check_same_index(df_a, df_b, label_a, label_b):
        # On failure, print the mismatching dates before re-raising to aid
        # debugging. Catch only AssertionError — a bare 'except:' would also
        # swallow KeyboardInterrupt/SystemExit.
        try:
            self.assertTrue(df_a.index.equals(df_b.index))
        except AssertionError:
            print("{} missing these dates: {}".format(label_a, df_b.index.difference(df_a.index)))
            print("{} missing these dates: {}".format(label_b, df_a.index.difference(df_b.index)))
            raise

    df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True)
    df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True)
    _check_same_index(df1, df2, tkr1, tkr2)

    # Test that index same with and without events:
    for tkr in [tkr1, tkr2]:
        df1 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1mo", actions=True)
        df2 = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1mo", actions=False)
        _check_same_index(df1, df2, tkr + "-with-events", tkr + "-without-events")

# Run the full test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
66 changes: 47 additions & 19 deletions yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,38 @@ def history(self, period="1mo", interval="1d",
error message printing to console.
"""

# Work with errors
debug_mode = True
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
debug_mode = kwargs["debug"]

err_msg = "No data found for this date range, symbol may be delisted"

if start or period is None or period.lower() == "max":
# Check can get TZ. Fail => probably delisted
try:
tz = self._get_ticker_tz()
except KeyError as e:
if "exchangeTimezoneName" in str(e):
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return utils.empty_df()
else:
raise

if end is None:
end = int(_time.time())
else:
end = utils._parse_user_dt(end, self._get_ticker_tz())
end = utils._parse_user_dt(end, tz)
if start is None:
if interval == "1m":
start = end - 604800 # Subtract 7 days
else:
start = -631159200
else:
start = utils._parse_user_dt(start, self._get_ticker_tz())
start = utils._parse_user_dt(start, tz)
params = {"period1": start, "period2": end}
else:
period = period.lower()
Expand Down Expand Up @@ -195,13 +215,6 @@ def history(self, period="1mo", interval="1d",
except Exception:
pass

# Work with errors
debug_mode = True
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
debug_mode = kwargs["debug"]

err_msg = "No data found for this date range, symbol may be delisted"

if data is None or not type(data) is dict or 'status_code' in data.keys():
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
Expand Down Expand Up @@ -289,14 +302,33 @@ def history(self, period="1mo", interval="1d",
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)

# prepare index for combine:
quotes.index = quotes.index.tz_localize("UTC").tz_convert(tz_exchange)
splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange)
dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange)
if params["interval"] in ["1d","1w","1wk","1mo","3mo"]:
# Converting datetime->date should improve merge performance
quotes.index = _pd.to_datetime(quotes.index.date)
splits.index = _pd.to_datetime(splits.index.date)
dividends.index = _pd.to_datetime(dividends.index.date)

# combine
df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
df["Dividends"].fillna(0, inplace=True)
df["Stock Splits"].fillna(0, inplace=True)

# index eod/intraday
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange)
df = quotes
if actions:
df = df.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0

df = utils.fix_Yahoo_dst_issue(df, params["interval"])

Expand All @@ -305,7 +337,6 @@ def history(self, period="1mo", interval="1d",
elif params["interval"] == "1h":
pass
else:
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange)
df.index.name = "Date"

# duplicates and missing rows cleanup
Expand All @@ -314,9 +345,6 @@ def history(self, period="1mo", interval="1d",

self._history = df.copy()

if not actions:
df.drop(columns=["Dividends", "Stock Splits"], inplace=True)

return df

# ------------------------
Expand Down
116 changes: 115 additions & 1 deletion yfinance/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def parse_actions(data):
splits.sort_index(inplace=True)
splits["Stock Splits"] = splits["numerator"] / \
splits["denominator"]
splits = splits["Stock Splits"]
splits = splits[["Stock Splits"]]

return dividends, splits

Expand Down Expand Up @@ -297,6 +297,120 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
return quotes


def safe_merge_dfs(df_main, df_sub, interval):
# Carefully merge 'df_sub' onto 'df_main'
# If naive merge fails, try again with reindexing df_sub:
# 1) if interval is weekly or monthly, then try with index set to start of week/month
# 2) if still failing then manually search through df_main.index to reindex df_sub

if df_sub.shape[0] == 0:
raise Exception("No data to merge")

df_sub_backup = df_sub.copy()
data_cols = [c for c in df_sub.columns if not c in df_main]
if len(data_cols) > 1:
raise Exception("Expected 1 data col")
data_col = data_cols[0]

def _reindex_events(df, new_index, data_col_name):
if len(new_index) == len(set(new_index)):
# No duplicates, easy
df.index = new_index
return df

df["_NewIndex"] = new_index
# Duplicates present within periods but can aggregate
if data_col_name == "Dividends":
# Add
df = df.groupby("_NewIndex").sum()
df.index.name = None
elif data_col_name == "Stock Splits":
# Product
df = df.groupby("_NewIndex").prod()
df.index.name = None
else:
raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name))
if "_NewIndex" in df.columns:
df = df.drop("_NewIndex",axis=1)
return df

df = df_main.join(df_sub)

f_na = df[data_col].isna()
data_lost = sum(~f_na) < df_sub.shape[0]
if not data_lost:
return df
# Lost data during join()
if interval in ["1wk","1mo","3mo"]:
# Backdate all df_sub.index dates to start of week/month
if interval == "1wk":
new_index = _pd.PeriodIndex(df_sub.index, freq='W').to_timestamp()
elif interval == "1mo":
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
elif interval == "3mo":
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)

f_na = df[data_col].isna()
data_lost = sum(~f_na) < df_sub.shape[0]
if not data_lost:
return df
# Lost data during join(). Manually check each df_sub.index date against df_main.index to
# find matching interval
df_sub = df_sub_backup.copy()
new_index = [-1]*df_sub.shape[0]
for i in range(df_sub.shape[0]):
dt_sub_i = df_sub.index[i]
if dt_sub_i in df_main.index:
new_index[i] = dt_sub_i ; continue
# Found a bad index date, need to search for near-match in df_main (same week/month)
fixed = False
for j in range(df_main.shape[0]-1):
dt_main_j0 = df_main.index[j]
dt_main_j1 = df_main.index[j+1]
if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
fixed = True
if interval.endswith('h') or interval.endswith('m'):
# Must also be same day
fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date())
if fixed:
dt_sub_i = dt_main_j0 ; break
if not fixed:
last_main_dt = df_main.index[df_main.shape[0]-1]
diff = dt_sub_i - last_main_dt
if interval == "1mo" and last_main_dt.month == dt_sub_i.month:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk" and last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
dt_sub_i = last_main_dt ; fixed = True
else:
td = _pd.to_timedelta(interval)
if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
dt_sub_i = last_main_dt ; fixed = True
new_index[i] = dt_sub_i
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)

f_na = df[data_col].isna()
data_lost = sum(~f_na) < df_sub.shape[0]
if data_lost:
## Not always possible to match events with trading, e.g. when released pre-market.
## So have to append to bottom with nan prices.
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)

return df


def fix_Yahoo_dst_issue(df, interval):
if interval in ["1d","1w","1wk"]:
# These intervals should start at time 00:00. But for some combinations of date and timezone,
Expand Down
2 changes: 1 addition & 1 deletion yfinance/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.1.74"
version = "0.1.77"

0 comments on commit 5fdf246

Please sign in to comment.