Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various fixes to price data #1102

Merged
merged 7 commits into from Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 34 additions & 7 deletions tests/prices.py
Expand Up @@ -12,6 +12,7 @@ def setUp(self):
def tearDown(self):
pass


def test_duplicatingDaily(self):
tkrs = []
tkrs.append("IMP.JO")
Expand Down Expand Up @@ -43,6 +44,7 @@ def test_duplicatingDaily(self):
if not test_run:
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")


def test_duplicatingWeekly(self):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
Expand All @@ -68,14 +70,27 @@ def test_duplicatingWeekly(self):
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")


def test_intraDayWithEvents(self):
    """Verify a dividend released pre-market survives the merge with intra-day prices.

    TASE (Tel Aviv) dividends are released before market open, so their
    timestamp does not align with any intra-day candle; the merge logic must
    still keep the dividend row rather than dropping it.
    """
    # NOTE: the diff scrape had left the superseded hard-coded-date version of
    # this test (fixed ESLT.TA 2022-10-06 window) concatenated above the new
    # body; only the dynamic-date version is kept here.
    tkr = "ICL.TA"
    # Other TASE tickers that also reproduce the issue:
    # tkr = "ESLT.TA"
    # tkr = "ONE.TA"
    # tkr = "MGDL.TA"

    # Find the most recent dividend within the last 60 days via daily data.
    start_d = _dt.date.today() - _dt.timedelta(days=60)
    end_d = None
    df_daily = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="1d", actions=True)
    df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
    if df_daily_divs.shape[0] == 0:
        self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")

    # Re-fetch just the dividend day at 15m interval; the dividend must
    # still be present after merging events with intra-day candles.
    last_div_date = df_daily_divs.index[-1]
    start_d = last_div_date.date()
    end_d = last_div_date.date() + _dt.timedelta(days=1)
    df = yf.Ticker(tkr).history(start=start_d, end=end_d, interval="15m", actions=True)
    self.assertTrue((df["Dividends"] != 0.0).any())


def test_dailyWithEvents(self):
# Reproduce issue #521
Expand Down Expand Up @@ -108,6 +123,7 @@ def test_dailyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
Expand Down Expand Up @@ -139,6 +155,7 @@ def test_weeklyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_monthlyWithEvents(self):
tkr1 = "QQQ"
tkr2 = "GDX"
Expand Down Expand Up @@ -169,5 +186,15 @@ def test_monthlyWithEvents(self):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise


def test_tz_dst_ambiguous(self):
    """Issue #1100: requests spanning a DST clock-rollback must not blow up.

    Localizing a midnight that occurs twice (clocks rolling back) used to
    raise AmbiguousTimeError; a plain history() call over such a window
    should now succeed.
    """
    dat = yf.Ticker("ESLT.TA")
    try:
        dat.history(start="2002-10-06", end="2002-10-09", interval="1d")
    except _tz.exceptions.AmbiguousTimeError:
        raise Exception("Ambiguous DST issue not resolved")


# Standard unittest entry point: discover and run every test in this module
# when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
60 changes: 36 additions & 24 deletions yfinance/base.py
Expand Up @@ -293,11 +293,20 @@ def history(self, period="1mo", interval="1d",
"chart"]["result"][0]["meta"]["priceHint"])
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)

if not keepna:
quotes.dropna(inplace=True)

# actions
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
if start is not None:
startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start))
if dividends is not None:
dividends = dividends[dividends.index>=startDt]
if splits is not None:
splits = splits[splits.index>=startDt]
if end is not None:
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
if dividends is not None:
dividends = dividends[dividends.index<endDt]
if splits is not None:
splits = splits[splits.index<endDt]

tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

Expand All @@ -308,27 +317,27 @@ def history(self, period="1mo", interval="1d",
splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange)
dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange)
if params["interval"] in ["1d","1w","1wk","1mo","3mo"]:
# Converting datetime->date should improve merge performance
quotes.index = _pd.to_datetime(quotes.index.date)
splits.index = _pd.to_datetime(splits.index.date)
dividends.index = _pd.to_datetime(dividends.index.date)
# Converting datetime->date should improve merge performance.
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True)
splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True)
dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True)

# combine
df = quotes
if actions:
df = df.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0
df = quotes.sort_index()
if dividends.shape[0] > 0:
df = utils.safe_merge_dfs(df, dividends, interval)
if "Dividends" in df.columns:
df.loc[df["Dividends"].isna(),"Dividends"] = 0
else:
df["Dividends"] = 0.0
if splits.shape[0] > 0:
df = utils.safe_merge_dfs(df, splits, interval)
if "Stock Splits" in df.columns:
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0

df = utils.fix_Yahoo_dst_issue(df, params["interval"])

Expand All @@ -340,10 +349,13 @@ def history(self, period="1mo", interval="1d",
df.index.name = "Date"

# duplicates and missing rows cleanup
df.dropna(how='all', inplace=True)
df = df[~df.index.duplicated(keep='first')]

self._history = df.copy()
if not actions:
df = df.drop(columns=["Dividends", "Stock Splits"])
if not keepna:
mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])

return df

Expand Down
27 changes: 19 additions & 8 deletions yfinance/utils.py
Expand Up @@ -349,6 +349,7 @@ def _reindex_events(df, new_index, data_col_name):
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
elif interval == "3mo":
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
new_index = new_index.tz_localize(df.index.tz)
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)

Expand Down Expand Up @@ -383,13 +384,19 @@ def _reindex_events(df, new_index, data_col_name):
dt_sub_i = last_main_dt ; fixed = True
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk" and last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1wk":
if last_main_dt.week == dt_sub_i.week:
dt_sub_i = last_main_dt ; fixed = True
elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)):
# With some specific start dates (e.g. around early Jan), Yahoo
# messes up start-of-week, is Saturday not Monday. So check
# if same week another way
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
dt_sub_i = last_main_dt ; fixed = True
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
dt_sub_i = last_main_dt ; fixed = True
else:
elif interval.endswith('m') or interval.endswith('h'):
td = _pd.to_timedelta(interval)
if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
dt_sub_i = last_main_dt ; fixed = True
Expand All @@ -402,11 +409,15 @@ def _reindex_events(df, new_index, data_col_name):
if data_lost:
## Not always possible to match events with trading, e.g. when released pre-market.
## So have to append to bottom with nan prices.
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
## But should only be impossible with intra-day price data.
if interval.endswith('m') or interval.endswith('h'):
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
else:
raise Exception("Lost data during merge despite all attempts to align data (see above)")

return df

Expand Down