Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests #1143

Merged
merged 1 commit into from Nov 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 45 additions & 0 deletions tests/prices.py
Expand Up @@ -11,6 +11,7 @@
import requests_cache, tempfile

# Temporary directory backing the on-disk requests cache used by the tests.
# The TemporaryDirectory object is kept at module level so the directory
# is not cleaned up while the module is alive.
td = tempfile.TemporaryDirectory()
cache_fp = f"{td.name}/yfinance.cache"


class TestPriceHistory(unittest.TestCase):
Expand Down Expand Up @@ -115,6 +116,8 @@ def test_dailyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -129,6 +132,7 @@ def test_dailyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -146,6 +150,8 @@ def test_weeklyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -160,6 +166,7 @@ def test_weeklyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -176,6 +183,8 @@ def test_monthlyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -190,6 +199,7 @@ def test_monthlyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -207,6 +217,41 @@ def test_tz_dst_ambiguous(self):
except _tz.exceptions.AmbiguousTimeError:
raise Exception("Ambiguous DST issue not resolved")

def test_dst_fix(self):
    """Check that Yahoo's off-by-a-few-hours daily timestamps are corrected.

    Daily intervals should start at time 00:00. But for some combinations
    of date and timezone, Yahoo has the time off by a few hours (e.g. Brazil
    23:00 around Jan-2022). Suspect a DST problem. The clue is
    (a) minutes=0 and (b) hour near 0. Obviously Yahoo meant 00:00, so
    ensure this doesn't affect date conversion.

    The correction is successful if no days fall on a weekend, and weekly
    data begins on Monday.
    """
    tkr = "AGRO3.SA"
    dat = yf.Ticker(tkr, session=self.session)
    start = "2021-01-11"
    end = "2022-11-05"

    # Daily rows: every date must be Monday (0) through Friday (4).
    interval = "1d"
    df = dat.history(start=start, end=end, interval=interval)
    self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())

    # Weekly rows: every row must be dated on a Monday.
    interval = "1wk"
    df = dat.history(start=start, end=end, interval=interval)
    try:
        self.assertTrue((df.index.weekday == 0).all())
    except AssertionError:
        # Only a failed assertion means misalignment; any other error
        # propagates without this (otherwise misleading) diagnostic.
        print("Weekly data not aligned to Monday")
        raise

def test_weekly_2rows_fix(self):
    """Weekly history starting on a recent Monday must contain only Monday rows."""
    symbol = "AMZN"

    # Go back two weeks from today, then roll back to that week's Monday.
    start = _dt.date.today() - _dt.timedelta(days=14)
    start = start - _dt.timedelta(days=start.weekday())

    weekly = yf.Ticker(symbol).history(start=start, interval="1wk")
    rows_on_monday = weekly.index.weekday == 0
    self.assertTrue(rows_on_monday.all())


def test_repair_weekly(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
Expand Down
5 changes: 4 additions & 1 deletion yfinance/base.py
Expand Up @@ -286,9 +286,9 @@ def history(self, period="1mo", interval="1d",
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

# Note: ordering is important. If you change order, run the tests!
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
quotes = utils.set_df_tz(quotes, params["interval"], tz_exchange)
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
if repair:
# Do this before auto/back adjust
quotes = self._fix_unit_mixups(quotes, interval, tz_exchange)
Expand Down Expand Up @@ -355,6 +355,9 @@ def history(self, period="1mo", interval="1d",
df.index.name = "Datetime"
else:
df.index.name = "Date"
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True)

# duplicates and missing rows cleanup
df = df[~df.index.duplicated(keep='first')]
Expand Down
4 changes: 0 additions & 4 deletions yfinance/utils.py
Expand Up @@ -460,10 +460,6 @@ def set_df_tz(df, interval, tz):
if df.index.tz is None:
df.index = df.index.tz_localize("UTC")
df.index = df.index.tz_convert(tz)
if interval in ["1d", "1w", "1wk", "1mo", "3mo"]:
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
df.index = _pd.to_datetime(df.index.date).tz_localize(tz, ambiguous=True)
return df


Expand Down