Skip to content

Commit

Permalink
Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed Nov 6, 2022
1 parent 45169d9 commit 1b439c4
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
45 changes: 45 additions & 0 deletions tests/prices.py
Expand Up @@ -11,6 +11,7 @@
import requests_cache, tempfile

td = tempfile.TemporaryDirectory()
cache_fp = td.name+'/'+"yfinance.cache"


class TestPriceHistory(unittest.TestCase):
Expand Down Expand Up @@ -115,6 +116,8 @@ def test_dailyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -129,6 +132,7 @@ def test_dailyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -146,6 +150,8 @@ def test_weeklyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -160,6 +166,7 @@ def test_weeklyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -176,6 +183,8 @@ def test_monthlyWithEvents(self):
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -190,6 +199,7 @@ def test_monthlyWithEvents(self):
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False)
self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
Expand All @@ -207,6 +217,41 @@ def test_tz_dst_ambiguous(self):
except _tz.exceptions.AmbiguousTimeError:
raise Exception("Ambiguous DST issue not resolved")

def test_dst_fix(self):
    # Daily intervals should start at time 00:00. But for some combinations of date and timezone,
    # Yahoo has the time off by a few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
    # The clue is (a) minutes=0 and (b) hour near 0.
    # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.

    # The correction is successful if no days are weekend, and weekly data begins Monday

    tkr = "AGRO3.SA"
    dat = yf.Ticker(tkr, session=self.session)
    start = "2021-01-11"
    end = "2022-11-05"

    # Daily rows: pandas numbers weekdays Monday=0 .. Sunday=6, so 0..4 means Mon-Fri.
    interval = "1d"
    df = dat.history(start=start, end=end, interval=interval)
    self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())

    # Weekly rows: every row must be stamped on a Monday (weekday 0).
    interval = "1wk"
    df = dat.history(start=start, end=end, interval=interval)
    try:
        self.assertTrue((df.index.weekday == 0).all())
    except AssertionError:
        # Only the assertion failure is annotated; a bare ``except`` would also
        # intercept KeyboardInterrupt/SystemExit before re-raising them.
        print("Weekly data not aligned to Monday")
        raise

def test_weekly_2rows_fix(self):
    # Request weekly data starting from the Monday of the week that contains
    # the date 14 days ago, then require every returned row to fall on a Monday.
    # NOTE(review): presumably guards the "weekly-2-rows" Yahoo quirk named in the
    # commit title - confirm against utils.fix_Yahoo_returning_live_separate.
    tkr = "AMZN"

    two_weeks_ago = _dt.date.today() - _dt.timedelta(days=14)
    # date.weekday() is 0 for Monday, so subtracting it snaps back to that week's Monday.
    start = two_weeks_ago - _dt.timedelta(days=two_weeks_ago.weekday())

    df = yf.Ticker(tkr).history(start=start, interval="1wk")
    all_rows_on_monday = (df.index.weekday == 0).all()
    self.assertTrue(all_rows_on_monday)


def test_repair_weekly(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
Expand Down
5 changes: 4 additions & 1 deletion yfinance/base.py
Expand Up @@ -286,9 +286,9 @@ def history(self, period="1mo", interval="1d",
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]

# Note: ordering is important. If you change order, run the tests!
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
quotes = utils.set_df_tz(quotes, params["interval"], tz_exchange)
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
if repair:
# Do this before auto/back adjust
quotes = self._fix_unit_mixups(quotes, interval, tz_exchange)
Expand Down Expand Up @@ -355,6 +355,9 @@ def history(self, period="1mo", interval="1d",
df.index.name = "Datetime"
else:
df.index.name = "Date"
# If localizing a midnight during the DST transition hour when clocks roll back,
# meaning the clock hits midnight twice, then treat it as the DST occurrence,
# i.e. the 1st of the two (ambiguous=True means "is DST" in pandas)
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True)

# duplicates and missing rows cleanup
df = df[~df.index.duplicated(keep='first')]
Expand Down
4 changes: 0 additions & 4 deletions yfinance/utils.py
Expand Up @@ -460,10 +460,6 @@ def set_df_tz(df, interval, tz):
if df.index.tz is None:
df.index = df.index.tz_localize("UTC")
df.index = df.index.tz_convert(tz)
if interval in ["1d", "1w", "1wk", "1mo", "3mo"]:
# If localizing a midnight during the DST transition hour when clocks roll back,
# meaning the clock hits midnight twice, then treat it as the DST occurrence,
# i.e. the 1st of the two (ambiguous=True means "is DST" in pandas)
df.index = _pd.to_datetime(df.index.date).tz_localize(tz, ambiguous=True)
return df


Expand Down

0 comments on commit 1b439c4

Please sign in to comment.