diff --git a/.gitignore b/.gitignore index 2dac9c903..5c73cbf9d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,10 @@ build/ *.html *.css *.png + +# Environments +.env +.venv +env/ +venv/ +ENV/ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 245505dfa..ce989f113 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,16 @@ Change Log =========== +0.2.0rc1 +------ +Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes +- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099 +- price repair feature #1110 +- fix merging of dividends/splits with prices #1069 #1086 #1102 +- fix Yahoo returning latest price interval across 2 rows #1070 +- optional: raise errors as exceptions: raise_errors=True #1104 +- add proper unit tests #1069 + 0.1.81 ------ - Fix unhandled tz-cache exception #1107 diff --git a/requirements.txt b/requirements.txt index 2e1bae2dc..28964912b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,7 @@ requests>=2.26 multitasking>=0.0.7 lxml>=4.5.1 appdirs>=1.4.4 -pytz>=2022.5 \ No newline at end of file +pytz>=2022.5 +frozendict>=2.3.4 +beautifulsoup4>=4.11.1 +html5lib>=1.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 442a48c50..fac4f1123 100644 --- a/setup.py +++ b/setup.py @@ -38,8 +38,8 @@ classifiers=[ 'License :: OSI Approved :: Apache Software License', # 'Development Status :: 3 - Alpha', - # 'Development Status :: 4 - Beta', - 'Development Status :: 5 - Production/Stable', + 'Development Status :: 4 - Beta', + #'Development Status :: 5 - Production/Stable', 'Operating System :: OS Independent', @@ -50,20 +50,20 @@ 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - # 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.6', 
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', ], platforms=['any'], keywords='pandas, yahoo finance, pandas datareader', packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']), - install_requires=['pandas>=0.24.0', 'numpy>=1.15', + install_requires=['pandas>=1.1.0', 'numpy>=1.15', 'requests>=2.26', 'multitasking>=0.0.7', - 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5'], + 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', + 'frozendict>=2.3.4', + 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], entry_points={ 'console_scripts': [ 'sample=sample:main', diff --git a/tests/prices.py b/tests/prices.py index 052739eb4..df167243b 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -6,19 +6,19 @@ import pytz as _tz import numpy as _np import pandas as _pd +import os # Create temp session import requests_cache, tempfile td = tempfile.TemporaryDirectory() -cache_fp = td.name+'/'+"yfinance.cache" class TestPriceHistory(unittest.TestCase): def setUp(self): global td self.td = td - self.session = requests_cache.CachedSession(self.td.name + '/' + "yfinance.cache") + self.session = requests_cache.CachedSession(os.path.join(self.td.name, "yfinance.cache")) def tearDown(self): self.session.close() @@ -116,8 +116,8 @@ def test_dailyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -132,7 +132,7 @@ def test_dailyWithEvents(self): for tkr 
in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -150,8 +150,8 @@ def test_weeklyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -166,7 +166,7 @@ def test_weeklyWithEvents(self): for tkr in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -183,8 +183,8 @@ def test_monthlyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + 
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -199,7 +199,7 @@ def test_monthlyWithEvents(self): for tkr in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -232,25 +232,24 @@ def test_dst_fix(self): interval = "1d" df = dat.history(start=start, end=end, interval=interval) - self.assertTrue(((df.index.weekday>=0) & (df.index.weekday<=4)).all()) + self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all()) interval = "1wk" df = dat.history(start=start, end=end, interval=interval) try: - self.assertTrue((df.index.weekday==0).all()) + self.assertTrue((df.index.weekday == 0).all()) except: print("Weekly data not aligned to Monday") raise def test_weekly_2rows_fix(self): tkr = "AMZN" - start = _dt.date.today()-_dt.timedelta(days=14) + start = _dt.date.today() - _dt.timedelta(days=14) start -= _dt.timedelta(days=start.weekday()) dat = yf.Ticker(tkr) df = dat.history(start=start, interval="1wk") - self.assertTrue((df.index.weekday==0).all()) - + self.assertTrue((df.index.weekday == 0).all()) def test_repair_weekly(self): # Sometimes, Yahoo returns prices 100x the correct value. 
@@ -501,9 +500,11 @@ def test_repair_daily(self): f_1 = ratio == 1 self.assertTrue((f_100 | f_1).all()) - -if __name__ == '__main__': - unittest.main() +try: + if __name__ == '__main__': + unittest.main() +finally: + td.cleanup() # # Run tests sequentially: # import inspect @@ -513,4 +514,3 @@ def test_repair_daily(self): # ) # unittest.main(verbosity=2) -td.cleanup() diff --git a/tests/ticker.py b/tests/ticker.py index e64920917..1d3f528ab 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -1,7 +1,16 @@ +import pandas as pd + from .context import yfinance as yf import unittest +log_requests = False + +if log_requests: + import logging + + logging.basicConfig(level=logging.DEBUG) + # Create temp session import requests_cache, tempfile td = tempfile.TemporaryDirectory() @@ -65,5 +74,207 @@ def test_badTicker(self): dat.earnings_dates dat.earnings_forecasts +class TestTickerEarnings(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_earnings_history(self): + data = self.ticker.earnings_history + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_history + self.assertIs(data, data_cached, "data not cached") + + def test_earnings(self): + data = self.ticker.earnings + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings + self.assertIs(data, data_cached, "data not cached") + + def test_quarterly_earnings(self): + data = self.ticker.quarterly_earnings + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_earnings + self.assertIs(data, data_cached, "data not cached") + + def test_earnings_forecasts(self): + data = self.ticker.earnings_forecasts + self.assertIsInstance(data, pd.DataFrame, 
"data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_forecasts + self.assertIs(data, data_cached, "data not cached") + + def test_earnings_dates(self): + data = self.ticker.earnings_dates + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_dates + self.assertIs(data, data_cached, "data not cached") + + def test_earnings_trend(self): + data = self.ticker.earnings_trend + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_trend + self.assertIs(data, data_cached, "data not cached") + + +class TestTickerHolders(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_major_holders(self): + data = self.ticker.major_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.major_holders + self.assertIs(data, data_cached, "data not cached") + + def test_institutional_holders(self): + data = self.ticker.institutional_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.institutional_holders + self.assertIs(data, data_cached, "data not cached") + + def test_mutualfund_holders(self): + data = self.ticker.mutualfund_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.mutualfund_holders + self.assertIs(data, data_cached, "data not cached") + + +class TestTickerMiscFinancials(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_balance_sheet(self): + data = 
self.ticker.balance_sheet + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.balance_sheet + self.assertIs(data, data_cached, "data not cached") + + def test_quarterly_balance_sheet(self): + data = self.ticker.quarterly_balance_sheet + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_balance_sheet + self.assertIs(data, data_cached, "data not cached") + + def test_cashflow(self): + data = self.ticker.cashflow + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.cashflow + self.assertIs(data, data_cached, "data not cached") + + def test_quarterly_cashflow(self): + data = self.ticker.quarterly_cashflow + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_cashflow + self.assertIs(data, data_cached, "data not cached") + + def test_sustainability(self): + data = self.ticker.sustainability + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.sustainability + self.assertIs(data, data_cached, "data not cached") + + def test_recommendations(self): + data = self.ticker.recommendations + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.recommendations + self.assertIs(data, data_cached, "data not cached") + + def test_recommendations_summary(self): + data = self.ticker.recommendations_summary + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.recommendations_summary + self.assertIs(data, data_cached, "data not cached") 
+ + def test_analyst_price_target(self): + data = self.ticker.analyst_price_target + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.analyst_price_target + self.assertIs(data, data_cached, "data not cached") + + def test_revenue_forecasts(self): + data = self.ticker.revenue_forecasts + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.revenue_forecasts + self.assertIs(data, data_cached, "data not cached") + + def test_calendar(self): + data = self.ticker.calendar + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.calendar + self.assertIs(data, data_cached, "data not cached") + + def test_isin(self): + data = self.ticker.isin + self.assertIsInstance(data, str, "data has wrong type") + self.assertEqual("ARDEUT116159", data, "data is empty") + + data_cached = self.ticker.isin + self.assertIs(data, data_cached, "data not cached") + + def test_options(self): + data = self.ticker.options + self.assertIsInstance(data, tuple, "data has wrong type") + self.assertTrue(len(data) > 1, "data is empty") + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(TestTicker('Test ticker')) + suite.addTest(TestTickerEarnings('Test earnings')) + suite.addTest(TestTickerHolders('Test holders')) + suite.addTest(TestTickerMiscFinancials('Test balance sheet')) + return suite + + if __name__ == '__main__': - unittest.main() + runner = unittest.TextTestRunner() + runner.run(suite()) diff --git a/yfinance/base.py b/yfinance/base.py index c38d057f2..731703375 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -23,21 +23,16 @@ import time as _time import datetime as _datetime -import requests as _requests import pandas as _pd import numpy as _np -import re as _re -try: - from urllib.parse import quote as 
urlencode -except ImportError: - from urllib import quote as urlencode +from .data import TickerData + +from urllib.parse import quote as urlencode from . import utils import json as _json -# import re as _re -# import sys as _sys from . import shared @@ -84,12 +79,9 @@ def __init__(self, ticker, session=None): if utils.is_isin(self.ticker): self.ticker = utils.get_ticker_by_isin(self.ticker, None, session) + self._data = TickerData(self.ticker, session=session) + def stats(self, proxy=None): - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} if self._fundamentals: return @@ -97,7 +89,7 @@ def stats(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - data = utils.get_json_data_stores(ticker_url, proxy, self.session)["QuoteSummaryStore"] + data = self._data.get_json_data_stores(ticker_url, proxy)["QuoteSummaryStore"] return data def history(self, period="1mo", interval="1d", @@ -197,15 +189,12 @@ def history(self, period="1mo", interval="1d", # Getting data from json url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) - session = self.session or _requests data = None try: - data = session.get( + data = self._data.get( url=url, params=params, - proxies=proxy, - headers=utils.user_agent_headers, timeout=timeout ) if "Will be right back" in data.text or data is None: @@ -355,7 +344,7 @@ def history(self, period="1mo", interval="1d", df.index.name = "Datetime" else: df.index.name = "Date" - # If localizing a midnight during DST transition hour when clocks roll back, + # If localizing a midnight during DST transition hour when clocks roll back, # meaning clock hits midnight twice, then use the 2nd (ambiguous=True) df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True) @@ -560,18 +549,11 @@ def _fetch_ticker_tz(self, debug_mode, proxy, timeout): params = {"range": 
"1d", "interval": "1d"} - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - # Getting data from json url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) - session = self.session or _requests try: - data = session.get(url=url, params=params, proxies=proxy, headers=utils.user_agent_headers, timeout=timeout) + data = self._data.get(url=url, params=params, proxy=proxy, timeout=timeout) data = data.json() except Exception as e: if debug_mode: @@ -596,12 +578,6 @@ def _fetch_ticker_tz(self, debug_mode, proxy, timeout): return None def _get_info(self, proxy=None): - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - if (self._info is not None) or (self._sustainability is not None) or self._recommendations: # No need to fetch return @@ -609,7 +585,7 @@ def _get_info(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - json_data = utils.get_json_data_stores(ticker_url, proxy, self.session) + json_data = self._data.get_json_data_stores(ticker_url, proxy) if 'QuoteSummaryStore' not in json_data: err_msg = "No summary info found, symbol may be delisted" print('- %s: %s' % (self.ticker, err_msg)) @@ -677,7 +653,8 @@ def _get_info(self, proxy=None): self._info['logo_url'] = "" try: if not 'website' in self._info: - self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % self._info['shortName'].split(' ')[0].split(',')[0] + self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \ + self._info['shortName'].split(' ')[0].split(',')[0] else: domain = self._info['website'].split( '://')[1].split('/')[0].replace('www.', '') @@ -712,9 +689,8 @@ def _get_info(self, proxy=None): pass # Complementary key-statistics. 
For now just want 'trailing PEG ratio' - session = self.session or _requests keys = {"trailingPegRatio"} - if len(keys)>0: + if len(keys) > 0: # Simplified the original scrape code for key-statistics. Very expensive for fetching # just one value, best if scraping most/all: # @@ -737,13 +713,16 @@ def _get_info(self, proxy=None): # pass # # For just one/few variable is faster to query directly: - url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(self.ticker, self.ticker) + url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format( + self.ticker, self.ticker) for k in keys: - url += "&type="+k + url += "&type=" + k # Request 6 months of data - url += "&period1={}".format(int((_datetime.datetime.now()-_datetime.timedelta(days=365//2)).timestamp())) - url += "&period2={}".format(int((_datetime.datetime.now()+_datetime.timedelta(days=1)).timestamp())) - json_str = session.get(url=url, proxies=proxy, headers=utils.user_agent_headers).text + url += "&period1={}".format( + int((_datetime.datetime.now() - _datetime.timedelta(days=365 // 2)).timestamp())) + url += "&period2={}".format(int((_datetime.datetime.now() + _datetime.timedelta(days=1)).timestamp())) + + json_str = self._data.get(url=url, proxy=proxy).text json_data = _json.loads(json_str) key_stats = json_data["timeseries"]["result"][0] if k not in key_stats: @@ -754,7 +733,6 @@ def _get_info(self, proxy=None): v = key_stats[k][-1]["reportedValue"]["raw"] self._info[k] = v - def _get_fundamentals(self, proxy=None): def cleanup(data): ''' @@ -780,12 +758,6 @@ def cleanup(data): df.index = utils.camel2title(df.index) return df - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - if self._fundamentals: return @@ -793,8 +765,8 @@ def cleanup(data): # holders try: - resp = utils.get_html(ticker_url + 
'/holders', proxy, self.session) - holders = _pd.read_html(resp) + resp = self._data.get(ticker_url + '/holders', proxy) + holders = _pd.read_html(resp.text) except Exception: holders = [] @@ -830,18 +802,18 @@ def cleanup(data): self._earnings = {"yearly": utils._pd.DataFrame(), "quarterly": utils._pd.DataFrame()} self._financials = {} for name in ["income", "balance-sheet", "cash-flow"]: - self._financials[name] = {"yearly":utils._pd.DataFrame(), "quarterly":utils._pd.DataFrame()} + self._financials[name] = {"yearly": utils._pd.DataFrame(), "quarterly": utils._pd.DataFrame()} - fin_data = utils.get_json_data_stores(ticker_url + '/financials', proxy, self.session) - if not "QuoteSummaryStore" in fin_data: + financials_data = self._data.get_json_data_stores(ticker_url + '/financials', proxy) + if not "QuoteSummaryStore" in financials_data: err_msg = "No financials data found, symbol may be delisted" print('- %s: %s' % (self.ticker, err_msg)) return None - fin_data_quote = fin_data['QuoteSummaryStore'] + fin_data_quote = financials_data['QuoteSummaryStore'] # generic patterns for name in ["income", "balance-sheet", "cash-flow"]: - annual, qtr = self._create_financials_table(name, proxy) + annual, qtr = self._create_financials_table(name, financials_data, proxy) if annual is not None: self._financials[name]["yearly"] = annual if qtr is not None: @@ -868,7 +840,9 @@ def cleanup(data): # shares outstanding try: # keep only years with non None data - available_shares = [shares_data for shares_data in fin_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] if shares_data] + available_shares = [shares_data for shares_data in + financials_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] if + shares_data] shares = _pd.DataFrame(available_shares) shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4])) shares.set_index('Year', inplace=True) @@ -881,7 +855,7 @@ def cleanup(data): pass # Analysis - data = 
utils.get_json_data_stores(ticker_url + '/analysis', proxy, self.session)["QuoteSummaryStore"] + data = self._data.get_json_data_stores(ticker_url + '/analysis', proxy)["QuoteSummaryStore"] if isinstance(data.get('earningsTrend'), dict): try: @@ -910,7 +884,7 @@ def cleanup(data): # Analysis Data/Analyst Forecasts try: - analysis_data = utils.get_json_data_stores(ticker_url+'/analysis',proxy,self.session) + analysis_data = self._data.get_json_data_stores(ticker_url + '/analysis', proxy) analysis_data = analysis_data['QuoteSummaryStore'] except Exception as e: analysis_data = {} @@ -919,7 +893,8 @@ def cleanup(data): except Exception as e: self._analyst_trend_details = None try: - self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T + self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[ + ['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T except Exception as e: self._analyst_price_target = None earnings_estimate = [] @@ -946,7 +921,7 @@ def cleanup(data): self._fundamentals = True - def _create_financials_table(self, name, proxy): + def _create_financials_table(self, name, financials_data, proxy): acceptable_names = ["income", "balance-sheet", "cash-flow"] if not name in acceptable_names: raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) @@ -955,9 +930,6 @@ def _create_financials_table(self, name, proxy): # Yahoo stores the 'income' table internally under 'financials' key name = "financials" - ticker_url = "{}/{}".format(self._scrape_url, self.ticker) - data_store = utils.get_json_data_stores(ticker_url+'/'+name, proxy, self.session) - _stmt_annual = None _stmt_qtr = None try: @@ -965,8 +937,8 @@ def _create_financials_table(self, name, proxy): # visible on Yahoo website. But more work needed to make it user-friendly! 
Ideally # return a tree data structure instead of Pandas MultiIndex # So until this is implemented, just return simple tables - _stmt_annual = utils.get_financials_time_series(self.ticker, name, "annual", ticker_url, proxy, self.session) - _stmt_qtr = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) + _stmt_annual = self._data.get_financials_time_series(name, "annual", financials_data, proxy) + _stmt_qtr = self._data.get_financials_time_series(name, "quarterly", financials_data, proxy) # template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) @@ -980,12 +952,11 @@ def _create_financials_table(self, name, proxy): # _qtr_data = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) # _stmt_qtr = utils.format_quarterly_financial_statement(_qtr_data, level_detail, template_order) - except: + except Exception as e: pass return _stmt_annual, _stmt_qtr - def get_recommendations(self, proxy=None, as_dict=False): self._get_info(proxy) data = self._recommendations @@ -1077,7 +1048,8 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): data = self._earnings[freq] if as_dict: dict_data = data.to_dict() - dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings['financialCurrency'] + dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings[ + 'financialCurrency'] return dict_data return data @@ -1144,12 +1116,6 @@ def get_isin(self, proxy=None): self._isin = '-' return self._isin - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - q = ticker self.get_info(proxy=proxy) if self._info is None: @@ 
-1161,12 +1127,7 @@ def get_isin(self, proxy=None): url = 'https://markets.businessinsider.com/ajax/' \ 'SearchController_Suggest?max_results=25&query=%s' \ % urlencode(q) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).text + data = self._data.get(url=url, proxy=proxy).text search_str = '"{}|'.format(ticker) if search_str not in data: @@ -1186,20 +1147,9 @@ def get_news(self, proxy=None): if self._news: return self._news - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - # Getting data from json url = "{}/v1/finance/search?q={}".format(self._base_url, self.ticker) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ) + data = self._data.get(url=url, proxy=proxy) if "Will be right back" in data.text: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" "Our engineers are working quickly to resolve " @@ -1214,12 +1164,6 @@ def get_earnings_dates(self, proxy=None): if self._earnings_dates is not None: return self._earnings_dates - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - page_size = 100 # YF caps at 100, don't go higher page_offset = 0 dates = None @@ -1227,12 +1171,7 @@ def get_earnings_dates(self, proxy=None): url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format( _ROOT_URL_, self.ticker, page_offset, page_size) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).text + data = self._data.get(url=url, proxy=proxy).text if "Will be right back" in data: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! 
***\n" @@ -1292,3 +1231,29 @@ def get_earnings_dates(self, proxy=None): self._earnings_dates = dates return dates + + def get_earnings_history(self, proxy=None): + if self._earnings_history is not None: + return self._earnings_history + + url = "{}/calendar/earnings?symbol={}".format(_ROOT_URL_, self.ticker) + data = self._data.get(url=url, proxy=proxy).text + + if "Will be right back" in data: + raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" + "Our engineers are working quickly to resolve " + "the issue. Thank you for your patience.") + + try: + # read_html returns a list of pandas Dataframes of all the tables in `data` + data = _pd.read_html(data)[0] + data.replace("-", _np.nan, inplace=True) + + data['EPS Estimate'] = _pd.to_numeric(data['EPS Estimate']) + data['Reported EPS'] = _pd.to_numeric(data['Reported EPS']) + self._earnings_history = data + # if no tables are found a ValueError is thrown + except ValueError: + print("Could not find earnings history data for {}.".format(self.ticker)) + return + return data diff --git a/yfinance/data.py b/yfinance/data.py new file mode 100644 index 000000000..6ce5a68fe --- /dev/null +++ b/yfinance/data.py @@ -0,0 +1,156 @@ +import datetime +import functools +from functools import lru_cache + +import pandas as pd +import requests as requests +import re + +from frozendict import frozendict + +try: + import ujson as json +except ImportError: + import json as json + +cache_maxsize = 64 + + +def lru_cache_freezeargs(func): + """ + Decorator transforms mutable dictionary arguments into immutable + Needed so lru_cache can cache method calls what has dict arguments. 
+ """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args]) + kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} + return func(*args, **kwargs) + + # copy over the lru_cache extra methods to this wrapper to be able to access them + # after this decorator has been applied + wrapped.cache_info = func.cache_info + wrapped.cache_clear = func.cache_clear + return wrapped + + +class TickerData: + """ + Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations + """ + user_agent_headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} + + def __init__(self, ticker: str, session=None): + self._ticker = ticker + self._session = session or requests + + @lru_cache_freezeargs + @lru_cache(maxsize=cache_maxsize) + def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30): + proxy = self._get_proxy(proxy) + response = self._session.get( + url=url, + params=params, + proxies=proxy, + timeout=timeout, + headers=user_agent_headers or self.user_agent_headers) + return response + + def _get_proxy(self, proxy): + # setup proxy in requests format + if proxy is not None: + if isinstance(proxy, dict) and "https" in proxy: + proxy = proxy["https"] + proxy = {"https": proxy} + return proxy + + @lru_cache_freezeargs + @lru_cache(maxsize=cache_maxsize) + def get_json_data_stores(self, url, proxy=None): + ''' + get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page. 
+ ''' + html = self.get(url=url, proxy=proxy).text + + json_str = html.split('root.App.main =')[1].split( + '(this)')[0].split(';\n}')[0].strip() + data = json.loads(json_str)['context']['dispatcher']['stores'] + + # return data + new_data = json.dumps(data).replace('{}', 'null') + new_data = re.sub( + r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data) + + return json.loads(new_data) + + # Note can't use lru_cache as financials_data is a nested dict (freezeargs only handle flat dicts) + def get_financials_time_series(self, name, timescale, financials_data, proxy=None): + + acceptable_names = ["financials", "balance-sheet", "cash-flow"] + if name not in acceptable_names: + raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) + acceptable_timestamps = ["annual", "quarterly"] + if timescale not in acceptable_timestamps: + raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) + + # Step 1: get the keys: + def _finditem1(key, obj): + values = [] + if isinstance(obj, dict): + if key in obj.keys(): + values.append(obj[key]) + for k, v in obj.items(): + values += _finditem1(key, v) + elif isinstance(obj, list): + for v in obj: + values += _finditem1(key, v) + return values + + keys = _finditem1("key", financials_data['FinancialTemplateStore']) + + # Step 2: construct url: + ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format( + self._ticker) + if len(keys) == 0: + raise Exception("Fetching keys failed") + url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys]) + # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: + start_dt = datetime.datetime(2016, 12, 31) + end = (datetime.datetime.now() + datetime.timedelta(days=366)) + url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp())) + + # Step 3: fetch and reshape data + json_str = self.get(url=url, proxy=proxy).text + json_data = 
json.loads(json_str) + data_raw = json_data["timeseries"]["result"] + # data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data + for d in data_raw: + del d["meta"] + + # Now reshape data into a table: + # Step 1: get columns and index: + timestamps = set() + data_unpacked = {} + for x in data_raw: + for k in x.keys(): + if k == "timestamp": + timestamps.update(x[k]) + else: + data_unpacked[k] = x[k] + timestamps = sorted(list(timestamps)) + dates = pd.to_datetime(timestamps, unit="s") + df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys())) + for k, v in data_unpacked.items(): + if df is None: + df = pd.DataFrame(columns=dates, index=[k]) + df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v} + + df.index = df.index.str.replace("^" + timescale, "", regex=True) + + # Reorder table to match order on Yahoo website + df = df.reindex([k for k in keys if k in df.index]) + df = df[sorted(df.columns, reverse=True)] + + return df diff --git a/yfinance/ticker.py b/yfinance/ticker.py index d14af9599..0c821ebf8 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -21,17 +21,11 @@ from __future__ import print_function -# import time as _time import datetime as _datetime -import requests as _requests import pandas as _pd -# import numpy as _np -# import json as _json -# import re as _re from collections import namedtuple as _namedtuple -from . 
import utils from .base import TickerBase @@ -48,17 +42,7 @@ def _download_options(self, date=None, proxy=None): url = "{}/v7/finance/options/{}?date={}".format( self._base_url, self.ticker, date) - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - - r = _requests.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).json() + r = self._data.get(url=url, proxy=proxy).json() if len(r.get('optionChain', {}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: self._expirations[_datetime.datetime.utcfromtimestamp( diff --git a/yfinance/utils.py b/yfinance/utils.py index fe53f8317..689238f6b 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -109,31 +109,6 @@ def empty_earnings_dates_df(): return empty -def get_html(url, proxy=None, session=None): - session = session or _requests - html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - return html - - -def get_json_data_stores(url, proxy=None, session=None): - ''' - get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page. - ''' - session = session or _requests - html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - - json_str = html.split('root.App.main =')[1].split( - '(this)')[0].split(';\n}')[0].strip() - data = _json.loads(json_str)['context']['dispatcher']['stores'] - - # return data - new_data = _json.dumps(data).replace('{}', 'null') - new_data = _re.sub( - r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data) - - return _json.loads(new_data) - - def build_template(data): ''' build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. 
The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". @@ -266,79 +241,6 @@ def format_quarterly_financial_statement(_statement, level_detail, order): return _statement -def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session=None): - acceptable_names = ["financials", "balance-sheet", "cash-flow"] - if not name in acceptable_names: - raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) - acceptable_timestamps = ["annual", "quarterly"] - if not timescale in acceptable_timestamps: - raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) - - session = session or _requests - - financials_data = get_json_data_stores(ticker_url + '/' + name, proxy, session) - - # Step 1: get the keys: - def _finditem1(key, obj): - values = [] - if isinstance(obj, dict): - if key in obj.keys(): - values.append(obj[key]) - for k, v in obj.items(): - values += _finditem1(key, v) - elif isinstance(obj, list): - for v in obj: - values += _finditem1(key, v) - return values - - keys = _finditem1("key", financials_data['FinancialTemplateStore']) - - # Step 2: construct url: - ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format( - ticker) - if len(keys) == 0: - raise Exception("Fetching keys failed") - url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys]) - # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: - start_dt = _datetime.datetime(2016, 12, 31) - end = (_datetime.datetime.now() + _datetime.timedelta(days=366)) - url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp())) - - # Step 3: fetch and reshape data - json_str = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - json_data = _json.loads(json_str) - data_raw = 
json_data["timeseries"]["result"] - # data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data - for d in data_raw: - del d["meta"] - - # Now reshape data into a table: - # Step 1: get columns and index: - timestamps = set() - data_unpacked = {} - for x in data_raw: - for k in x.keys(): - if k == "timestamp": - timestamps.update(x[k]) - else: - data_unpacked[k] = x[k] - timestamps = sorted(list(timestamps)) - dates = _pd.to_datetime(timestamps, unit="s") - df = _pd.DataFrame(columns=dates, index=data_unpacked.keys()) - for k, v in data_unpacked.items(): - if df is None: - df = _pd.DataFrame(columns=dates, index=[k]) - df.loc[k] = {_pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v} - - df.index = df.index.str.replace("^" + timescale, "", regex=True) - - # Reorder table to match order on Yahoo website - df = df.reindex([k for k in keys if k in df.index]) - df = df[sorted(df.columns, reverse=True)] - - return df - - def camel2title(o): return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o] diff --git a/yfinance/version.py b/yfinance/version.py index 4c60c717a..eae51afe9 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.1.81" +version = "0.2.0rc1"