From fb5c67b3bddb103f4b53f2e2274355a3c166b3d7 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Wed, 26 Oct 2022 22:39:41 +0100 Subject: [PATCH 01/14] Bump version to 0.2.0rc1 - big update --- CHANGELOG.rst | 10 ++++++++++ yfinance/version.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 245505dfa..ce989f113 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,16 @@ Change Log =========== +0.2.0rc1 +------ +Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes +- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099 +- price repair feature #1110 +- fix merging of dividends/splits with prices #1069 #1086 #1102 +- fix Yahoo returning latest price interval across 2 rows #1070 +- optional: raise errors as exceptions: raise_errors=True #1104 +- add proper unit tests #1069 + 0.1.81 ------ - Fix unhandled tz-cache exception #1107 diff --git a/yfinance/version.py b/yfinance/version.py index 4c60c717a..eae51afe9 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.1.81" +version = "0.2.0rc1" From 05520ee10817328afcf60c5721a49dd84fc05016 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 13:10:06 +0100 Subject: [PATCH 02/14] Have one place to retrieve data in order to ease caching and speed up operations and reduce code duplication. Needs Python 3.6 --- .gitignore | 9 +++ requirements.txt | 3 +- setup.py | 2 +- tests/ticker.py | 82 ++++++++++++++++++++++- yfinance/base.py | 157 +++++++++++++------------------------------- yfinance/data.py | 158 +++++++++++++++++++++++++++++++++++++++++++++ yfinance/ticker.py | 18 +----- yfinance/utils.py | 100 +--------------------------- 8 files changed, 297 insertions(+), 232 deletions(-) create mode 100644 yfinance/data.py diff --git a/.gitignore b/.gitignore index 2dac9c903..c997cc7fe 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,12 @@ build/ *.html *.css *.png + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2e1bae2dc..184c2cef7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ requests>=2.26 multitasking>=0.0.7 lxml>=4.5.1 appdirs>=1.4.4 -pytz>=2022.5 \ No newline at end of file +pytz>=2022.5 +frozendict>=2.3.4 \ No newline at end of file diff --git a/setup.py b/setup.py index 442a48c50..855edf285 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']), install_requires=['pandas>=0.24.0', 'numpy>=1.15', 'requests>=2.26', 'multitasking>=0.0.7', - 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5'], + 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', 'frozendict>=2.3.4'], entry_points={ 'console_scripts': [ 'sample=sample:main', diff --git a/tests/ticker.py b/tests/ticker.py index e3de47619..3a059e9b5 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -1,6 +1,11 @@ +import pandas as pd + from .context import yfinance as yf import unittest +import logging + +logging.basicConfig(level=logging.DEBUG) class TestTicker(unittest.TestCase): @@ -23,5 +28,80 @@ def test_getTz(self): self.assertIsNotNone(tz) +class TestTickerEarnings(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_earnings_history(self): + data = self.ticker.earnings_history + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_history + self.assertIs(data, data_cached, "data not cached") + + def test_earnings(self): + data = self.ticker.earnings + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings + self.assertIs(data, data_cached, "data not cached") + + def test_quarterly_earnings(self): + data = self.ticker.quarterly_earnings + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_earnings + self.assertIs(data, data_cached, "data not cached") + + def test_earnings_forecasts(self): + data = self.ticker.earnings_forecasts + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_forecasts + self.assertIs(data, data_cached, "data not cached") + + +class TestTickerHolders(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_major_holders(self): + data = self.ticker.major_holders + self.assertIsInstance(data, pd.DataFrame, "major_holders has wrong type") + self.assertFalse(data.empty, "major_holders is empty") + + data_cached = self.ticker.major_holders + assert data is data_cached, "not cached" + + def test_institutional_holders(self): + data = self.ticker.institutional_holders + self.assertIsInstance(data, pd.DataFrame, "major_holders has wrong type") + self.assertFalse(data.empty, "major_holders is empty") + + data_cached = self.ticker.institutional_holders + assert data is data_cached, "not cached" + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(TestTicker('Test ticker')) + suite.addTest(TestTickerEarnings('Test Earnings')) + suite.addTest(TestTickerHolders('Test holders')) + return suite + + if __name__ == '__main__': - unittest.main() + runner = unittest.TextTestRunner() + runner.run(suite()) diff --git a/yfinance/base.py b/yfinance/base.py index eb1242ac3..b3d09122e 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -23,21 +23,16 @@ import time as _time import datetime as _datetime -import requests as _requests import pandas as _pd import numpy as _np -import re as _re -try: - from urllib.parse import quote as urlencode -except ImportError: - from urllib import quote as urlencode +from .data import TickerData + +from urllib.parse import quote as urlencode from . import utils import json as _json -# import re as _re -# import sys as _sys from . import shared @@ -84,12 +79,9 @@ def __init__(self, ticker, session=None): if utils.is_isin(self.ticker): self.ticker = utils.get_ticker_by_isin(self.ticker, None, session) + self._data = TickerData(self.ticker, session=session) + def stats(self, proxy=None): - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} if self._fundamentals: return @@ -97,7 +89,7 @@ def stats(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - data = utils.get_json_data_stores(ticker_url, proxy, self.session)["QuoteSummaryStore"] + data = self._data.get_json_data_stores(ticker_url, proxy)["QuoteSummaryStore"] return data def history(self, period="1mo", interval="1d", @@ -197,15 +189,12 @@ def history(self, period="1mo", interval="1d", # Getting data from json url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) - session = self.session or _requests data = None try: - data = session.get( + data = self._data.get( url=url, params=params, - proxies=proxy, - headers=utils.user_agent_headers, timeout=timeout ) if "Will be right back" in data.text or data is None: @@ -557,18 +546,11 @@ def _fetch_ticker_tz(self, debug_mode, proxy, timeout): params = {"range": "1d", "interval": "1d"} - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - # Getting data from json url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker) - session = self.session or _requests try: - data = session.get(url=url, params=params, proxies=proxy, headers=utils.user_agent_headers, timeout=timeout) + data = self._data.get(url=url, params=params, proxy=proxy, timeout=timeout) data = data.json() except Exception as e: if debug_mode: @@ -593,12 +575,6 @@ def _fetch_ticker_tz(self, debug_mode, proxy, timeout): return None def _get_info(self, proxy=None): - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - if (self._info is not None) or (self._sustainability is not None) or self._recommendations: # No need to fetch return @@ -606,7 +582,7 @@ def _get_info(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - data = utils.get_json_data_stores(ticker_url, proxy, self.session)['QuoteSummaryStore'] + data = self._data.get_json_data_stores(ticker_url, proxy)['QuoteSummaryStore'] # sustainability d = {} @@ -669,7 +645,8 @@ def _get_info(self, proxy=None): self._info['logo_url'] = "" try: if not 'website' in self._info: - self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % self._info['shortName'].split(' ')[0].split(',')[0] + self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \ + self._info['shortName'].split(' ')[0].split(',')[0] else: domain = self._info['website'].split( '://')[1].split('/')[0].replace('www.', '') @@ -704,9 +681,8 @@ def _get_info(self, proxy=None): pass # Complementary key-statistics. For now just want 'trailing PEG ratio' - session = self.session or _requests keys = {"trailingPegRatio"} - if len(keys)>0: + if len(keys) > 0: # Simplified the original scrape code for key-statistics. Very expensive for fetching # just one value, best if scraping most/all: # @@ -729,13 +705,16 @@ def _get_info(self, proxy=None): # pass # # For just one/few variable is faster to query directly: - url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(self.ticker, self.ticker) + url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format( + self.ticker, self.ticker) for k in keys: - url += "&type="+k + url += "&type=" + k # Request 6 months of data - url += "&period1={}".format(int((_datetime.datetime.now()-_datetime.timedelta(days=365//2)).timestamp())) - url += "&period2={}".format(int((_datetime.datetime.now()+_datetime.timedelta(days=1)).timestamp())) - json_str = session.get(url=url, proxies=proxy, headers=utils.user_agent_headers).text + url += "&period1={}".format( + int((_datetime.datetime.now() - _datetime.timedelta(days=365 // 2)).timestamp())) + url += "&period2={}".format(int((_datetime.datetime.now() + _datetime.timedelta(days=1)).timestamp())) + + json_str = self._data.get(url=url, proxy=proxy).text json_data = _json.loads(json_str) key_stats = json_data["timeseries"]["result"][0] if k not in key_stats: @@ -746,7 +725,6 @@ def _get_info(self, proxy=None): v = key_stats[k][-1]["reportedValue"]["raw"] self._info[k] = v - def _get_fundamentals(self, proxy=None): def cleanup(data): ''' @@ -772,12 +750,6 @@ def cleanup(data): df.index = utils.camel2title(df.index) return df - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - if self._fundamentals: return @@ -785,8 +757,8 @@ def cleanup(data): # holders try: - resp = utils.get_html(ticker_url + '/holders', proxy, self.session) - holders = _pd.read_html(resp) + resp = self._data.get(ticker_url + '/holders', proxy) + holders = _pd.read_html(resp.text) except Exception: holders = [] @@ -819,16 +791,16 @@ def cleanup(data): self._get_info(proxy) # get fundamentals - fin_data = utils.get_json_data_stores(ticker_url + '/financials', proxy, self.session) - fin_data_quote = fin_data['QuoteSummaryStore'] + financials_data = self._data.get_json_data_stores(ticker_url + '/financials', proxy) + fin_data_quote = financials_data['QuoteSummaryStore'] # generic patterns self._earnings = {"yearly": utils._pd.DataFrame(), "quarterly": utils._pd.DataFrame()} self._financials = {} for name in ["income", "balance-sheet", "cash-flow"]: - self._financials[name] = {"yearly":utils._pd.DataFrame(), "quarterly":utils._pd.DataFrame()} + self._financials[name] = {"yearly": utils._pd.DataFrame(), "quarterly": utils._pd.DataFrame()} for name in ["income", "balance-sheet", "cash-flow"]: - annual, qtr = self._create_financials_table(name, proxy) + annual, qtr = self._create_financials_table(name, financials_data, proxy) if annual is not None: self._financials[name]["yearly"] = annual if qtr is not None: @@ -855,7 +827,9 @@ def cleanup(data): # shares outstanding try: # keep only years with non None data - available_shares = [shares_data for shares_data in fin_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] if shares_data] + available_shares = [shares_data for shares_data in + financials_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] if + shares_data] shares = _pd.DataFrame(available_shares) shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4])) shares.set_index('Year', inplace=True) @@ -868,7 +842,7 @@ def cleanup(data): pass # Analysis - data = utils.get_json_data_stores(ticker_url + '/analysis', proxy, self.session)["QuoteSummaryStore"] + data = self._data.get_json_data_stores(ticker_url + '/analysis', proxy)["QuoteSummaryStore"] if isinstance(data.get('earningsTrend'), dict): try: @@ -897,7 +871,7 @@ def cleanup(data): # Analysis Data/Analyst Forecasts try: - analysis_data = utils.get_json_data_stores(ticker_url+'/analysis',proxy,self.session) + analysis_data = self._data.get_json_data_stores(ticker_url + '/analysis', proxy) analysis_data = analysis_data['QuoteSummaryStore'] except Exception as e: analysis_data = {} @@ -906,7 +880,8 @@ def cleanup(data): except Exception as e: self._analyst_trend_details = None try: - self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T + self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[ + ['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T except Exception as e: self._analyst_price_target = None earnings_estimate = [] @@ -933,7 +908,7 @@ def cleanup(data): self._fundamentals = True - def _create_financials_table(self, name, proxy): + def _create_financials_table(self, name, financials_data, proxy): acceptable_names = ["income", "balance-sheet", "cash-flow"] if not name in acceptable_names: raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) @@ -943,7 +918,7 @@ def _create_financials_table(self, name, proxy): name = "financials" ticker_url = "{}/{}".format(self._scrape_url, self.ticker) - data_store = utils.get_json_data_stores(ticker_url+'/'+name, proxy, self.session) + # data_store = utils.get_json_data_stores(ticker_url+'/'+name, proxy, self.session) _stmt_annual = None _stmt_qtr = None @@ -952,8 +927,8 @@ def _create_financials_table(self, name, proxy): # visible on Yahoo website. But more work needed to make it user-friendly! Ideally # return a tree data structure instead of Pandas MultiIndex # So until this is implemented, just return simple tables - _stmt_annual = utils.get_financials_time_series(self.ticker, name, "annual", ticker_url, proxy, self.session) - _stmt_qtr = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) + _stmt_annual = self._data.get_financials_time_series(self.ticker, name, "annual", financials_data, proxy) + _stmt_qtr = self._data.get_financials_time_series(self.ticker, name, "quarterly", financials_data, proxy) # template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) @@ -972,7 +947,6 @@ def _create_financials_table(self, name, proxy): return _stmt_annual, _stmt_qtr - def get_recommendations(self, proxy=None, as_dict=False): self._get_info(proxy) data = self._recommendations @@ -1064,7 +1038,8 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): data = self._earnings[freq] if as_dict: dict_data = data.to_dict() - dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings['financialCurrency'] + dict_data['financialCurrency'] = 'USD' if 'financialCurrency' not in self._earnings else self._earnings[ + 'financialCurrency'] return dict_data return data @@ -1131,12 +1106,6 @@ def get_isin(self, proxy=None): self._isin = '-' return self._isin - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - q = ticker self.get_info(proxy=proxy) if "shortName" in self._info: @@ -1145,12 +1114,7 @@ def get_isin(self, proxy=None): url = 'https://markets.businessinsider.com/ajax/' \ 'SearchController_Suggest?max_results=25&query=%s' \ % urlencode(q) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).text + data = self._data.get(url=url, proxy=proxy).text search_str = '"{}|'.format(ticker) if search_str not in data: @@ -1170,20 +1134,9 @@ def get_news(self, proxy=None): if self._news: return self._news - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - # Getting data from json url = "{}/v1/finance/search?q={}".format(self._base_url, self.ticker) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ) + data = self._data.get(url=url, proxy=proxy) if "Will be right back" in data.text: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" "Our engineers are working quickly to resolve " @@ -1198,12 +1151,6 @@ def get_earnings_dates(self, proxy=None): if self._earnings_dates is not None: return self._earnings_dates - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - page_size = 100 # YF caps at 100, don't go higher page_offset = 0 dates = None @@ -1211,12 +1158,7 @@ def get_earnings_dates(self, proxy=None): url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format( _ROOT_URL_, self.ticker, page_offset, page_size) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).text + data = self._data.get(url=url, proxy=proxy).text if "Will be right back" in data: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" @@ -1276,22 +1218,11 @@ def get_earnings_dates(self, proxy=None): return dates def get_earnings_history(self, proxy=None): - if self._earnings_history: + if self._earnings_history is not None: return self._earnings_history - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - url = "{}/calendar/earnings?symbol={}".format(_ROOT_URL_, self.ticker) - session = self.session or _requests - data = session.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).text + data = self._data.get(url=url, proxy=proxy).text if "Will be right back" in data: raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" diff --git a/yfinance/data.py b/yfinance/data.py new file mode 100644 index 000000000..31abcc43d --- /dev/null +++ b/yfinance/data.py @@ -0,0 +1,158 @@ +import datetime +import functools +from functools import lru_cache + +import pandas as pd +import requests as requests +import re + +from frozendict import frozendict + +try: + import ujson as json +except ImportError: + import json as json + +cache_maxsize = 1000 + + +def freezeargs(func): + """ + Decorator transforms mutable dictionary arguments into immutable + Needed so lru_cache can cache method calls what has dict arguments. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args]) + kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} + return func(*args, **kwargs) + + return wrapped + + +class TickerData: + """ + Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations + """ + user_agent_headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} + + def __init__(self, ticker: str, session=None): + self._ticker = ticker + self._session = session or requests + + @freezeargs + @lru_cache(maxsize=cache_maxsize) + def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30): + proxy = self._get_proxy(proxy) + response = self._session.get( + url=url, + params=params, + proxies=proxy, + timeout=timeout, + headers=user_agent_headers or self.user_agent_headers) + return response + + def _get_proxy(self, proxy): + # setup proxy in requests format + if proxy is not None: + if isinstance(proxy, dict) and "https" in proxy: + proxy = proxy["https"] + proxy = {"https": proxy} + return proxy + + @freezeargs + @lru_cache(maxsize=cache_maxsize) + def get_html(self, url, proxy=None): + html = self._session.get(url=url, proxies=proxy, headers=self.user_agent_headers).text + return html + + @freezeargs + @lru_cache(maxsize=cache_maxsize) + def get_json_data_stores(self, url, proxy=None): + ''' + get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page. + ''' + html = self.get(url=url, proxy=proxy).text + + json_str = html.split('root.App.main =')[1].split( + '(this)')[0].split(';\n}')[0].strip() + data = json.loads(json_str)['context']['dispatcher']['stores'] + + # return data + new_data = json.dumps(data).replace('{}', 'null') + new_data = re.sub( + r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data) + + return json.loads(new_data) + + @freezeargs + @lru_cache(maxsize=cache_maxsize) + def get_financials_time_series(self, name, timescale, financials_data, proxy=None): + acceptable_names = ["financials", "balance-sheet", "cash-flow"] + if not name in acceptable_names: + raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) + acceptable_timestamps = ["annual", "quarterly"] + if not timescale in acceptable_timestamps: + raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) + + # Step 1: get the keys: + def _finditem1(key, obj): + values = [] + if isinstance(obj, dict): + if key in obj.keys(): + values.append(obj[key]) + for k, v in obj.items(): + values += _finditem1(key, v) + elif isinstance(obj, list): + for v in obj: + values += _finditem1(key, v) + return values + + keys = _finditem1("key", financials_data['FinancialTemplateStore']) + + # Step 2: construct url: + ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format( + self._ticker) + if len(keys) == 0: + raise Exception("Fetching keys failed") + url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys]) + # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: + start_dt = datetime.datetime(2016, 12, 31) + end = (datetime.datetime.now() + datetime.timedelta(days=366)) + url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp())) + + # Step 3: fetch and reshape data + json_str = self.get(url=url, proxy=proxy).text + json_data = json.loads(json_str) + data_raw = json_data["timeseries"]["result"] + # data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data + for d in data_raw: + del d["meta"] + + # Now reshape data into a table: + # Step 1: get columns and index: + timestamps = set() + data_unpacked = {} + for x in data_raw: + for k in x.keys(): + if k == "timestamp": + timestamps.update(x[k]) + else: + data_unpacked[k] = x[k] + timestamps = sorted(list(timestamps)) + dates = pd.to_datetime(timestamps, unit="s") + df = pd.DataFrame(columns=dates, index=data_unpacked.keys()) + for k, v in data_unpacked.items(): + if df is None: + df = pd.DataFrame(columns=dates, index=[k]) + df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v} + + df.index = df.index.str.replace("^" + timescale, "", regex=True) + + # Reorder table to match order on Yahoo website + df = df.reindex([k for k in keys if k in df.index]) + df = df[sorted(df.columns, reverse=True)] + + return df diff --git a/yfinance/ticker.py b/yfinance/ticker.py index d14af9599..0c821ebf8 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -21,17 +21,11 @@ from __future__ import print_function -# import time as _time import datetime as _datetime -import requests as _requests import pandas as _pd -# import numpy as _np -# import json as _json -# import re as _re from collections import namedtuple as _namedtuple -from . import utils from .base import TickerBase @@ -48,17 +42,7 @@ def _download_options(self, date=None, proxy=None): url = "{}/v7/finance/options/{}?date={}".format( self._base_url, self.ticker, date) - # setup proxy in requests format - if proxy is not None: - if isinstance(proxy, dict) and "https" in proxy: - proxy = proxy["https"] - proxy = {"https": proxy} - - r = _requests.get( - url=url, - proxies=proxy, - headers=utils.user_agent_headers - ).json() + r = self._data.get(url=url, proxy=proxy).json() if len(r.get('optionChain', {}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: self._expirations[_datetime.datetime.utcfromtimestamp( diff --git a/yfinance/utils.py b/yfinance/utils.py index b25bae882..f5c898fc2 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -59,7 +59,7 @@ def get_all_by_isin(isin, proxy=None, session=None): from .base import _BASE_URL_ session = session or _requests url = "{}/v1/finance/search?q={}".format(_BASE_URL_, isin) - data = session.get(url=url, proxies=proxy, headers=user_agent_headers) + data = session.get(url=url, proxy=proxy, headers=user_agent_headers) try: data = data.json() ticker = data.get('quotes', [{}])[0] @@ -109,31 +109,6 @@ def empty_earnings_dates_df(): return empty -def get_html(url, proxy=None, session=None): - session = session or _requests - html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - return html - - -def get_json_data_stores(url, proxy=None, session=None): - ''' - get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page. - ''' - session = session or _requests - html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - - json_str = html.split('root.App.main =')[1].split( - '(this)')[0].split(';\n}')[0].strip() - data = _json.loads(json_str)['context']['dispatcher']['stores'] - - # return data - new_data = _json.dumps(data).replace('{}', 'null') - new_data = _re.sub( - r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data) - - return _json.loads(new_data) - - def build_template(data): ''' build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". @@ -266,79 +241,6 @@ def format_quarterly_financial_statement(_statement, level_detail, order): return _statement -def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session=None): - acceptable_names = ["financials", "balance-sheet", "cash-flow"] - if not name in acceptable_names: - raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) - acceptable_timestamps = ["annual", "quarterly"] - if not timescale in acceptable_timestamps: - raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) - - session = session or _requests - - financials_data = get_json_data_stores(ticker_url + '/' + name, proxy, session) - - # Step 1: get the keys: - def _finditem1(key, obj): - values = [] - if isinstance(obj, dict): - if key in obj.keys(): - values.append(obj[key]) - for k, v in obj.items(): - values += _finditem1(key, v) - elif isinstance(obj, list): - for v in obj: - values += _finditem1(key, v) - return values - - keys = _finditem1("key", financials_data['FinancialTemplateStore']) - - # Step 2: construct url: - ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format( - ticker) - if len(keys) == 0: - raise Exception("Fetching keys failed") - url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys]) - # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: - start_dt = _datetime.datetime(2016, 12, 31) - end = (_datetime.datetime.now() + _datetime.timedelta(days=366)) - url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp())) - - # Step 3: fetch and reshape data - json_str = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - json_data = _json.loads(json_str) - data_raw = json_data["timeseries"]["result"] - # data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data - for d in data_raw: - del d["meta"] - - # Now reshape data into a table: - # Step 1: get columns and index: - timestamps = set() - data_unpacked = {} - for x in data_raw: - for k in x.keys(): - if k == "timestamp": - timestamps.update(x[k]) - else: - data_unpacked[k] = x[k] - timestamps = sorted(list(timestamps)) - dates = _pd.to_datetime(timestamps, unit="s") - df = _pd.DataFrame(columns=dates, index=data_unpacked.keys()) - for k, v in data_unpacked.items(): - if df is None: - df = _pd.DataFrame(columns=dates, index=[k]) - df.loc[k] = {_pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v} - - df.index = df.index.str.replace("^" + timescale, "", regex=True) - - # Reorder table to match order on Yahoo website - df = df.reindex([k for k in keys if k in df.index]) - df = df[sorted(df.columns, reverse=True)] - - return df - - def camel2title(o): return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o] From 07e19f187a7b57947e05131c0105d86d4afa7b09 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 13:34:58 +0100 Subject: [PATCH 03/14] Dropped Python versions before 3.6 from package support metadata --- setup.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index 855edf285..8f34fe1cc 100644 --- a/setup.py +++ b/setup.py @@ -38,8 +38,8 @@ classifiers=[ 'License :: OSI Approved :: Apache Software License', # 'Development Status :: 3 - Alpha', - # 'Development Status :: 4 - Beta', - 'Development Status :: 5 - Production/Stable', + 'Development Status :: 4 - Beta', + #'Development Status :: 5 - Production/Stable', 'Operating System :: OS Independent', @@ -50,20 +50,19 @@ 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - # 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', ], platforms=['any'], keywords='pandas, yahoo finance, pandas datareader', packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']), - install_requires=['pandas>=0.24.0', 'numpy>=1.15', + install_requires=['pandas>=1.5.0', 'numpy>=1.15', 'requests>=2.26', 'multitasking>=0.0.7', - 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', 'frozendict>=2.3.4'], + 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', + 'frozendict>=2.3.4'], entry_points={ 'console_scripts': [ 'sample=sample:main', From 9f9f7b00d864b985d9a53e524bec383ccd2d8d80 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 13:50:21 +0100 Subject: [PATCH 04/14] Revert to require a pandas version that supports 3.6.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8f34fe1cc..c73f214d5 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ platforms=['any'], keywords='pandas, yahoo finance, pandas datareader', packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']), - install_requires=['pandas>=1.5.0', 'numpy>=1.15', + install_requires=['pandas>=1.1.0', 'numpy>=1.15', 'requests>=2.26', 'multitasking>=0.0.7', 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', 'frozendict>=2.3.4'], From 743f3acb87b4bee8a8a2577d8a6f402a144cb26f Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 13:55:44 +0100 Subject: [PATCH 05/14] Removed unused get_html method. --- yfinance/data.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index 31abcc43d..432c7553b 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -62,12 +62,6 @@ def _get_proxy(self, proxy): proxy = {"https": proxy} return proxy - @freezeargs - @lru_cache(maxsize=cache_maxsize) - def get_html(self, url, proxy=None): - html = self._session.get(url=url, proxies=proxy, headers=self.user_agent_headers).text - return html - @freezeargs @lru_cache(maxsize=cache_maxsize) def get_json_data_stores(self, url, proxy=None): From 2cc3cbb1e8058929f673e28575182e316c959983 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 14:06:39 +0100 Subject: [PATCH 06/14] Removed extra requests logging used for debugging. --- tests/ticker.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 3a059e9b5..b24de5624 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -3,9 +3,6 @@ from .context import yfinance as yf import unittest -import logging - -logging.basicConfig(level=logging.DEBUG) class TestTicker(unittest.TestCase): From 157b45269d7a8549e526912c07a5b10db08e920d Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 16:59:06 +0100 Subject: [PATCH 07/14] Fixed regression in PR and cleaned up .gitignore --- .gitignore | 4 +--- yfinance/utils.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index c997cc7fe..8ff5d6c60 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,4 @@ build/ .venv env/ venv/ -ENV/ -env.bak/ -venv.bak/ \ No newline at end of file +ENV/ \ No newline at end of file diff --git a/yfinance/utils.py b/yfinance/utils.py index f5c898fc2..dc67d5124 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -59,7 +59,7 @@ def get_all_by_isin(isin, proxy=None, session=None): from .base import _BASE_URL_ session = session or _requests url = "{}/v1/finance/search?q={}".format(_BASE_URL_, isin) - data = session.get(url=url, proxy=proxy, headers=user_agent_headers) + data = session.get(url=url, proxies=proxy, headers=user_agent_headers) try: data = data.json() ticker = data.get('quotes', [{}])[0] From 438f512f470d4ae3221aacad66320e39a4acf0bd Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 17:01:09 +0100 Subject: [PATCH 08/14] Cleaned up .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8ff5d6c60..5c73cbf9d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,4 @@ build/ .venv env/ venv/ -ENV/ \ No newline at end of file +ENV/ From 2fed55a0d151d45987761b7fa3b41f8a642ff465 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 19:14:51 +0100 Subject: [PATCH 09/14] Improved TestTickerHolders test. --- tests/ticker.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index b24de5624..c642bc3bf 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -76,19 +76,27 @@ def tearDown(self): def test_major_holders(self): data = self.ticker.major_holders - self.assertIsInstance(data, pd.DataFrame, "major_holders has wrong type") - self.assertFalse(data.empty, "major_holders is empty") + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") data_cached = self.ticker.major_holders - assert data is data_cached, "not cached" + self.assertIs(data, data_cached, "data not cached") def test_institutional_holders(self): data = self.ticker.institutional_holders - self.assertIsInstance(data, pd.DataFrame, "major_holders has wrong type") - self.assertFalse(data.empty, "major_holders is empty") + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") data_cached = self.ticker.institutional_holders - assert data is data_cached, "not cached" + self.assertIs(data, data_cached, "data not cached") + + def test_mutualfund_holders(self): + data = self.ticker.mutualfund_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.mutualfund_holders + self.assertIs(data, data_cached, "data not cached") def suite(): From 2be718700faefe546e61811cc4c0f7b5b62b58a6 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 20:16:10 +0100 Subject: [PATCH 10/14] Fixed regression bug with balance_sheet and added test for it. --- tests/ticker.py | 26 +++++++++++++++++++++++++- yfinance/base.py | 9 +++------ yfinance/data.py | 8 ++++---- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index c642bc3bf..a7e035764 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -4,6 +4,11 @@ import unittest +log_requests = False + +if log_requests: + import logging + logging.basicConfig(level=logging.DEBUG) class TestTicker(unittest.TestCase): def setUp(self): @@ -99,11 +104,30 @@ def test_mutualfund_holders(self): self.assertIs(data, data_cached, "data not cached") +class TestTickerBalanceSheet(unittest.TestCase): + + def setUp(self): + self.ticker = yf.Ticker("GOOGL") + + def tearDown(self): + self.ticker = None + + def test_balance_sheet(self): + data = self.ticker.balance_sheet + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.balance_sheet + self.assertIs(data, data_cached, "data not cached") + + + def suite(): suite = unittest.TestSuite() suite.addTest(TestTicker('Test ticker')) - suite.addTest(TestTickerEarnings('Test Earnings')) + suite.addTest(TestTickerEarnings('Test earnings')) suite.addTest(TestTickerHolders('Test holders')) + suite.addTest(TestTickerBalanceSheet('Test balance sheet')) return suite diff --git a/yfinance/base.py b/yfinance/base.py index b3d09122e..84a44f0ae 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -917,9 +917,6 @@ def _create_financials_table(self, name, financials_data, proxy): # Yahoo stores the 'income' table internally under 'financials' key name = "financials" - ticker_url = "{}/{}".format(self._scrape_url, self.ticker) - # data_store = utils.get_json_data_stores(ticker_url+'/'+name, proxy, self.session) - _stmt_annual = None _stmt_qtr = None try: @@ -927,8 +924,8 @@ def _create_financials_table(self, name, financials_data, proxy): # visible on Yahoo website. But more work needed to make it user-friendly! Ideally # return a tree data structure instead of Pandas MultiIndex # So until this is implemented, just return simple tables - _stmt_annual = self._data.get_financials_time_series(self.ticker, name, "annual", financials_data, proxy) - _stmt_qtr = self._data.get_financials_time_series(self.ticker, name, "quarterly", financials_data, proxy) + _stmt_annual = self._data.get_financials_time_series(name, "annual", financials_data, proxy) + _stmt_qtr = self._data.get_financials_time_series(name, "quarterly", financials_data, proxy) # template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) @@ -942,7 +939,7 @@ def _create_financials_table(self, name, financials_data, proxy): # _qtr_data = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) # _stmt_qtr = utils.format_quarterly_financial_statement(_qtr_data, level_detail, template_order) - except: + except Exception as e: pass return _stmt_annual, _stmt_qtr diff --git a/yfinance/data.py b/yfinance/data.py index 432c7553b..2f20dcf16 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -81,14 +81,14 @@ def get_json_data_stores(self, url, proxy=None): return json.loads(new_data) - @freezeargs - @lru_cache(maxsize=cache_maxsize) + # Note cant use lru_cache as financials_data is a nested dict (freezeargs only handle flat dicts) def get_financials_time_series(self, name, timescale, financials_data, proxy=None): + acceptable_names = ["financials", "balance-sheet", "cash-flow"] - if not name in acceptable_names: + if name not in acceptable_names: raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) acceptable_timestamps = ["annual", "quarterly"] - if not timescale in acceptable_timestamps: + if timescale not in acceptable_timestamps: raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) # Step 1: get the keys: From 112fd5cf640e3d5f0f0dd89871f8893bafca9b34 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 6 Nov 2022 20:49:36 +0100 Subject: [PATCH 11/14] Added more tests for Ticker and missing dependencies. --- requirements.txt | 4 +- setup.py | 3 +- tests/ticker.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 109 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 184c2cef7..28964912b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,6 @@ multitasking>=0.0.7 lxml>=4.5.1 appdirs>=1.4.4 pytz>=2022.5 -frozendict>=2.3.4 \ No newline at end of file +frozendict>=2.3.4 +beautifulsoup4>=4.11.1 +html5lib>=1.1 \ No newline at end of file diff --git a/setup.py b/setup.py index c73f214d5..fac4f1123 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,8 @@ install_requires=['pandas>=1.1.0', 'numpy>=1.15', 'requests>=2.26', 'multitasking>=0.0.7', 'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5', - 'frozendict>=2.3.4'], + 'frozendict>=2.3.4', + 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], entry_points={ 'console_scripts': [ 'sample=sample:main', diff --git a/tests/ticker.py b/tests/ticker.py index a7e035764..3d53dc25e 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -8,8 +8,10 @@ if log_requests: import logging + logging.basicConfig(level=logging.DEBUG) + class TestTicker(unittest.TestCase): def setUp(self): pass @@ -70,6 +72,22 @@ def test_earnings_forecasts(self): data_cached = self.ticker.earnings_forecasts self.assertIs(data, data_cached, "data not cached") + def test_earnings_dates(self): + data = self.ticker.earnings_dates + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_dates + self.assertIs(data, data_cached, "data not cached") + + def test_earnings_trend(self): + data = self.ticker.earnings_trend + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.earnings_trend + self.assertIs(data, data_cached, "data not cached") + class TestTickerHolders(unittest.TestCase): @@ -104,7 +122,7 @@ def test_mutualfund_holders(self): self.assertIs(data, data_cached, "data not cached") -class TestTickerBalanceSheet(unittest.TestCase): +class TestTickerMiscFinancials(unittest.TestCase): def setUp(self): self.ticker = yf.Ticker("GOOGL") @@ -120,6 +138,90 @@ def test_balance_sheet(self): data_cached = self.ticker.balance_sheet self.assertIs(data, data_cached, "data not cached") + def test_quarterly_balance_sheet(self): + data = self.ticker.quarterly_balance_sheet + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_balance_sheet + self.assertIs(data, data_cached, "data not cached") + + def test_cashflow(self): + data = self.ticker.cashflow + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.cashflow + self.assertIs(data, data_cached, "data not cached") + + def test_quarterly_cashflow(self): + data = self.ticker.quarterly_cashflow + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.quarterly_cashflow + self.assertIs(data, data_cached, "data not cached") + + def test_sustainability(self): + data = self.ticker.sustainability + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.sustainability + self.assertIs(data, data_cached, "data not cached") + + def test_recommendations(self): + data = self.ticker.recommendations + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.recommendations + self.assertIs(data, data_cached, "data not cached") + + def test_recommendations_summary(self): + data = self.ticker.recommendations_summary + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.recommendations_summary + self.assertIs(data, data_cached, "data not cached") + + def test_analyst_price_target(self): + data = self.ticker.analyst_price_target + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.analyst_price_target + self.assertIs(data, data_cached, "data not cached") + + def test_revenue_forecasts(self): + data = self.ticker.revenue_forecasts + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.revenue_forecasts + self.assertIs(data, data_cached, "data not cached") + + def test_calendar(self): + data = self.ticker.calendar + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.calendar + self.assertIs(data, data_cached, "data not cached") + + def test_isin(self): + data = self.ticker.isin + self.assertIsInstance(data, str, "data has wrong type") + self.assertEquals("ARDEUT116159", data, "data is empty") + + data_cached = self.ticker.isin + self.assertIs(data, data_cached, "data not cached") + + def test_options(self): + data = self.ticker.options + self.assertIsInstance(data, tuple, "data has wrong type") + self.assertTrue(len(data) > 1, "data is empty") def suite(): @@ -127,7 +229,7 @@ def suite(): suite.addTest(TestTicker('Test ticker')) suite.addTest(TestTickerEarnings('Test earnings')) suite.addTest(TestTickerHolders('Test holders')) - suite.addTest(TestTickerBalanceSheet('Test balance sheet')) + suite.addTest(TestTickerMiscFinancials('Test balance sheet')) return suite From a775669ac56f877bce4afce32581f2f60d969593 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Mon, 7 Nov 2022 00:00:55 +0100 Subject: [PATCH 12/14] Tried to clean up the temp folder creation in test. --- tests/prices.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index 052739eb4..df167243b 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -6,19 +6,19 @@ import pytz as _tz import numpy as _np import pandas as _pd +import os # Create temp session import requests_cache, tempfile td = tempfile.TemporaryDirectory() -cache_fp = td.name+'/'+"yfinance.cache" class TestPriceHistory(unittest.TestCase): def setUp(self): global td self.td = td - self.session = requests_cache.CachedSession(self.td.name + '/' + "yfinance.cache") + self.session = requests_cache.CachedSession(os.path.join(self.td.name, "yfinance.cache")) def tearDown(self): self.session.close() @@ -116,8 +116,8 @@ def test_dailyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -132,7 +132,7 @@ def test_dailyWithEvents(self): for tkr in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -150,8 +150,8 @@ def test_weeklyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -166,7 +166,7 @@ def test_weeklyWithEvents(self): for tkr in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -183,8 +183,8 @@ def test_monthlyWithEvents(self): end_d = "2020-11-29" df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True) df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) - self.assertTrue(((df2["Dividends"]>0)|(df2["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) + self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -199,7 +199,7 @@ def test_monthlyWithEvents(self): for tkr in tkrs: df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True) df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False) - self.assertTrue(((df1["Dividends"]>0)|(df1["Stock Splits"]>0)).any()) + self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any()) try: self.assertTrue(df1.index.equals(df2.index)) except: @@ -232,25 +232,24 @@ def test_dst_fix(self): interval = "1d" df = dat.history(start=start, end=end, interval=interval) - self.assertTrue(((df.index.weekday>=0) & (df.index.weekday<=4)).all()) + self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all()) interval = "1wk" df = dat.history(start=start, end=end, interval=interval) try: - self.assertTrue((df.index.weekday==0).all()) + self.assertTrue((df.index.weekday == 0).all()) except: print("Weekly data not aligned to Monday") raise def test_weekly_2rows_fix(self): tkr = "AMZN" - start = _dt.date.today()-_dt.timedelta(days=14) + start = _dt.date.today() - _dt.timedelta(days=14) start -= _dt.timedelta(days=start.weekday()) dat = yf.Ticker(tkr) df = dat.history(start=start, interval="1wk") - self.assertTrue((df.index.weekday==0).all()) - + self.assertTrue((df.index.weekday == 0).all()) def test_repair_weekly(self): # Sometimes, Yahoo returns prices 100x the correct value. @@ -501,9 +500,11 @@ def test_repair_daily(self): f_1 = ratio == 1 self.assertTrue((f_100 | f_1).all()) - -if __name__ == '__main__': - unittest.main() +try: + if __name__ == '__main__': + unittest.main() +finally: + td.cleanup() # # Run tests sequentially: # import inspect @@ -513,4 +514,3 @@ def test_repair_daily(self): # ) # unittest.main(verbosity=2) -td.cleanup() From 5bfbec5df042642bcf59d7401b62ef420bfeb480 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Mon, 7 Nov 2022 10:29:42 +0100 Subject: [PATCH 13/14] Decreased default cache_maxsize for lru_cache after some investigation of memory usage. Also fixed warning about wrong type used for dataframe index. --- yfinance/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index 2f20dcf16..bddd9151e 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -13,7 +13,7 @@ except ImportError: import json as json -cache_maxsize = 1000 +cache_maxsize = 128 def freezeargs(func): @@ -137,7 +137,7 @@ def _finditem1(key, obj): data_unpacked[k] = x[k] timestamps = sorted(list(timestamps)) dates = pd.to_datetime(timestamps, unit="s") - df = pd.DataFrame(columns=dates, index=data_unpacked.keys()) + df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys())) for k, v in data_unpacked.items(): if df is None: df = pd.DataFrame(columns=dates, index=[k]) From c7cf4378f67e5f76c90cb42c0e13bc32521d01cb Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Tue, 8 Nov 2022 01:36:28 +0100 Subject: [PATCH 14/14] Lowered lru_cache size and made cache_info and cache_clear work on lru_cached methods. --- tests/ticker.py | 2 +- yfinance/data.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index afa976878..1d3f528ab 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -255,7 +255,7 @@ def test_calendar(self): def test_isin(self): data = self.ticker.isin self.assertIsInstance(data, str, "data has wrong type") - self.assertEquals("ARDEUT116159", data, "data is empty") + self.assertEqual("ARDEUT116159", data, "data is empty") data_cached = self.ticker.isin self.assertIs(data, data_cached, "data not cached") diff --git a/yfinance/data.py b/yfinance/data.py index bddd9151e..6ce5a68fe 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -13,10 +13,10 @@ except ImportError: import json as json -cache_maxsize = 128 +cache_maxsize = 64 -def freezeargs(func): +def lru_cache_freezeargs(func): """ Decorator transforms mutable dictionary arguments into immutable Needed so lru_cache can cache method calls what has dict arguments. @@ -28,6 +28,10 @@ def wrapped(*args, **kwargs): kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} return func(*args, **kwargs) + # copy over the lru_cache extra methods to this wrapper to be able to access them + # after this decorator has been applied + wrapped.cache_info = func.cache_info + wrapped.cache_clear = func.cache_clear return wrapped @@ -42,7 +46,7 @@ def __init__(self, ticker: str, session=None): self._ticker = ticker self._session = session or requests - @freezeargs + @lru_cache_freezeargs @lru_cache(maxsize=cache_maxsize) def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30): proxy = self._get_proxy(proxy) @@ -62,7 +66,7 @@ def _get_proxy(self, proxy): proxy = {"https": proxy} return proxy - @freezeargs + @lru_cache_freezeargs @lru_cache(maxsize=cache_maxsize) def get_json_data_stores(self, url, proxy=None): '''