From 883b7f07757d4c1f1da92ff6aa57d54f51f0d270 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Wed, 7 Jul 2021 18:50:10 +1000 Subject: [PATCH 01/20] Updated the utils get_json function to standardize the return. This will enable us to see other store types from the base.py file. --- yfinance/base.py | 4 ++-- yfinance/utils.py | 12 ++++-------- yfinance/version.py | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 6a058ee63..db26e2e37 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -406,8 +406,8 @@ def cleanup(data): pass # get fundamentals - data = utils.get_json(ticker_url+'/financials', proxy, self.session) - + fundamentals_data = utils.get_json(ticker_url+'/financials', proxy, self.session) + data = fundamentals_data['context']['dispatcher']['stores']['QuoteSummaryStore'] # generic patterns for key in ( (self._cashflow, 'cashflowStatement', 'cashflowStatements'), diff --git a/yfinance/utils.py b/yfinance/utils.py index 3ee06f6bd..8b1abeb4b 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -50,20 +50,16 @@ def get_html(url, proxy=None, session=None): def get_json(url, proxy=None, session=None): - + ''' + get_json returns a python dictionary of the store detail for yahoo finance web pages. 
+ ''' session = session or _requests headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} html = session.get(url=url, proxies=proxy, headers=headers).text - if "QuoteSummaryStore" not in html: - html = session.get(url=url, proxies=proxy).text - if "QuoteSummaryStore" not in html: - return {} - json_str = html.split('root.App.main =')[1].split( '(this)')[0].split(';\n}')[0].strip() - data = _json.loads(json_str)[ - 'context']['dispatcher']['stores']['QuoteSummaryStore'] + data = _json.loads(json_str) # return data new_data = _json.dumps(data).replace('{}', 'null') diff --git a/yfinance/version.py b/yfinance/version.py index b3a095c98..de6d389b5 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.1.60" +version = "0.1.61" From 91f489147583ebb0dd31fce163bcd44d0f7b0ce3 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Wed, 7 Jul 2021 20:33:35 +1000 Subject: [PATCH 02/20] Added some functions to enable us to grab key details from the analysis section of yahoo finance: current_recommendations, revenue_forecasts and earnings_forecasts. 
--- yfinance/base.py | 53 +++++++++++++++++++++++++++++++++++++++++++++- yfinance/ticker.py | 12 +++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/yfinance/base.py b/yfinance/base.py index db26e2e37..bdabc6daa 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -405,7 +405,7 @@ def cleanup(data): except Exception: pass - # get fundamentals + # get fundamental financial data fundamentals_data = utils.get_json(ticker_url+'/financials', proxy, self.session) data = fundamentals_data['context']['dispatcher']['stores']['QuoteSummaryStore'] # generic patterns @@ -445,6 +445,36 @@ def cleanup(data): self._earnings['quarterly'] = df except Exception as e: pass + + # analysis data + analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) + analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + try: + self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) + except: + self._analyst_trend_details = _pd.DataFrame() + earnings_estimate = [] + revenue_estimate = [] + if len(self._analyst_trend_details) != 0: + for key in analysis_data['earningsTrend']['trend']: + try: + earnings_dict = key['earningsEstimate'] + earnings_dict['period'] = key['period'] + earnings_dict['endDate'] = key['endDate'] + earnings_estimate.append(earnings_dict) + + revenue_dict = key['revenueEstimate'] + revenue_dict['period'] = key['period'] + revenue_dict['endDate'] = key['endDate'] + revenue_estimate.append(revenue_dict) + except: + pass + self._rev_est = _pd.DataFrame(revenue_estimate) + self._eps_est = _pd.DataFrame(earnings_estimate) + else: + self._rev_est = _pd.DataFrame() + self._eps_est = _pd.DataFrame() + self._fundamentals = True @@ -498,6 +528,27 @@ def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs): if as_dict: return data.to_dict() return data + + def get_current_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): + 
self._get_fundamentals(proxy=proxy) + data = self._analyst_trend_details + if as_dict: + return data.to_dict() + return data + + def get_rev_forecast(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._rev_est + if as_dict: + return data.to_dict() + return data + + def get_earnings_forecast(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._eps_est + if as_dict: + return data.to_dict() + return data def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 4000e7bf5..71464d1a9 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -189,6 +189,18 @@ def quarterly_cashflow(self): def sustainability(self): return self.get_sustainability() + @property + def current_recommendations(self): + return self.get_current_recommendations() + + @property + def revenue_forecasts(self): + return self.get_rev_forecast() + + @property + def earnings_forecasts(self): + return self.get_earnings_forecast() + @property def options(self): if not self._expirations: From da2672f338ce218b8c7aa328700c7d1d33db9b45 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Thu, 8 Jul 2021 16:47:47 +1000 Subject: [PATCH 03/20] Updated README to include additional functions. --- README.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.rst b/README.rst index 2c18a6da3..cba309ddc 100644 --- a/README.rst +++ b/README.rst @@ -132,6 +132,15 @@ Note: yahoo finance datetimes are received as UTC. opt = msft.option_chain('YYYY-MM-DD') # data available via: opt.calls, opt.puts + # get analyst recommendation trends + msft.current_recommendations + + # get analyst revenue forecasts + msft.revenue_forecasts + + # get analyst earnings forecasts + msft.earnings_forecasts + If you want to use a proxy server for downloading data, use: .. 
code:: python From cb3103615355503f0ede6e30b2ac750ef963fb28 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Thu, 8 Jul 2021 17:11:10 +1000 Subject: [PATCH 04/20] Updated to include analyst price target forecasts. --- README.rst | 3 +++ yfinance/base.py | 15 ++++++++++++++- yfinance/ticker.py | 4 ++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index cba309ddc..7562f8812 100644 --- a/README.rst +++ b/README.rst @@ -135,6 +135,9 @@ Note: yahoo finance datetimes are received as UTC. # get analyst recommendation trends msft.current_recommendations + # get analyst price targets (low estimate, current price, mean estimate, high estimate and number of analysts providing estimates) + msft.analyst_price_target + # get analyst revenue forecasts msft.revenue_forecasts diff --git a/yfinance/base.py b/yfinance/base.py index bdabc6daa..b32233b9f 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -448,11 +448,15 @@ def cleanup(data): # analysis data analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) - analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] try: self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) except: self._analyst_trend_details = _pd.DataFrame() + try: + self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T + except: + self._analyst_price_target = _pd.DataFrame() earnings_estimate = [] revenue_estimate = [] if len(self._analyst_trend_details) != 0: @@ -476,6 +480,8 @@ def cleanup(data): self._eps_est = _pd.DataFrame() + + self._fundamentals = True def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): @@ -535,6 +541,13 @@ def get_current_recommendations(self, proxy=None, as_dict=False, 
*args, **kwargs if as_dict: return data.to_dict() return data + + def get_analyst_price_target(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._analyst_price_target + if as_dict: + return data.to_dict() + return data def get_rev_forecast(self, proxy=None, as_dict=False, *args, **kwargs): self._get_fundamentals(proxy=proxy) diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 71464d1a9..b146adb48 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -193,6 +193,10 @@ def sustainability(self): def current_recommendations(self): return self.get_current_recommendations() + @property + def analyst_price_target(self): + return self.get_analyst_price_target() + @property def revenue_forecasts(self): return self.get_rev_forecast() From a1769e4fe102d23c5359ebc10a7c4a02a2f14c93 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sat, 10 Jul 2021 10:48:15 +1000 Subject: [PATCH 05/20] Added self._income_statement, once happy this new dataframe should replace self._financials. Further work required to understand if there is the opportunity to also do this for balance sheet and cf statement. --- yfinance/base.py | 84 +++++++++++++++++++++++++++++++++++++++++++--- yfinance/ticker.py | 12 +++++++ 2 files changed, 92 insertions(+), 4 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index b32233b9f..76a956553 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -21,8 +21,10 @@ from __future__ import print_function + import time as _time import datetime as _datetime +from numpy.lib.function_base import iterable import requests as _requests import pandas as _pd import numpy as _np @@ -40,6 +42,8 @@ from . import shared +_pd.set_option('display.max_rows', None) +_pd.set_option('display.max_columns', None) class TickerBase(): def __init__(self, ticker, session=None): @@ -446,7 +450,66 @@ def cleanup(data): except Exception as e: pass - # analysis data + # Grab the financial template store. 
This details the order in which the financials should be presented. + import pdb + pdb.set_trace() + data = fundamentals_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # This provides the layout/correct order of the financial data. + financial_template_ttm_order = [] # Save the TTM (Trailing Twelve Months) ordering to an object. + financial_template_annual_order = [] # Save the annual ordering to an object. + level_detail = [] #Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this) + for key in data['template']: # Loop through the json to retreive the exact financial order whilst appending to the objects + financial_template_ttm_order.append('trailing{}'.format(key['key'])) + financial_template_annual_order.append('annual{}'.format(key['key'])) + level_detail.append(0) + if 'children' in key: + for child1 in key['children']: # Level 1 + financial_template_ttm_order.append('trailing{}'.format(child1['key'])) + financial_template_annual_order.append('annual{}'.format(child1['key'])) + level_detail.append(1) + if 'children' in child1: + for child2 in child1['children']: # Level 2 + financial_template_ttm_order.append('trailing{}'.format(child2['key'])) + financial_template_annual_order.append('annual{}'.format(child2['key'])) + level_detail.append(2) + if 'children' in child2: + for child3 in child2['children']: # Level 3 + financial_template_ttm_order.append('trailing{}'.format(child3['key'])) + financial_template_annual_order.append('annual{}'.format(child3['key'])) + level_detail.append(3) + + # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + TTM_dicts = [] # Save a dictionary object to store the TTM financials. + Annual_dicts = [] # Save a dictionary object to store the Annual financials. 
+ data = fundamentals_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + + for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures. + try: + if len(data['timeSeries'][key]) > 0: + time_series_dict = {} + time_series_dict['index'] = key + for each in data['timeSeries'][key]: # Loop through the years + time_series_dict[each['asOfDate']] = each['reportedValue'] + # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] + if each['periodType'] == 'TTM': + TTM_dicts.append(time_series_dict) + elif each['periodType'] == '12M': + Annual_dicts.append(time_series_dict) + except: + pass + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(financial_template_ttm_order) + Annual = Annual.reindex(financial_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
+ TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _income_statement = Annual.merge(TTM, left_index=True, right_index=True) + _income_statement['level_detail'] = level_detail + _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) + self._income_statement = _income_statement.dropna(how='all') + + # analysis data/analyst forecasts analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] try: @@ -479,9 +542,6 @@ def cleanup(data): self._rev_est = _pd.DataFrame() self._eps_est = _pd.DataFrame() - - - self._fundamentals = True def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): @@ -572,6 +632,22 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): return dict_data return data + # testing ground start + def get_TTM_detail(self, proxy=None, as_dict=False, freq="yearly"): + self._get_fundamentals(proxy) + data = self._TTM + if as_dict: + return data.to_dict() + return data + + def get_income_statement(self, proxy=None, as_dict=False, freq="yearly"): + self._get_fundamentals(proxy) + data = self._income_statement + if as_dict: + return data.to_dict() + return data + # testing ground end + def get_financials(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) data = self._financials[freq] diff --git a/yfinance/ticker.py b/yfinance/ticker.py index b146adb48..ba21eed9f 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -205,6 +205,18 @@ def revenue_forecasts(self): def earnings_forecasts(self): return self.get_earnings_forecast() + # testing ground start + + @property + def extra_ttm(self): + return self.get_TTM_detail() + + @property + def income_statement(self): + return self.get_income_statement() + + # testing ground end + @property def options(self): if not self._expirations: From 
c3da55f2e4fa25a706188abfe5a15a852cb4a5ce Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Mon, 12 Jul 2021 19:34:18 +1000 Subject: [PATCH 06/20] Finished updating all of the required updates. --- CHANGELOG.rst | 4 + README.rst | 18 ++-- test_yfinance.py | 12 ++- yfinance/base.py | 204 +++++++++++++++++++++++---------------------- yfinance/ticker.py | 40 +++------ yfinance/utils.py | 60 +++++++++++++ 6 files changed, 197 insertions(+), 141 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9b29f4ee7..507e5bd07 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,5 +1,9 @@ Change Log =========== +0.1.61 +------ +- Updated the way in which the annual financial data is pieced together (income statement, balance sheet and cash flow statement) this now more accurately reflects what is presented in Yahoo Finance. +- Added functionality to enable users to pull in detail from the Yahoo Finance Analysis page. 0.1.60 ------ diff --git a/README.rst b/README.rst index 7562f8812..77eb999e1 100644 --- a/README.rst +++ b/README.rst @@ -90,9 +90,9 @@ Note: yahoo finance datetimes are received as UTC. # show splits msft.splits - # show financials - msft.financials - msft.quarterly_financials + # show income statement + msft.income_statement + msft.quarterly_income_statement # show major holders msft.major_holders @@ -104,9 +104,9 @@ Note: yahoo finance datetimes are received as UTC. msft.balance_sheet msft.quarterly_balance_sheet - # show cashflow - msft.cashflow - msft.quarterly_cashflow + # show cash flow statement + msft.cash_flow_statement + msft.quarterly_cash_flow_statement # show earnings msft.earnings @@ -132,10 +132,10 @@ Note: yahoo finance datetimes are received as UTC. 
opt = msft.option_chain('YYYY-MM-DD') # data available via: opt.calls, opt.puts - # get analyst recommendation trends + # get analyst recommendation trends (count of analysts recommending strong buy, buy, hold, sell, strong sell over the last few months) msft.current_recommendations - # get analyst price targets (low estimate, current price, mean estimate, high estimate and number of analysts providing estimates) + # get analyst price targets (share price low estimate, current price, mean estimate, high estimate and number of analysts providing estimates) msft.analyst_price_target # get analyst revenue forecasts @@ -157,7 +157,7 @@ If you want to use a proxy server for downloading data, use: msft.get_dividends(proxy="PROXY_SERVER") msft.get_splits(proxy="PROXY_SERVER") msft.get_balance_sheet(proxy="PROXY_SERVER") - msft.get_cashflow(proxy="PROXY_SERVER") + msft.get_cash_flow_statement(proxy="PROXY_SERVER") msft.option_chain(..., proxy="PROXY_SERVER") ... diff --git a/test_yfinance.py b/test_yfinance.py index 500ea572a..f887c8b57 100644 --- a/test_yfinance.py +++ b/test_yfinance.py @@ -41,13 +41,17 @@ def test_attributes(self): ticker.recommendations ticker.earnings ticker.quarterly_earnings - ticker.financials - ticker.quarterly_financials + ticker.income_statement + ticker.quarterly_income_statement ticker.balance_sheet ticker.quarterly_balance_sheet - ticker.cashflow - ticker.quarterly_cashflow + ticker.cash_flow_statement + ticker.quarterly_cash_flow_statement ticker.sustainability + ticker.current_recommendations + ticker.analyst_price_target + ticker.revenue_forecasts + ticker.earnings_forecasts ticker.options def test_holders(self): diff --git a/yfinance/base.py b/yfinance/base.py index 76a956553..f466acb9c 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -57,6 +57,11 @@ def __init__(self, ticker, session=None): self._info = None self._sustainability = None self._recommendations = None + self._analyst_trend_details = None + self._analyst_price_target = 
None + self._rev_est = None + self._eps_est = None + self._major_holders = None self._institutional_holders = None self._mutualfund_holders = None @@ -65,17 +70,20 @@ def __init__(self, ticker, session=None): self._calendar = None self._expirations = {} + self._income_statement = None + self._balance_sheet = None + self._cash_flow_statement = None self._earnings = { "yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._financials = { - "yearly": utils.empty_df(), + self._quarterly_income_statement = { + # "yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._balancesheet = { - "yearly": utils.empty_df(), + self._quarterly_balance_sheet = { + # "yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._cashflow = { - "yearly": utils.empty_df(), + self._quarterly_cash_flow = { + # "yearly": utils.empty_df(), "quarterly": utils.empty_df()} def history(self, period="1mo", interval="1d", @@ -260,13 +268,13 @@ def history(self, period="1mo", interval="1d", if not actions: df.drop(columns=["Dividends", "Stock Splits"], inplace=True) - return df - # ------------------------ - def _get_fundamentals(self, kind=None, proxy=None): def cleanup(data): + ''' + The cleanup function is used for parsing yahoo finance json financial statement data into a pandas dataframe format. 
+ ''' df = _pd.DataFrame(data).drop(columns=['maxAge']) for col in df.columns: df[col] = _np.where( @@ -284,7 +292,7 @@ def cleanup(data): df.index = utils.camel2title(df.index) return df - # setup proxy in requests format + #------------------ Setup Proxy in Requests Format ------------------ if proxy is not None: if isinstance(proxy, dict) and "https" in proxy: proxy = proxy["https"] @@ -295,10 +303,7 @@ def cleanup(data): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) - # get info and sustainability - data = utils.get_json(ticker_url, proxy, self.session) - - # holders + #------------------ Holders ------------------ try: resp = utils.get_html(ticker_url + '/holders', proxy, self.session) holders = _pd.read_html(resp) @@ -334,7 +339,8 @@ def cleanup(data): self._mutualfund_holders['% Out'] = self._mutualfund_holders[ '% Out'].str.replace('%', '').astype(float)/100 - # sustainability + #------------------ Sustainability ------------------ + data = utils.get_json(ticker_url, proxy, self.session) d = {} try: if isinstance(data.get('esgScores'), dict): @@ -353,7 +359,7 @@ def cleanup(data): except Exception: pass - # info (be nice to python 2) + #------------------ Info (be nice to python 2) ------------------ self._info = {} try: items = ['summaryProfile', 'financialData', 'quoteType', @@ -383,7 +389,7 @@ def cleanup(data): except Exception: pass - # events + #------------------ Events ------------------ try: cal = _pd.DataFrame( data['calendarEvents']['earnings']) @@ -395,7 +401,7 @@ def cleanup(data): except Exception: pass - # analyst recommendations + #------------------ Long Term Analyst Recommendations ------------------ try: rec = _pd.DataFrame( data['upgradeDowngradeHistory']['history']) @@ -408,22 +414,21 @@ def cleanup(data): 'Firm', 'To Grade', 'From Grade', 'Action']].sort_index() except Exception: pass - - # get fundamental financial data - fundamentals_data = utils.get_json(ticker_url+'/financials', proxy, self.session) - data = 
fundamentals_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + #------------------ Quarterly Income Statement, Balance Sheet and Cash Flow ------------------ + financials_data = utils.get_json(ticker_url+'/financials', proxy, self.session) + data = financials_data['context']['dispatcher']['stores']['QuoteSummaryStore'] # generic patterns for key in ( - (self._cashflow, 'cashflowStatement', 'cashflowStatements'), - (self._balancesheet, 'balanceSheet', 'balanceSheetStatements'), - (self._financials, 'incomeStatement', 'incomeStatementHistory') + (self._quarterly_cash_flow, 'cashflowStatement', 'cashflowStatements'), + (self._quarterly_balance_sheet, 'balanceSheet', 'balanceSheetStatements'), + (self._quarterly_income_statement, 'incomeStatement', 'incomeStatementHistory') ): - item = key[1] + 'History' - if isinstance(data.get(item), dict): - try: - key[0]['yearly'] = cleanup(data[item][key[2]]) - except Exception as e: - pass + # item = key[1] + 'History' + # if isinstance(data.get(item), dict): + # try: + # key[0]['yearly'] = cleanup(data[item][key[2]]) + # except Exception as e: + # pass item = key[1]+'HistoryQuarterly' if isinstance(data.get(item), dict): @@ -432,7 +437,7 @@ def cleanup(data): except Exception as e: pass - # earnings + #------------------ Earnings ------------------ if isinstance(data.get('earnings'), dict): try: earnings = data['earnings']['financialsChart'] @@ -449,67 +454,68 @@ def cleanup(data): self._earnings['quarterly'] = df except Exception as e: pass + + #------------------ Income Statement ------------------ + data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. + financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) - # Grab the financial template store. This details the order in which the financials should be presented. 
- import pdb - pdb.set_trace() - data = fundamentals_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # This provides the layout/correct order of the financial data. - financial_template_ttm_order = [] # Save the TTM (Trailing Twelve Months) ordering to an object. - financial_template_annual_order = [] # Save the annual ordering to an object. - level_detail = [] #Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this) - for key in data['template']: # Loop through the json to retreive the exact financial order whilst appending to the objects - financial_template_ttm_order.append('trailing{}'.format(key['key'])) - financial_template_annual_order.append('annual{}'.format(key['key'])) - level_detail.append(0) - if 'children' in key: - for child1 in key['children']: # Level 1 - financial_template_ttm_order.append('trailing{}'.format(child1['key'])) - financial_template_annual_order.append('annual{}'.format(child1['key'])) - level_detail.append(1) - if 'children' in child1: - for child2 in child1['children']: # Level 2 - financial_template_ttm_order.append('trailing{}'.format(child2['key'])) - financial_template_annual_order.append('annual{}'.format(child2['key'])) - level_detail.append(2) - if 'children' in child2: - for child3 in child2['children']: # Level 3 - financial_template_ttm_order.append('trailing{}'.format(child3['key'])) - financial_template_annual_order.append('annual{}'.format(child3['key'])) - level_detail.append(3) - - # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - TTM_dicts = [] # Save a dictionary object to store the TTM financials. - Annual_dicts = [] # Save a dictionary object to store the Annual financials. 
- data = fundamentals_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - - for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures. - try: - if len(data['timeSeries'][key]) > 0: - time_series_dict = {} - time_series_dict['index'] = key - for each in data['timeSeries'][key]: # Loop through the years - time_series_dict[each['asOfDate']] = each['reportedValue'] - # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] - if each['periodType'] == 'TTM': - TTM_dicts.append(time_series_dict) - elif each['periodType'] == '12M': - Annual_dicts.append(time_series_dict) - except: - pass + data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") # Combine the raw financial details and the template - TTM = TTM.reindex(financial_template_ttm_order) - Annual = Annual.reindex(financial_template_annual_order) + TTM = TTM.reindex(financials_template_ttm_order) + Annual = Annual.reindex(financials_template_annual_order) TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
TTM.index = TTM.index.str.replace(r'trailing', '') Annual.index = Annual.index.str.replace(r'annual','') _income_statement = Annual.merge(TTM, left_index=True, right_index=True) - _income_statement['level_detail'] = level_detail + _income_statement.index = utils.camel2title(_income_statement.T) + _income_statement['level_detail'] = financials_level_detail _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) self._income_statement = _income_statement.dropna(how='all') - # analysis data/analyst forecasts + #------------------ Balance Sheet ------------------ + balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) + data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_level_detail = utils.build_template(data) + + data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + Annual = Annual.reindex(balance_sheet_template_annual_order) + Annual.index = Annual.index.str.replace(r'annual','') + Annual.index = utils.camel2title(Annual.T) + _balance_sheet = Annual + _balance_sheet['level_detail'] = balance_sheet_level_detail + _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) + self._balance_sheet = _balance_sheet.dropna(how='all') + + #------------------ Cash Flow Statement ------------------ + cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) + data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. 
+ cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) + + data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(cash_flow_template_ttm_order) + Annual = Annual.reindex(cash_flow_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _cash_flow_statement = Annual.merge(TTM, left_index=True, right_index=True) + _cash_flow_statement.index = utils.camel2title(_cash_flow_statement.T) + _cash_flow_statement['level_detail'] = cash_flow_level_detail + _cash_flow_statement = _cash_flow_statement.set_index([_cash_flow_statement.index,'level_detail']) + self._cash_flow_statement = _cash_flow_statement.dropna(how='all') + + #------------------ Analysis Data/Analyst Forecasts ------------------ analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] try: @@ -632,42 +638,44 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): return dict_data return data - # testing ground start - def get_TTM_detail(self, proxy=None, as_dict=False, freq="yearly"): + def get_income_statement(self, proxy=None, as_dict=False): self._get_fundamentals(proxy) - data = self._TTM + data = self._income_statement + if 
as_dict: + return data.to_dict() + return data + + def get_quarterly_income_statement(self, proxy=None, as_dict=False): # Could still be used for quarterly + self._get_fundamentals(proxy=proxy) + data = self._quarterly_income_statement["quarterly"] if as_dict: return data.to_dict() return data - def get_income_statement(self, proxy=None, as_dict=False, freq="yearly"): + def get_balance_sheet(self, proxy=None, as_dict=False): self._get_fundamentals(proxy) - data = self._income_statement + data = self._balance_sheet if as_dict: return data.to_dict() return data - # testing ground end - def get_financials(self, proxy=None, as_dict=False, freq="yearly"): + def get_quarterly_balance_sheet(self, proxy=None, as_dict=False): # Could still be used for quarterly self._get_fundamentals(proxy=proxy) - data = self._financials[freq] + data = self._quarterly_balance_sheet["quarterly"] if as_dict: return data.to_dict() return data - - def get_balancesheet(self, proxy=None, as_dict=False, freq="yearly"): + + def get_cash_flow_statement(self, proxy=None, as_dict=False): self._get_fundamentals(proxy=proxy) - data = self._balancesheet[freq] + data = self._cash_flow_statement if as_dict: return data.to_dict() return data - def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"): - return self.get_balancesheet(proxy, as_dict, freq) - - def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"): + def get_quarterly_cash_flow_statement(self, proxy=None, as_dict=False): # Could still be used for quarterly self._get_fundamentals(proxy=proxy) - data = self._cashflow[freq] + data = self._quarterly_cash_flow["quarterly"] if as_dict: return data.to_dict() return data diff --git a/yfinance/ticker.py b/yfinance/ticker.py index ba21eed9f..6302ad6f2 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -154,36 +154,28 @@ def quarterly_earnings(self): return self.get_earnings(freq='quarterly') @property - def financials(self): - return self.get_financials() + def 
income_statement(self): + return self.get_income_statement() @property - def quarterly_financials(self): - return self.get_financials(freq='quarterly') + def quarterly_income_statement(self): + return self.get_quarterly_income_statement() @property def balance_sheet(self): - return self.get_balancesheet() + return self.get_balance_sheet() @property def quarterly_balance_sheet(self): - return self.get_balancesheet(freq='quarterly') - - @property - def balancesheet(self): - return self.get_balancesheet() + return self.get_quarterly_balance_sheet() @property - def quarterly_balancesheet(self): - return self.get_balancesheet(freq='quarterly') + def cash_flow_statement(self): + return self.get_cash_flow_statement() @property - def cashflow(self): - return self.get_cashflow() - - @property - def quarterly_cashflow(self): - return self.get_cashflow(freq='quarterly') + def quarterly_cash_flow_statement(self): + return self.get_quarterly_cash_flow_statement() @property def sustainability(self): @@ -205,18 +197,6 @@ def revenue_forecasts(self): def earnings_forecasts(self): return self.get_earnings_forecast() - # testing ground start - - @property - def extra_ttm(self): - return self.get_TTM_detail() - - @property - def income_statement(self): - return self.get_income_statement() - - # testing ground end - @property def options(self): if not self._expirations: diff --git a/yfinance/utils.py b/yfinance/utils.py index 8b1abeb4b..989dcf628 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -68,6 +68,66 @@ def get_json(url, proxy=None, session=None): return _json.loads(new_data) +def build_template(data): + ''' + build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". 
+ + Returns: + - template_annual_order: The order that annual figures should be listed in. + - template_ttm_order: The order that TTM (Trailing Twelve Month) figures should be listed in. + - level_detail: The level of each individual line item. E.g. for the "/financials" webpage, "Total Revenue" is a level 0 item and is the summation of "Operating Revenue" and "Excise Taxes" which are level 1 items. + + ''' + template_ttm_order = [] # Save the TTM (Trailing Twelve Months) ordering to an object. + template_annual_order = [] # Save the annual ordering to an object. + level_detail = [] #Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this) + for key in data['template']: # Loop through the json to retreive the exact financial order whilst appending to the objects + template_ttm_order.append('trailing{}'.format(key['key'])) + template_annual_order.append('annual{}'.format(key['key'])) + level_detail.append(0) + if 'children' in key: + for child1 in key['children']: # Level 1 + template_ttm_order.append('trailing{}'.format(child1['key'])) + template_annual_order.append('annual{}'.format(child1['key'])) + level_detail.append(1) + if 'children' in child1: + for child2 in child1['children']: # Level 2 + template_ttm_order.append('trailing{}'.format(child2['key'])) + template_annual_order.append('annual{}'.format(child2['key'])) + level_detail.append(2) + if 'children' in child2: + for child3 in child2['children']: # Level 3 + template_ttm_order.append('trailing{}'.format(child3['key'])) + template_annual_order.append('annual{}'.format(child3['key'])) + level_detail.append(3) + return template_ttm_order, template_annual_order, level_detail + +def retreive_financial_details(data): + ''' + retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", 
"/cash-flow" and "/balance-sheet". + + Returns: + - TTM_dicts: A dictionary full of all of the available Trailing Twelve Month figures, this can easily be converted to a pandas dataframe. + - Annual_dicts: A dictionary full of all of the available Annual figures, this can easily be converted to a pandas dataframe. + ''' + TTM_dicts = [] # Save a dictionary object to store the TTM financials. + Annual_dicts = [] # Save a dictionary object to store the Annual financials. + for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures. + try: + if len(data['timeSeries'][key]) > 0: + time_series_dict = {} + time_series_dict['index'] = key + for each in data['timeSeries'][key]: # Loop through the years + time_series_dict[each['asOfDate']] = each['reportedValue'] + # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] + if each['periodType'] == 'TTM': + TTM_dicts.append(time_series_dict) + elif each['periodType'] == '12M': + Annual_dicts.append(time_series_dict) + except: + pass + return TTM_dicts, Annual_dicts + def camel2title(o): return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o] From 81a0a4e6654ddf7a26dcd455af008d1bb6900bd7 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sun, 18 Jul 2021 11:02:41 +1000 Subject: [PATCH 07/20] Updated to align with the yfinance upstream main. --- CHANGELOG.rst | 2 +- yfinance/version.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index befb1dd44..847df3be1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,6 @@ Change Log =========== -0.1.61 +0.1.64 ------ - Updated the way in which the annual financial data is pieced together (income statement, balance sheet and cash flow statement) this now more accurately reflects what is presented in Yahoo Finance. - Added functionality to enable users to pull in detail from the Yahoo Finance Analysis page. 
diff --git a/yfinance/version.py b/yfinance/version.py index 56411a229..ef39d788e 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1,5 +1 @@ -<<<<<<< HEAD -version = "0.1.61" -======= -version = "0.1.63" ->>>>>>> fc0f97926ef6a960dc8e6649dc74d06c1840195b +version = "0.1.64" From 87dc9fb34517d3e586348d83c10c965067e1ccda Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sun, 18 Jul 2021 12:16:42 +1000 Subject: [PATCH 08/20] Potential issues flagged by CodeFactor updated. Believe that the warnings with regards to "statement seems to have no effect" is because the import on this python file is yfinance (master version 1.63). --- yfinance/base.py | 6 +++--- yfinance/utils.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 6e7201472..dd729bfc7 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -530,11 +530,11 @@ def cleanup(data): analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] try: self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) - except: + except Exception as e: self._analyst_trend_details = _pd.DataFrame() try: self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T - except: + except Exception as e: self._analyst_price_target = _pd.DataFrame() earnings_estimate = [] revenue_estimate = [] @@ -550,7 +550,7 @@ def cleanup(data): revenue_dict['period'] = key['period'] revenue_dict['endDate'] = key['endDate'] revenue_estimate.append(revenue_dict) - except: + except Exception as e: pass self._rev_est = _pd.DataFrame(revenue_estimate) self._eps_est = _pd.DataFrame(earnings_estimate) diff --git a/yfinance/utils.py b/yfinance/utils.py index 1acfc2a80..831693f26 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -120,11 +120,11 @@ def retreive_financial_details(data): for 
each in data['timeSeries'][key]: # Loop through the years time_series_dict[each['asOfDate']] = each['reportedValue'] # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] - if each['periodType'] == 'TTM': - TTM_dicts.append(time_series_dict) - elif each['periodType'] == '12M': - Annual_dicts.append(time_series_dict) - except: + if each['periodType'] == 'TTM': + TTM_dicts.append(time_series_dict) + elif each['periodType'] == '12M': + Annual_dicts.append(time_series_dict) + except Exception as e: pass return TTM_dicts, Annual_dicts From e7c55bbdecfa280be3bd96ea4bfa3cf4cb1b81a2 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sun, 18 Jul 2021 12:37:15 +1000 Subject: [PATCH 09/20] Updated to resolve Travis CI Build Fail. --- yfinance/base.py | 72 ++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index dd729bfc7..439f65f5d 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -466,43 +466,49 @@ def cleanup(data): pass #------------------ Income Statement ------------------ - data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. - financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) + try: + data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. + financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) + + data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). 
+ TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(financials_template_ttm_order) - Annual = Annual.reindex(financials_template_annual_order) - TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). - TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _income_statement = Annual.merge(TTM, left_index=True, right_index=True) - _income_statement.index = utils.camel2title(_income_statement.T) - _income_statement['level_detail'] = financials_level_detail - _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) - self._income_statement = _income_statement.dropna(how='all') + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(financials_template_ttm_order) + Annual = Annual.reindex(financials_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
+ TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _income_statement = Annual.merge(TTM, left_index=True, right_index=True) + _income_statement.index = utils.camel2title(_income_statement.T) + _income_statement['level_detail'] = financials_level_detail + _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) + self._income_statement = _income_statement.dropna(how='all') + except Exception as e: + self._income_statement = _pd.DataFrame() #------------------ Balance Sheet ------------------ - balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) - data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_level_detail = utils.build_template(data) - - data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - Annual = Annual.reindex(balance_sheet_template_annual_order) - Annual.index = Annual.index.str.replace(r'annual','') - Annual.index = utils.camel2title(Annual.T) - _balance_sheet = Annual - _balance_sheet['level_detail'] = balance_sheet_level_detail - _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) - self._balance_sheet = _balance_sheet.dropna(how='all') + try: + balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) + data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_level_detail = utils.build_template(data) + + data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + Annual = 
_pd.DataFrame.from_dict(Annual_dicts).set_index("index") + Annual = Annual.reindex(balance_sheet_template_annual_order) + Annual.index = Annual.index.str.replace(r'annual','') + Annual.index = utils.camel2title(Annual.T) + _balance_sheet = Annual + _balance_sheet['level_detail'] = balance_sheet_level_detail + _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) + self._balance_sheet = _balance_sheet.dropna(how='all') + except Exception as e: + self._balance_sheet = _pd.DataFrame() + #------------------ Cash Flow Statement ------------------ cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. From 7a395c37e9ab3697e3fd378c30c242f5f4667135 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sun, 18 Jul 2021 20:51:33 +1000 Subject: [PATCH 10/20] Updated base with some exceptions and utils. --- yfinance/base.py | 49 ++++++++++++++++++++++++++--------------------- yfinance/utils.py | 8 ++++---- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 439f65f5d..fe36fac7a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -510,30 +510,35 @@ def cleanup(data): self._balance_sheet = _pd.DataFrame() #------------------ Cash Flow Statement ------------------ - cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) - data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. 
- cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) + try: + cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) + data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. + cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) + + data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(cash_flow_template_ttm_order) - Annual = Annual.reindex(cash_flow_template_annual_order) - TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
- TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _cash_flow_statement = Annual.merge(TTM, left_index=True, right_index=True) - _cash_flow_statement.index = utils.camel2title(_cash_flow_statement.T) - _cash_flow_statement['level_detail'] = cash_flow_level_detail - _cash_flow_statement = _cash_flow_statement.set_index([_cash_flow_statement.index,'level_detail']) - self._cash_flow_statement = _cash_flow_statement.dropna(how='all') - + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(cash_flow_template_ttm_order) + Annual = Annual.reindex(cash_flow_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _cash_flow_statement = Annual.merge(TTM, left_index=True, right_index=True) + _cash_flow_statement.index = utils.camel2title(_cash_flow_statement.T) + _cash_flow_statement['level_detail'] = cash_flow_level_detail + _cash_flow_statement = _cash_flow_statement.set_index([_cash_flow_statement.index,'level_detail']) + self._cash_flow_statement = _cash_flow_statement.dropna(how='all') + except Exception as e: + self._cash_flow_statement = _pd.DataFrame() #------------------ Analysis Data/Analyst Forecasts ------------------ - analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) - analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + try: + analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) + analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + except Exception as e: + analysis_data = {} 
try: self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) except Exception as e: diff --git a/yfinance/utils.py b/yfinance/utils.py index 831693f26..8bfd0d523 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -120,10 +120,10 @@ def retreive_financial_details(data): for each in data['timeSeries'][key]: # Loop through the years time_series_dict[each['asOfDate']] = each['reportedValue'] # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] - if each['periodType'] == 'TTM': - TTM_dicts.append(time_series_dict) - elif each['periodType'] == '12M': - Annual_dicts.append(time_series_dict) + if each['periodType'] == 'TTM': + TTM_dicts.append(time_series_dict) + elif each['periodType'] == '12M': + Annual_dicts.append(time_series_dict) except Exception as e: pass return TTM_dicts, Annual_dicts From 3f23c067f9a426bb1364a74c5e0f2c392e8a64fc Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sun, 8 Aug 2021 13:46:53 +1000 Subject: [PATCH 11/20] Updated to ensure .info is brought in correctly. 
--- yfinance/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yfinance/base.py b/yfinance/base.py index fe36fac7a..ab785cd95 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -351,6 +351,7 @@ def cleanup(data): #------------------ Sustainability ------------------ data = utils.get_json(ticker_url, proxy, self.session) + data = data['context']['dispatcher']['stores']['QuoteSummaryStore'] d = {} try: if isinstance(data.get('esgScores'), dict): From c80bfc0417d7d6d3df44cdedd2d9fe7557d9ca05 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Sat, 8 Oct 2022 20:31:42 +0100 Subject: [PATCH 12/20] Manually merge pull request #776 - Fix & enhance annual financials --- yfinance/base.py | 189 +++++++++++++++++++++++++++++++++++++++++---- yfinance/ticker.py | 50 +++++++++--- yfinance/utils.py | 77 ++++++++++++++++-- 3 files changed, 284 insertions(+), 32 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 1dceafc38..29a0a89ea 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -74,8 +74,8 @@ def __init__(self, ticker, session=None): self._earnings_history = None self._earnings = None - self._financials = None - self._balancesheet = None + self._income_stmt = None + self._balance_sheet = None self._cashflow = None # accept isin as ticker @@ -362,6 +362,7 @@ def _get_info(self, proxy=None): # get info and sustainability data = utils.get_json(ticker_url, proxy, self.session) + data = data['context']['dispatcher']['stores']['QuoteSummaryStore'] # sustainability d = {} @@ -456,6 +457,9 @@ def _get_info(self, proxy=None): def _get_fundamentals(self, proxy=None): def cleanup(data): + ''' + The cleanup function is used for parsing yahoo finance json financial statement data into a pandas dataframe format. 
+ ''' df = _pd.DataFrame(data).drop(columns=['maxAge']) for col in df.columns: df[col] = _np.where( @@ -526,22 +530,24 @@ def cleanup(data): self._get_info(proxy) # get fundamentals - data = utils.get_json(ticker_url + '/financials', proxy, self.session) + financials_data = utils.get_json(ticker_url + '/financials', proxy, self.session) + data = financials_data['context']['dispatcher']['stores']['QuoteSummaryStore'] # generic patterns self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} self._cashflow = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._balancesheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._financials = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} + self._balance_sheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} + self._income = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} for key in ( (self._cashflow, 'cashflowStatement', 'cashflowStatements'), - (self._balancesheet, 'balanceSheet', 'balanceSheetStatements'), - (self._financials, 'incomeStatement', 'incomeStatementHistory') + (self._balance_sheet, 'balanceSheet', 'balanceSheetStatements'), + (self._income, 'incomeStatement', 'incomeStatementHistory') ): item = key[1] + 'History' if isinstance(data.get(item), dict): try: - key[0]['yearly'] = cleanup(data[item][key[2]]) + # key[0]['yearly'] = cleanup(data[item][key[2]]) + key[0]['yearly_legacy'] = cleanup(data[item][key[2]]) except Exception: pass @@ -570,6 +576,118 @@ def cleanup(data): except Exception: pass + #------------------ Income Statement ------------------ + try: + # Grab the financial template store. This details the order in which the financials should be presented. 
+ data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) + + # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(financials_template_ttm_order) + Annual = Annual.reindex(financials_template_annual_order) + # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _income_statement = Annual.merge(TTM, left_index=True, right_index=True) + _income_statement.index = utils.camel2title(_income_statement.T) + _income_statement['level_detail'] = financials_level_detail + _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) + _income_statement = _income_statement[sorted(_income_statement.columns, reverse=True)] + # _income_statement = _income_statement[[]] + self._income["yearly"] = _income_statement.dropna(how='all') + except Exception as e: + self._income["yearly"] = _pd.DataFrame() + + #------------------ Balance Sheet ------------------ + try: + balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) + data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + balance_sheet_template_ttm_order, balance_sheet_template_annual_order, 
balance_sheet_level_detail = utils.build_template(data) + + data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + Annual = Annual.reindex(balance_sheet_template_annual_order) + Annual.index = Annual.index.str.replace(r'annual','') + Annual.index = utils.camel2title(Annual.T) + _balance_sheet = Annual + _balance_sheet['level_detail'] = balance_sheet_level_detail + _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) + _balance_sheet = _balance_sheet[sorted(_balance_sheet.columns, reverse=True)] + self._balance_sheet["yearly"] = _balance_sheet.dropna(how='all') + except Exception as e: + self._balance_sheet["yearly"] = _pd.DataFrame() + + #------------------ Cash Flow Statement ------------------ + try: + cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) + data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. + cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) + + data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). 
+ TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(cash_flow_template_ttm_order) + Annual = Annual.reindex(cash_flow_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _cash_flow_stmt = Annual.merge(TTM, left_index=True, right_index=True) + _cash_flow_stmt.index = utils.camel2title(_cash_flow_stmt.T) + _cash_flow_stmt['level_detail'] = cash_flow_level_detail + _cash_flow_stmt = _cash_flow_stmt.set_index([_cash_flow_stmt.index,'level_detail']) + _cash_flow_stmt = _cash_flow_stmt[sorted(_cash_flow_stmt.columns, reverse=True)] + self._cashflow["yearly"] = _cash_flow_stmt.dropna(how='all') + except Exception as e: + self._cashflow["yearly"] = _pd.DataFrame() + + #------------------ Analysis Data/Analyst Forecasts ------------------ + try: + analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) + analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + except Exception as e: + analysis_data = {} + try: + self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) + except Exception as e: + self._analyst_trend_details = _pd.DataFrame() + try: + self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T + except Exception as e: + self._analyst_price_target = _pd.DataFrame() + earnings_estimate = [] + revenue_estimate = [] + if len(self._analyst_trend_details) != 0: + for key in 
analysis_data['earningsTrend']['trend']: + try: + earnings_dict = key['earningsEstimate'] + earnings_dict['period'] = key['period'] + earnings_dict['endDate'] = key['endDate'] + earnings_estimate.append(earnings_dict) + + revenue_dict = key['revenueEstimate'] + revenue_dict['period'] = key['period'] + revenue_dict['endDate'] = key['endDate'] + revenue_estimate.append(revenue_dict) + except Exception as e: + pass + self._rev_est = _pd.DataFrame(revenue_estimate) + self._eps_est = _pd.DataFrame(earnings_estimate) + else: + self._rev_est = _pd.DataFrame() + self._eps_est = _pd.DataFrame() + # shares outstanding try: # keep only years with non None data @@ -708,6 +826,34 @@ def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs): return data.to_dict() return data + def get_current_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._analyst_trend_details + if as_dict: + return data.to_dict() + return data + + def get_analyst_price_target(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._analyst_price_target + if as_dict: + return data.to_dict() + return data + + def get_rev_forecast(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._rev_est + if as_dict: + return data.to_dict() + return data + + def get_earnings_forecast(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._eps_est + if as_dict: + return data.to_dict() + return data + def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) data = self._earnings[freq] @@ -724,22 +870,31 @@ def get_analysis(self, proxy=None, as_dict=False, *args, **kwargs): return data.to_dict() return data - def get_financials(self, proxy=None, as_dict=False, freq="yearly"): + def get_income_stmt(self, proxy=None, as_dict=False, freq="yearly"): 
self._get_fundamentals(proxy=proxy) - data = self._financials[freq] + data = self._income[freq] if as_dict: return data.to_dict() return data - - def get_balancesheet(self, proxy=None, as_dict=False, freq="yearly"): + def get_income_stmt_legacy(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) - data = self._balancesheet[freq] + data = self._income[freq+"_legacy"] if as_dict: return data.to_dict() return data def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"): - return self.get_balancesheet(proxy, as_dict, freq) + self._get_fundamentals(proxy=proxy) + data = self._balance_sheet[freq] + if as_dict: + return data.to_dict() + return data + def get_balance_sheet_legacy(self, proxy=None, as_dict=False, freq="yearly"): + self._get_fundamentals(proxy=proxy) + data = self._balance_sheet[freq+"_legacy"] + if as_dict: + return data.to_dict() + return data def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) @@ -747,6 +902,12 @@ def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"): if as_dict: return data.to_dict() return data + def get_cashflow_legacy(self, proxy=None, as_dict=False, freq="yearly"): + self._get_fundamentals(proxy=proxy) + data = self._cashflow[freq+"_legacy"] + if as_dict: + return data.to_dict() + return data def get_dividends(self, proxy=None): if self._history is None: diff --git a/yfinance/ticker.py b/yfinance/ticker.py index cdd3d1471..5e425b1de 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -163,32 +163,42 @@ def quarterly_earnings(self): return self.get_earnings(freq='quarterly') @property - def financials(self): - return self.get_financials() - + def income_stmt(self): + return self.get_income_stmt() @property - def quarterly_financials(self): - return self.get_financials(freq='quarterly') + def income_stmt_legacy(self): + return self.get_income_stmt_legacy() @property - def balance_sheet(self): - return 
self.get_balancesheet() + def quarterly_income_stmt(self): + return self.get_income_stmt(freq='quarterly') @property - def quarterly_balance_sheet(self): - return self.get_balancesheet(freq='quarterly') - + def balance_sheet(self): + return self.get_balance_sheet() @property def balancesheet(self): - return self.get_balancesheet() + return self.get_balance_sheet() + @property + def balance_sheet_legacy(self): + return self.get_balance_sheet_legacy() + @property + def balancesheet_legacy(self): + return self.get_balance_sheet_legacy() @property + def quarterly_balance_sheet(self): + return self.get_balance_sheet(freq='quarterly') + @property def quarterly_balancesheet(self): - return self.get_balancesheet(freq='quarterly') + return self.get_balance_sheet(freq='quarterly') @property def cashflow(self): return self.get_cashflow() + @property + def cashflow_legacy(self): + return self.get_cashflow_legacy() @property def quarterly_cashflow(self): @@ -198,6 +208,22 @@ def quarterly_cashflow(self): def sustainability(self): return self.get_sustainability() + @property + def current_recommendations(self): + return self.get_current_recommendations() + + @property + def analyst_price_target(self): + return self.get_analyst_price_target() + + @property + def revenue_forecasts(self): + return self.get_rev_forecast() + + @property + def earnings_forecasts(self): + return self.get_earnings_forecast() + @property def options(self): if not self._expirations: diff --git a/yfinance/utils.py b/yfinance/utils.py index d611575c3..943218dc0 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -107,18 +107,22 @@ def get_html(url, proxy=None, session=None): def get_json(url, proxy=None, session=None): + ''' + get_json returns a python dictionary of the store detail for yahoo finance web pages. 
+ ''' session = session or _requests html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text - if "QuoteSummaryStore" not in html: - html = session.get(url=url, proxies=proxy).text - if "QuoteSummaryStore" not in html: - return {} + # if "QuoteSummaryStore" not in html: + # html = session.get(url=url, proxies=proxy).text + # if "QuoteSummaryStore" not in html: + # return {} json_str = html.split('root.App.main =')[1].split( '(this)')[0].split(';\n}')[0].strip() - data = _json.loads(json_str)[ - 'context']['dispatcher']['stores']['QuoteSummaryStore'] + # data = _json.loads(json_str)['context']['dispatcher']['stores']['QuoteSummaryStore'] + data = _json.loads(json_str) + # add data about Shares Outstanding for companies' tickers if they are available try: data['annualBasicAverageShares'] = _json.loads( @@ -127,6 +131,7 @@ def get_json(url, proxy=None, session=None): except Exception: pass + ## TODO: Why dumping and parsing again? # return data new_data = _json.dumps(data).replace('{}', 'null') new_data = _re.sub( @@ -134,6 +139,66 @@ def get_json(url, proxy=None, session=None): return _json.loads(new_data) +def build_template(data): + ''' + build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". + + Returns: + - template_annual_order: The order that annual figures should be listed in. + - template_ttm_order: The order that TTM (Trailing Twelve Month) figures should be listed in. + - level_detail: The level of each individual line item. E.g. for the "/financials" webpage, "Total Revenue" is a level 0 item and is the summation of "Operating Revenue" and "Excise Taxes" which are level 1 items. 
+ + ''' + template_ttm_order = [] # Save the TTM (Trailing Twelve Months) ordering to an object. + template_annual_order = [] # Save the annual ordering to an object. + level_detail = [] #Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this) + for key in data['template']: # Loop through the json to retreive the exact financial order whilst appending to the objects + template_ttm_order.append('trailing{}'.format(key['key'])) + template_annual_order.append('annual{}'.format(key['key'])) + level_detail.append(0) + if 'children' in key: + for child1 in key['children']: # Level 1 + template_ttm_order.append('trailing{}'.format(child1['key'])) + template_annual_order.append('annual{}'.format(child1['key'])) + level_detail.append(1) + if 'children' in child1: + for child2 in child1['children']: # Level 2 + template_ttm_order.append('trailing{}'.format(child2['key'])) + template_annual_order.append('annual{}'.format(child2['key'])) + level_detail.append(2) + if 'children' in child2: + for child3 in child2['children']: # Level 3 + template_ttm_order.append('trailing{}'.format(child3['key'])) + template_annual_order.append('annual{}'.format(child3['key'])) + level_detail.append(3) + return template_ttm_order, template_annual_order, level_detail + +def retreive_financial_details(data): + ''' + retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". + + Returns: + - TTM_dicts: A dictionary full of all of the available Trailing Twelve Month figures, this can easily be converted to a pandas dataframe. + - Annual_dicts: A dictionary full of all of the available Annual figures, this can easily be converted to a pandas dataframe. + ''' + TTM_dicts = [] # Save a dictionary object to store the TTM financials. 
+ Annual_dicts = [] # Save a dictionary object to store the Annual financials. + for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures. + try: + if len(data['timeSeries'][key]) > 0: + time_series_dict = {} + time_series_dict['index'] = key + for each in data['timeSeries'][key]: # Loop through the years + time_series_dict[each['asOfDate']] = each['reportedValue'] + # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] + if each['periodType'] == 'TTM': + TTM_dicts.append(time_series_dict) + elif each['periodType'] == '12M': + Annual_dicts.append(time_series_dict) + except Exception as e: + pass + return TTM_dicts, Annual_dicts + def camel2title(o): return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o] From 34e1b2f15792edc7a22aa1597ea5f42ff7fda7b2 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 9 Oct 2022 17:20:07 +0100 Subject: [PATCH 13/20] Add new time-series functions and compare against new scraping --- yfinance/base.py | 243 +++++++++++++++++++++++++++++++++------------- yfinance/utils.py | 73 ++++++++++++++ 2 files changed, 246 insertions(+), 70 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 29a0a89ea..9e83f09fc 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -28,6 +28,7 @@ import pandas as _pd import numpy as _np import re as _re +from pprint import pprint try: from urllib.parse import quote as urlencode @@ -46,6 +47,9 @@ _SCRAPE_URL_ = 'https://finance.yahoo.com/quote' _ROOT_URL_ = 'https://finance.yahoo.com' +_DEV = False +# _DEV = True + class TickerBase(): def __init__(self, ticker, session=None): @@ -576,81 +580,180 @@ def cleanup(data): except Exception: pass + if _DEV: + # Fetching using get_financials_time_series() missing TTM column so cannot completely replace git-shogg's method + annual_income_stmt_ts = utils.get_financials_time_series(self.ticker, "financials", "annual", ticker_url, proxy, self.session) + 
annual_balance_sheet_ts = utils.get_financials_time_series(self.ticker, "balance-sheet", "annual", ticker_url, proxy, self.session) + annual_cashflow_ts = utils.get_financials_time_series(self.ticker, "cash-flow", "annual", ticker_url, proxy, self.session) + + qtr_income_stmt_ts = utils.get_financials_time_series(self.ticker, "financials", "quarterly", ticker_url, proxy, self.session) + qtr_balance_sheet_ts = utils.get_financials_time_series(self.ticker, "balance-sheet", "quarterly", ticker_url, proxy, self.session) + qtr_cashflow_ts = utils.get_financials_time_series(self.ticker, "cash-flow", "quarterly", ticker_url, proxy, self.session) + #------------------ Income Statement ------------------ - try: - # Grab the financial template store. This details the order in which the financials should be presented. - data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) - - # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(financials_template_ttm_order) - Annual = Annual.reindex(financials_template_annual_order) - # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
- TTM.columns = ['TTM ' + str(col) for col in TTM.columns] - TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _income_statement = Annual.merge(TTM, left_index=True, right_index=True) - _income_statement.index = utils.camel2title(_income_statement.T) - _income_statement['level_detail'] = financials_level_detail - _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) - _income_statement = _income_statement[sorted(_income_statement.columns, reverse=True)] - # _income_statement = _income_statement[[]] - self._income["yearly"] = _income_statement.dropna(how='all') - except Exception as e: - self._income["yearly"] = _pd.DataFrame() + # try: + # Grab the financial template store. This details the order in which the financials should be presented. + data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + financials_template_ttm_order, financials_template_annual_order, financials_level_detail = utils.build_template(data) + + # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(financials_template_ttm_order) + Annual = Annual.reindex(financials_template_annual_order) + + # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
+ TTM.columns = ['TTM ' + str(col) for col in TTM.columns] + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _income_statement = Annual.merge(TTM, left_index=True, right_index=True) + + if _DEV: + annual_scraped_keys = _income_statement.dropna(how="all").index.values + + # First, compare not-nan rows against get_financials_time_series(), are same rows returned? + annual_ts_keys = annual_income_stmt_ts.index.values + missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) + missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) + if len(missing_from_ts)>0: + print("") ; print("") + print("WARNING: These scraped keys missing from time-series annual income-stmt:") + print(_income_statement[_income_statement.index.isin(missing_from_ts)][sorted(_income_statement.columns, reverse=True)]) + if len(missing_from_scraped)>0: + print("") ; print("") + print("WARNING: These time-series keys missing from scraped annual income-stmt. 
Normally they are present in Yahoo.com so why missing here?") + print(annual_income_stmt_ts[annual_income_stmt_ts.index.isin(missing_from_scraped)]) + + # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used + qtr_ts_keys = qtr_income_stmt_ts.index.values + missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) + # Remove what already know is missing + missing_from_annual -= missing_from_scraped + if len(missing_from_annual) > 0: + print("WARNING: These keys in time-series quarterly income-stmt are MISSING from scraped annual, complicates table construction") + print(qtr_income_stmt_ts[qtr_income_stmt_ts.index.isin(missing_from_annual)]) + else: + print("GREAT: Should be easy to fix quarterly income-stmt, just copy MultiIndex from annual (once you fix missing keys)") + + _income_statement.index = utils.camel2title(_income_statement.T) + _income_statement['level_detail'] = financials_level_detail + _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) + _income_statement = _income_statement[sorted(_income_statement.columns, reverse=True)] + _income_statement = _income_statement.dropna(how='all') + self._income["yearly"] = _income_statement + # except Exception as e: + # self._income["yearly"] = _pd.DataFrame() #------------------ Balance Sheet ------------------ - try: - balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) - data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_level_detail = utils.build_template(data) - - data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - Annual = Annual.reindex(balance_sheet_template_annual_order) - Annual.index = 
Annual.index.str.replace(r'annual','') - Annual.index = utils.camel2title(Annual.T) - _balance_sheet = Annual - _balance_sheet['level_detail'] = balance_sheet_level_detail - _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) - _balance_sheet = _balance_sheet[sorted(_balance_sheet.columns, reverse=True)] - self._balance_sheet["yearly"] = _balance_sheet.dropna(how='all') - except Exception as e: - self._balance_sheet["yearly"] = _pd.DataFrame() + # try: + balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) + data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] + balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_level_detail = utils.build_template(data) + + data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + Annual = Annual.reindex(balance_sheet_template_annual_order) + Annual.index = Annual.index.str.replace(r'annual','') + + if _DEV: + annual_scraped_keys = Annual.dropna(how="all").index.values + + # First, compare not-nan rows against get_financials_time_series(), are same rows returned? + annual_ts_keys = annual_balance_sheet_ts.index.values + missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) + missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) + if len(missing_from_ts)>0: + print("") ; print("") + print("WARNING: These scraped keys missing from time-series annual balance-sheet:") + print(Annual[Annual.index.isin(missing_from_ts)][sorted(Annual.columns, reverse=True)]) + if len(missing_from_scraped)>0: + print("") ; print("") + print("WARNING: These time-series keys missing from scraped annual balance-sheet. 
Normally they are present in Yahoo.com so why missing here?") + print(annual_balance_sheet_ts[annual_balance_sheet_ts.index.isin(missing_from_scraped)]) + + # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used + qtr_ts_keys = qtr_balance_sheet_ts.index.values + missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) + # Remove what already know is missing + missing_from_annual -= missing_from_scraped + if len(missing_from_annual) > 0: + print("") ; print("") + print("WARNING: These keys in time-series quarterly balance-sheet are MISSING from scraped annual, complicates table construction") + print(qtr_balance_sheet_ts[qtr_balance_sheet_ts.index.isin(missing_from_annual)]) + else: + print("") ; print("") + print("GREAT: Should be easy to fix quarterly balance-sheet, just copy MultiIndex from annual (once you fix missing keys)") + + Annual.index = utils.camel2title(Annual.T) + _balance_sheet = Annual + _balance_sheet['level_detail'] = balance_sheet_level_detail + _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) + _balance_sheet = _balance_sheet[sorted(_balance_sheet.columns, reverse=True)] + self._balance_sheet["yearly"] = _balance_sheet.dropna(how='all') + # except Exception as e: + # self._balance_sheet["yearly"] = _pd.DataFrame() #------------------ Cash Flow Statement ------------------ - try: - cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) - data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. 
- cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) - - data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(cash_flow_template_ttm_order) - Annual = Annual.reindex(cash_flow_template_annual_order) - TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). - TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _cash_flow_stmt = Annual.merge(TTM, left_index=True, right_index=True) - _cash_flow_stmt.index = utils.camel2title(_cash_flow_stmt.T) - _cash_flow_stmt['level_detail'] = cash_flow_level_detail - _cash_flow_stmt = _cash_flow_stmt.set_index([_cash_flow_stmt.index,'level_detail']) - _cash_flow_stmt = _cash_flow_stmt[sorted(_cash_flow_stmt.columns, reverse=True)] - self._cashflow["yearly"] = _cash_flow_stmt.dropna(how='all') - except Exception as e: - self._cashflow["yearly"] = _pd.DataFrame() + # try: + cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) + data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. 
+ cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_level_detail = utils.build_template(data) + + data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) + + TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") + Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") + # Combine the raw financial details and the template + TTM = TTM.reindex(cash_flow_template_ttm_order) + Annual = Annual.reindex(cash_flow_template_annual_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). + TTM.index = TTM.index.str.replace(r'trailing', '') + Annual.index = Annual.index.str.replace(r'annual','') + _cash_flow_stmt = Annual.merge(TTM, left_index=True, right_index=True) + + if _DEV: + annual_scraped_keys = _cash_flow_stmt.dropna(how="all").index.values + + # First, compare not-nan rows against get_financials_time_series(), are same rows returned? + annual_ts_keys = annual_cashflow_ts.index.values + missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) + missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) + if len(missing_from_ts)>0: + print("") ; print("") + print("WARNING: These scraped keys missing from time-series annual cash-flow:") + print(_cash_flow_stmt[_cash_flow_stmt.index.isin(missing_from_ts)][sorted(_cash_flow_stmt.columns, reverse=True)]) + if len(missing_from_scraped)>0: + print("") ; print("") + print("WARNING: These time-series keys missing from scraped annual cash-flow. 
Normally they are present in Yahoo.com so why missing here?") + print(annual_cashflow_ts[annual_cashflow_ts.index.isin(missing_from_scraped)]) + + # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used + qtr_ts_keys = qtr_cashflow_ts.index.values + missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) + # Remove what already know is missing + missing_from_annual -= missing_from_scraped + if len(missing_from_annual) > 0: + print("") ; print("") + print("WARNING: These keys in time-series quarterly cash-flow are MISSING from scraped annual, complicates table construction") + print(qtr_cashflow_ts[qtr_cashflow_ts.index.isin(missing_from_annual)]) + else: + print("") ; print("") + print("GREAT: Should be easy to fix quarterly cash-flow, just copy MultiIndex from annual (once you fix missing keys)") + + _cash_flow_stmt.index = utils.camel2title(_cash_flow_stmt.T) + _cash_flow_stmt['level_detail'] = cash_flow_level_detail + _cash_flow_stmt = _cash_flow_stmt.set_index([_cash_flow_stmt.index,'level_detail']) + _cash_flow_stmt = _cash_flow_stmt[sorted(_cash_flow_stmt.columns, reverse=True)] + self._cashflow["yearly"] = _cash_flow_stmt.dropna(how='all') + # except Exception as e: + # self._cashflow["yearly"] = _pd.DataFrame() #------------------ Analysis Data/Analyst Forecasts ------------------ try: diff --git a/yfinance/utils.py b/yfinance/utils.py index 943218dc0..f6396fd14 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -21,6 +21,7 @@ from __future__ import print_function +import time as _time import datetime as _datetime import pytz as _tz import requests as _requests @@ -200,6 +201,78 @@ def retreive_financial_details(data): return TTM_dicts, Annual_dicts +def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session=None): + acceptable_names = ["financials", "balance-sheet", "cash-flow"] + if not name in acceptable_names: + raise Exception("name '{}' must be one of: 
{}".format(name, acceptable_names)) + acceptable_timestamps = ["annual", "quarterly"] + if not timescale in acceptable_timestamps: + raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps)) + + session = session or _requests + + financials_data = get_json(ticker_url+'/'+name, proxy, session) + + # Step 1: get the keys: + def _finditem1(key, obj): + values = [] + if isinstance(obj,dict): + if key in obj.keys(): + values.append(obj[key]) + for k,v in obj.items(): + values += _finditem1(key,v) + elif isinstance(obj,list): + for v in obj: + values += _finditem1(key,v) + return values + keys = _finditem1("key",financials_data['context']['dispatcher']['stores']['FinancialTemplateStore']) + + # Step 2: construct url: + ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(ticker) + if len(keys) == 0: + raise Exception("Fetching keys failed") + # yr_url = ts_url_base + "&type=" + ",".join(["annual"+k for k in keys]) + # qtr_url = ts_url_base + "&type=" + ",".join(["quarterly"+k for k in keys]) + # url = qtr_url + url = ts_url_base + "&type=" + ",".join([timescale+k for k in keys]) + # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: + start_dt = _datetime.datetime(2016, 12, 31) + end = (_datetime.datetime.now() + _datetime.timedelta(days=366)) + url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp())) + + # Step 3: fetch and reshape data + json_str = session.get(url=url, proxies=proxy, headers=user_agent_headers).text + json_data = _json.loads(json_str) + data_raw = json_data["timeseries"]["result"] + # data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data + for d in data_raw: + del d["meta"] + + # Now reshape data into a table: + # Step 1: get columns and index: + timestamps = set() + data_unpacked = {} + for x in data_raw: + for k in x.keys(): + if k=="timestamp": + timestamps.update(x[k]) + else: + 
data_unpacked[k] = x[k] + timestamps = sorted(list(timestamps)) + dates = _pd.to_datetime(timestamps, unit="s") + df = _pd.DataFrame(columns=dates, index=data_unpacked.keys()) + for k,v in data_unpacked.items(): + if df is None: + df = _pd.DataFrame(columns=dates, index=[k]) + df.loc[k] = {_pd.Timestamp(x["asOfDate"]):x["reportedValue"]["raw"] for x in v} + + df.index = df.index.str.replace("^"+timescale, "", regex=True) + + df = df[sorted(df.columns, reverse=True)] + + return df + + def camel2title(o): return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o] From 6c4da51519baa3170eb1682bf45710c13ae6c0b2 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Sat, 15 Oct 2022 11:22:39 +1000 Subject: [PATCH 14/20] Remediated missing annual table line items flagged by @ValueRaider. --- yfinance/base.py | 4 ++-- yfinance/utils.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index ab785cd95..8e5a0ecab 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -482,7 +482,7 @@ def cleanup(data): TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). TTM.index = TTM.index.str.replace(r'trailing', '') Annual.index = Annual.index.str.replace(r'annual','') - _income_statement = Annual.merge(TTM, left_index=True, right_index=True) + _income_statement = Annual.merge(TTM, how='outer',left_index=True, right_index=True) _income_statement.index = utils.camel2title(_income_statement.T) _income_statement['level_detail'] = financials_level_detail _income_statement = _income_statement.set_index([_income_statement.index,'level_detail']) @@ -527,7 +527,7 @@ def cleanup(data): TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
TTM.index = TTM.index.str.replace(r'trailing', '') Annual.index = Annual.index.str.replace(r'annual','') - _cash_flow_statement = Annual.merge(TTM, left_index=True, right_index=True) + _cash_flow_statement = Annual.merge(TTM, how='outer',left_index=True, right_index=True) _cash_flow_statement.index = utils.camel2title(_cash_flow_statement.T) _cash_flow_statement['level_detail'] = cash_flow_level_detail _cash_flow_statement = _cash_flow_statement.set_index([_cash_flow_statement.index,'level_detail']) diff --git a/yfinance/utils.py b/yfinance/utils.py index 8bfd0d523..6e1c9e176 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -100,6 +100,16 @@ def build_template(data): template_ttm_order.append('trailing{}'.format(child3['key'])) template_annual_order.append('annual{}'.format(child3['key'])) level_detail.append(3) + if 'children' in child3: + for child4 in child3['children']: # Level 4 + template_ttm_order.append('trailing{}'.format(child4['key'])) + template_annual_order.append('annual{}'.format(child4['key'])) + level_detail.append(4) + if 'children' in child4: + for child5 in child4['children']: # Level 5 + template_ttm_order.append('trailing{}'.format(child5['key'])) + template_annual_order.append('annual{}'.format(child5['key'])) + level_detail.append(5) return template_ttm_order, template_annual_order, level_detail def retreive_financial_details(data): @@ -118,11 +128,14 @@ def retreive_financial_details(data): time_series_dict = {} time_series_dict['index'] = key for each in data['timeSeries'][key]: # Loop through the years - time_series_dict[each['asOfDate']] = each['reportedValue'] + if each == None: + continue + else: + time_series_dict[each['asOfDate']] = each['reportedValue'] # time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue'] - if each['periodType'] == 'TTM': + if 'trailing' in key: TTM_dicts.append(time_series_dict) - elif each['periodType'] == '12M': + elif 'annual' in key: 
Annual_dicts.append(time_series_dict) except Exception as e: pass From 3b8114c135052ec4a59db88be5e96806d8223018 Mon Sep 17 00:00:00 2001 From: Stephen Hogg Date: Mon, 17 Oct 2022 21:02:58 +1000 Subject: [PATCH 15/20] Functions to minimize get_fundamentals. _DEV tests removed. --- yfinance/base.py | 252 +++----------------------------------------- yfinance/utils.py | 43 ++++++++ yfinance/version.py | 2 +- 3 files changed, 57 insertions(+), 240 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 5fc74c2f5..0b93f241b 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -20,6 +20,7 @@ # from __future__ import print_function +from ctypes import util import time as _time @@ -48,10 +49,6 @@ _SCRAPE_URL_ = 'https://finance.yahoo.com/quote' _ROOT_URL_ = 'https://finance.yahoo.com' -_DEV = False -# _DEV = True - - class TickerBase(): def __init__(self, ticker, session=None): self.ticker = ticker.upper() @@ -582,16 +579,6 @@ def cleanup(data): except Exception: pass - if _DEV: - # Fetching using get_financials_time_series() missing TTM column so cannot completely replace git-shogg's method - annual_income_stmt_ts = utils.get_financials_time_series(self.ticker, "financials", "annual", ticker_url, proxy, self.session) - annual_balance_sheet_ts = utils.get_financials_time_series(self.ticker, "balance-sheet", "annual", ticker_url, proxy, self.session) - annual_cashflow_ts = utils.get_financials_time_series(self.ticker, "cash-flow", "annual", ticker_url, proxy, self.session) - - qtr_income_stmt_ts = utils.get_financials_time_series(self.ticker, "financials", "quarterly", ticker_url, proxy, self.session) - qtr_balance_sheet_ts = utils.get_financials_time_series(self.ticker, "balance-sheet", "quarterly", ticker_url, proxy, self.session) - qtr_cashflow_ts = utils.get_financials_time_series(self.ticker, "cash-flow", "quarterly", ticker_url, proxy, self.session) - #------------------ Income Statement ------------------ try: # Grab the financial template store. 
This details the order in which the financials should be presented. @@ -602,61 +589,11 @@ def cleanup(data): data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(financials_template_ttm_order) - Annual = Annual.reindex(financials_template_annual_order) - - # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). - TTM.columns = ['TTM ' + str(col) for col in TTM.columns] - TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _income_stmt = Annual.merge(TTM, left_index=True, right_index=True) - - if _DEV: - annual_scraped_keys = _income_stmt.dropna(how="all").index.values - - # First, compare not-nan rows against get_financials_time_series(), are same rows returned? - annual_ts_keys = annual_income_stmt_ts.index.values - missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) - missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) - if len(missing_from_ts)>0: - print("") ; print("") - print("WARNING: These scraped keys missing from time-series annual income-stmt:") - print(_income_stmt[_income_stmt.index.isin(missing_from_ts)][sorted(_income_stmt.columns, reverse=True)]) - if len(missing_from_scraped)>0: - print("") ; print("") - print("WARNING: These time-series keys missing from scraped annual income-stmt. 
Normally they are present in Yahoo.com so why missing here?") - print(annual_income_stmt_ts[annual_income_stmt_ts.index.isin(missing_from_scraped)]) - - # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used - qtr_ts_keys = qtr_income_stmt_ts.index.values - missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) - # Remove what already know is missing - missing_from_annual -= missing_from_scraped - if len(missing_from_annual) > 0: - print("WARNING: These keys in time-series quarterly income-stmt are MISSING from scraped annual, complicates table construction") - print(qtr_income_stmt_ts[qtr_income_stmt_ts.index.isin(missing_from_annual)]) - else: - print("GREAT: Should be easy to fix quarterly income-stmt, just copy MultiIndex from annual (once you fix missing keys)") - - _income_stmt.index = utils.camel2title(_income_stmt.T) - _income_stmt['level_detail'] = financials_level_detail - _income_stmt = _income_stmt.set_index([_income_stmt.index,'level_detail']) - _income_stmt = _income_stmt[sorted(_income_stmt.columns, reverse=True)] - _income_stmt = _income_stmt.dropna(how='all') + _income_stmt = utils.format_annual_financial_statement(financials_level_detail, Annual_dicts, financials_template_annual_order, TTM_dicts, financials_template_ttm_order) self._income["yearly"] = _income_stmt - _income_stmt_qtr = utils.get_financials_time_series(self.ticker, "financials", "quarterly", ticker_url, proxy, self.session) - _income_stmt_qtr = _income_stmt_qtr.reindex(financials_template_order) - _income_stmt_qtr.index = utils.camel2title(_income_stmt_qtr.T) - _income_stmt_qtr['level_detail'] = financials_level_detail - _income_stmt_qtr = _income_stmt_qtr.set_index([_income_stmt_qtr.index,'level_detail']) - _income_stmt_qtr = _income_stmt_qtr[sorted(_income_stmt_qtr.columns, reverse=True)] - _income_stmt_qtr = _income_stmt_qtr.dropna(how='all') - _income_stmt_qtr.columns = _pd.to_datetime(_income_stmt_qtr.columns).date + 
_income_stmt_qtr = utils.format_quarterly_financial_statement(_income_stmt_qtr, financials_level_detail, financials_template_order) self._income["quarterly"] = _income_stmt_qtr except Exception as e: @@ -672,58 +609,13 @@ def cleanup(data): data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - Annual = Annual.reindex(balance_sheet_template_annual_order) - Annual.index = Annual.index.str.replace(r'annual','') - - if _DEV: - annual_scraped_keys = Annual.dropna(how="all").index.values - - # First, compare not-nan rows against get_financials_time_series(), are same rows returned? - annual_ts_keys = annual_balance_sheet_ts.index.values - missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) - missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) - if len(missing_from_ts)>0: - print("") ; print("") - print("WARNING: These scraped keys missing from time-series annual balance-sheet:") - print(Annual[Annual.index.isin(missing_from_ts)][sorted(Annual.columns, reverse=True)]) - if len(missing_from_scraped)>0: - print("") ; print("") - print("WARNING: These time-series keys missing from scraped annual balance-sheet. 
Normally they are present in Yahoo.com so why missing here?") - print(annual_balance_sheet_ts[annual_balance_sheet_ts.index.isin(missing_from_scraped)]) - - # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used - qtr_ts_keys = qtr_balance_sheet_ts.index.values - missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) - # Remove what already know is missing - missing_from_annual -= missing_from_scraped - if len(missing_from_annual) > 0: - print("") ; print("") - print("WARNING: These keys in time-series quarterly balance-sheet are MISSING from scraped annual, complicates table construction") - print(qtr_balance_sheet_ts[qtr_balance_sheet_ts.index.isin(missing_from_annual)]) - else: - print("") ; print("") - print("GREAT: Should be easy to fix quarterly balance-sheet, just copy MultiIndex from annual (once you fix missing keys)") - - Annual.index = utils.camel2title(Annual.T) - _balance_sheet = Annual - _balance_sheet['level_detail'] = balance_sheet_level_detail - _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) - _balance_sheet = _balance_sheet[sorted(_balance_sheet.columns, reverse=True)] - self._balance_sheet["yearly"] = _balance_sheet.dropna(how='all') - - # except Exception as e: - # self._balance_sheet["yearly"] = _pd.DataFrame() - + _balance_sheet = utils.format_annual_financial_statement(balance_sheet_level_detail, Annual_dicts, balance_sheet_template_annual_order) + self._balance_sheet["yearly"] = _balance_sheet + _balance_sheet_qtr = utils.get_financials_time_series(self.ticker, "balance-sheet", "quarterly", ticker_url, proxy, self.session) - _balance_sheet_qtr = _balance_sheet_qtr.reindex(balance_sheet_template_order) - _balance_sheet_qtr.index = utils.camel2title(_balance_sheet_qtr.T) - _balance_sheet_qtr['level_detail'] = balance_sheet_level_detail - _balance_sheet_qtr = _balance_sheet_qtr.set_index([_balance_sheet_qtr.index,'level_detail']) - _balance_sheet_qtr = 
_balance_sheet_qtr[sorted(_balance_sheet_qtr.columns, reverse=True)] - _balance_sheet_qtr = _balance_sheet_qtr.dropna(how='all') - _balance_sheet_qtr.columns = _pd.to_datetime(_balance_sheet_qtr.columns).date + _balance_sheet_qtr = utils.format_quarterly_financial_statement(_balance_sheet_qtr, balance_sheet_level_detail, balance_sheet_template_order) self._balance_sheet["quarterly"] = _balance_sheet_qtr + except Exception as e: self._balance_sheet["yearly"] = _pd.DataFrame() self._balance_sheet["quarterly"] = _pd.DataFrame() @@ -736,61 +628,11 @@ def cleanup(data): data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Combine the raw financial details and the template - TTM = TTM.reindex(cash_flow_template_ttm_order) - Annual = Annual.reindex(cash_flow_template_annual_order) - TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). - TTM.index = TTM.index.str.replace(r'trailing', '') - Annual.index = Annual.index.str.replace(r'annual','') - _cashflow = Annual.merge(TTM, left_index=True, right_index=True) - - if _DEV: - annual_scraped_keys = _cashflow.dropna(how="all").index.values - - # First, compare not-nan rows against get_financials_time_series(), are same rows returned? 
- annual_ts_keys = annual_cashflow_ts.index.values - missing_from_ts = set(annual_scraped_keys)-set(annual_ts_keys) - missing_from_scraped = set(annual_ts_keys)-set(annual_scraped_keys) - if len(missing_from_ts)>0: - print("") ; print("") - print("WARNING: These scraped keys missing from time-series annual cash-flow:") - print(_cashflow[_cashflow.index.isin(missing_from_ts)][sorted(_cashflow.columns, reverse=True)]) - if len(missing_from_scraped)>0: - print("") ; print("") - print("WARNING: These time-series keys missing from scraped annual cash-flow. Normally they are present in Yahoo.com so why missing here?") - print(annual_cashflow_ts[annual_cashflow_ts.index.isin(missing_from_scraped)]) - - # Next, compare time-series quarterly against scraped annual to see if same MultiIndex can be used - qtr_ts_keys = qtr_cashflow_ts.index.values - missing_from_annual = set(qtr_ts_keys)-set(annual_scraped_keys) - # Remove what already know is missing - missing_from_annual -= missing_from_scraped - if len(missing_from_annual) > 0: - print("") ; print("") - print("WARNING: These keys in time-series quarterly cash-flow are MISSING from scraped annual, complicates table construction") - print(qtr_cashflow_ts[qtr_cashflow_ts.index.isin(missing_from_annual)]) - else: - print("") ; print("") - print("GREAT: Should be easy to fix quarterly cash-flow, just copy MultiIndex from annual (once you fix missing keys)") - - _cashflow.index = utils.camel2title(_cashflow.T) - _cashflow['level_detail'] = cash_flow_level_detail - _cashflow = _cashflow.set_index([_cashflow.index,'level_detail']) - _cashflow = _cashflow[sorted(_cashflow.columns, reverse=True)] - self._cashflow["yearly"] = _cashflow.dropna(how='all') - # except Exception as e: - # self._cashflow["yearly"] = _pd.DataFrame() + _cashflow = utils.format_annual_financial_statement(cash_flow_level_detail, Annual_dicts, cash_flow_template_annual_order, TTM_dicts, cash_flow_template_ttm_order) + self._cashflow["yearly"] = _cashflow 
_cashflow_qtr = utils.get_financials_time_series(self.ticker, "cash-flow", "quarterly", ticker_url, proxy, self.session) - _cashflow_qtr = _cashflow_qtr.reindex(cash_flow_template_order) - _cashflow_qtr.index = utils.camel2title(_cashflow_qtr.T) - _cashflow_qtr['level_detail'] = cash_flow_level_detail - _cashflow_qtr = _cashflow_qtr.set_index([_cashflow_qtr.index,'level_detail']) - _cashflow_qtr = _cashflow_qtr[sorted(_cashflow_qtr.columns, reverse=True)] - _cashflow_qtr = _cashflow_qtr.dropna(how='all') - _cashflow_qtr.columns = _pd.to_datetime(_cashflow_qtr.columns).date + _cashflow_qtr = utils.format_quarterly_financial_statement(_cashflow_qtr, cash_flow_level_detail, cash_flow_template_order) self._cashflow["quarterly"] = _cashflow_qtr except Exception as e: self._cashflow["yearly"] = _pd.DataFrame() @@ -811,7 +653,7 @@ def cleanup(data): except Exception: pass - # Analysis + #------------------ Analysis ------------------ data = utils.get_json(ticker_url + '/analysis', proxy, self.session) if isinstance(data.get('earningsTrend'), dict): @@ -839,7 +681,7 @@ def cleanup(data): except Exception: pass - # Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website) + #------------------ Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website) ------------------ res = {} try: my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json', @@ -881,74 +723,6 @@ def cleanup(data): if 'trailingPegRatio' in res: self._info['trailingPegRatio'] = res['trailingPegRatio'] - #------------------ Income Statement ------------------ - # try: - # data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. 
- # financials_template_ttm_order, financials_template_annual_order, financials_template_order, financials_level_detail = utils.build_template(data) - - # data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - # TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - # Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # # Combine the raw financial details and the template - # TTM = TTM.reindex(financials_template_ttm_order) - # Annual = Annual.reindex(financials_template_annual_order) - # TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). - # TTM.index = TTM.index.str.replace(r'trailing', '') - # Annual.index = Annual.index.str.replace(r'annual','') - # _income_stmt = Annual.merge(TTM, how='outer',left_index=True, right_index=True) - # _income_stmt.index = utils.camel2title(_income_stmt.T) - # _income_stmt['level_detail'] = financials_level_detail - # _income_stmt = _income_stmt.set_index([_income_stmt.index,'level_detail']) - # self._income_stmt = _income_stmt.dropna(how='all') - # except Exception as e: - # self._income_stmt = _pd.DataFrame() - - #------------------ Balance Sheet ------------------ - # try: - # balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) - # data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - # balance_sheet_template_ttm_order, balance_sheet_template_annual_order,balance_sheet_template_order, balance_sheet_level_detail = utils.build_template(data) - - # data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - # TTM_dicts, Annual_dicts = 
utils.retreive_financial_details(data) - - # Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # Annual = Annual.reindex(balance_sheet_template_annual_order) - # Annual.index = Annual.index.str.replace(r'annual','') - # Annual.index = utils.camel2title(Annual.T) - # _balance_sheet = Annual - # _balance_sheet['level_detail'] = balance_sheet_level_detail - # _balance_sheet = _balance_sheet.set_index([_balance_sheet.index,'level_detail']) - # self._balance_sheet = _balance_sheet.dropna(how='all') - # except Exception as e: - # self._balance_sheet = _pd.DataFrame() - - #------------------ Cash Flow Statement ------------------ - # try: - # cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) - # data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. - # cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_template_order, cash_flow_level_detail = utils.build_template(data) - - # data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - # TTM = _pd.DataFrame.from_dict(TTM_dicts).set_index("index") - # Annual = _pd.DataFrame.from_dict(Annual_dicts).set_index("index") - # # Combine the raw financial details and the template - # TTM = TTM.reindex(cash_flow_template_ttm_order) - # Annual = Annual.reindex(cash_flow_template_annual_order) - # TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
- # TTM.index = TTM.index.str.replace(r'trailing', '') - # Annual.index = Annual.index.str.replace(r'annual','') - # _cash_flow = Annual.merge(TTM, how='outer',left_index=True, right_index=True) - # _cash_flow.index = utils.camel2title(_cash_flow.T) - # _cash_flow['level_detail'] = cash_flow_level_detail - # _cash_flow = _cash_flow.set_index([_cash_flow.index,'level_detail']) - # self._cash_flow = _cash_flow.dropna(how='all') - # except Exception as e: - # self._cash_flow = _pd.DataFrame() #------------------ Analysis Data/Analyst Forecasts ------------------ try: analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) diff --git a/yfinance/utils.py b/yfinance/utils.py index f1875aef0..8115d74cb 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -216,6 +216,49 @@ def retreive_financial_details(data): pass return TTM_dicts, Annual_dicts +def format_annual_financial_statement(level_detail, annual_dicts, annual_order, ttm_dicts=None, ttm_order=None): + ''' + format_annual_financial_statement formats any annual financial statement + + Returns: + - _statement: A fully formatted annual financial statement in pandas dataframe. + ''' + Annual = _pd.DataFrame.from_dict(annual_dicts).set_index("index") + Annual = Annual.reindex(annual_order) + Annual.index = Annual.index.str.replace(r'annual','') + + if ttm_dicts != None or ttm_order != None: # Balance sheet is the only financial statement with no ttm detail. + TTM = _pd.DataFrame.from_dict(ttm_dicts).set_index("index") + TTM = TTM.reindex(ttm_order) + TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
+ TTM.index = TTM.index.str.replace(r'trailing', '') + _statement = Annual.merge(TTM, left_index=True, right_index=True) + else: + _statement = Annual + + _statement.index = camel2title(_statement.T) + _statement['level_detail'] = level_detail + _statement = _statement.set_index([_statement.index,'level_detail']) + _statement = _statement[sorted(_statement.columns, reverse=True)] + _statement = _statement.dropna(how='all') + return _statement + +def format_quarterly_financial_statement(_statement, level_detail, order): + ''' + format_quarterly_financial_statements formats any quarterly financial statement + + Returns: + - _statement: A fully formatted annual financial statement in pandas dataframe. + ''' + _statement = _statement.reindex(order) + _statement.index = camel2title(_statement.T) + _statement['level_detail'] = level_detail + _statement = _statement.set_index([_statement.index,'level_detail']) + _statement = _statement[sorted(_statement.columns, reverse=True)] + _statement = _statement.dropna(how='all') + _statement.columns = _pd.to_datetime(_statement.columns).date + return _statement + def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session=None): acceptable_names = ["financials", "balance-sheet", "cash-flow"] if not name in acceptable_names: diff --git a/yfinance/version.py b/yfinance/version.py index ecb526e9f..cbf0ad46c 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.1.77" +version = "0.1.78" From 6e96a1a8e6747979c2f3ad5d8beb198410198828 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Mon, 17 Oct 2022 16:40:12 +0100 Subject: [PATCH 16/20] Refactor properly ; Rename some new properties --- README.md | 12 +++ test_yf_merge.py | 98 ------------------ test_yfinance.py | 4 +- yfinance/base.py | 242 ++++++++++++++++----------------------------- yfinance/ticker.py | 30 +++--- yfinance/utils.py | 29 ++---- 6 files changed, 121 insertions(+), 294 deletions(-) delete mode 100644 
test_yf_merge.py diff --git a/README.md b/README.md index 5545cda3e..7faba7a16 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,9 @@ msft.dividends # show splits msft.splits +# show share count +msft.shares + # show income statement msft.income_stmt msft.quarterly_income_stmt @@ -88,6 +91,9 @@ msft.major_holders # show institutional holders msft.institutional_holders +# show mutualfund holders +msft.mutualfund_holders + # show earnings msft.earnings msft.quarterly_earnings @@ -97,6 +103,12 @@ msft.sustainability # show analysts recommendations msft.recommendations +msft.recommendations_summary +# show analysts other work +msft.analyst_price_target +msft.revenue_forecasts +msft.earnings_forecasts +msft.earnings_trend # show next event (earnings, etc) msft.calendar diff --git a/test_yf_merge.py b/test_yf_merge.py deleted file mode 100644 index 6d04c423c..000000000 --- a/test_yf_merge.py +++ /dev/null @@ -1,98 +0,0 @@ -import yfinance as yf -import os -print(os.path.abspath(yf.__file__)) - -msft = yf.Ticker("MSFT") - -print(msft.isin) -print(msft.major_holders) -print(msft.institutional_holders) -print(msft.mutualfund_holders) -print(msft.dividends) -print(msft.splits) -print(msft.actions) -print(msft.shares) -print(msft.info) -print(msft.calendar) -print(msft.recommendations) -print(msft.earnings) -print(msft.quarterly_earnings) -print(msft.income_stmt) -print(msft.quarterly_income_stmt) -print(msft.balance_sheet) -print(msft.quarterly_balance_sheet) -print(msft.cashflow) -print(msft.quarterly_cashflow) -print(msft.current_recommendations) -print(msft.analyst_price_target) -print(msft.revenue_forecasts) -print(msft.sustainability) -print(msft.options) -print(msft.news) -print(msft.analysis) -print(msft.earnings_history) -print(msft.earnings_dates) -print(msft.earnings_forecasts) - - -# # get stock info -# print(msft.info) - -# # get historical market data -# hist = msft.history(period="max") - -# # show actions (dividends, splits) -# print(msft.actions) - -# # 
show dividends -# print(msft.dividends) - -# # show splits -# print(msft.splits) - -# # show financials -# print(msft.financials) -# print(msft.quarterly_financials) - -# # show major holders -# print(msft.major_holders) - -# # show institutional holders -# print(msft.institutional_holders) - -# # show balance sheet -# print(msft.balance_sheet) -# print(msft.quarterly_balance_sheet) - -# # show cashflow -# print(msft.cashflow) -# print(msft.quarterly_cashflow) - -# # show earnings -# print(msft.earnings) -# print(msft.quarterly_earnings) - -# # show sustainability -# print(msft.sustainability) - -# # show analysts recommendations -# print(msft.recommendations) - -# # show next event (earnings, etc) -# print(msft.calendar) - -# # show all earnings dates -# print(msft.earnings_dates) - -# # show ISIN code - *experimental* -# # ISIN = International Securities Identification Number -# print(msft.isin) - -# # show options expirations -# print(msft.options) - -# # show news -# print(msft.news) - -# # get option chain for specific expiration -# # opt = msft.option_chain('YYYY-MM-DD') \ No newline at end of file diff --git a/test_yfinance.py b/test_yfinance.py index 8b347d887..098eaa45a 100644 --- a/test_yfinance.py +++ b/test_yfinance.py @@ -49,13 +49,13 @@ def test_attributes(self): ticker.quarterly_balance_sheet ticker.cashflow ticker.quarterly_cashflow - ticker.current_recommendations + ticker.recommendations_summary ticker.analyst_price_target ticker.revenue_forecasts ticker.sustainability ticker.options ticker.news - ticker.analysis + ticker.earnings_trend ticker.earnings_history ticker.earnings_dates ticker.earnings_forecasts diff --git a/yfinance/base.py b/yfinance/base.py index 0b93f241b..10d7a61e4 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -20,8 +20,6 @@ # from __future__ import print_function -from ctypes import util - import time as _time import datetime as _datetime @@ -30,7 +28,6 @@ import pandas as _pd import numpy as _np import re as _re -from 
pprint import pprint try: from urllib.parse import quote as urlencode @@ -49,6 +46,7 @@ _SCRAPE_URL_ = 'https://finance.yahoo.com/quote' _ROOT_URL_ = 'https://finance.yahoo.com' + class TickerBase(): def __init__(self, ticker, session=None): self.ticker = ticker.upper() @@ -60,7 +58,7 @@ def __init__(self, ticker, session=None): self._fundamentals = False self._info = None - self._analysis = None + self._earnings_trend = None self._sustainability = None self._recommendations = None self._analyst_trend_details = None @@ -81,9 +79,7 @@ def __init__(self, ticker, session=None): self._earnings_history = None self._earnings = None - self._income_stmt = None - self._balance_sheet = None - self._cashflow = None + self._financials = None # accept isin as ticker if utils.is_isin(self.ticker): @@ -102,7 +98,7 @@ def stats(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - data = utils.get_json(ticker_url, proxy, self.session) + data = utils.get_json_data_stores(ticker_url, proxy, self.session)["QuoteSummaryStore"] return data def history(self, period="1mo", interval="1d", @@ -350,7 +346,7 @@ def _get_ticker_tz(self): return tkr_tz def _get_info(self, proxy=None): - #------------------ Setup Proxy in Requests Format ------------------ + # setup proxy in requests format if proxy is not None: if isinstance(proxy, dict) and "https" in proxy: proxy = proxy["https"] @@ -365,12 +361,9 @@ def _get_info(self, proxy=None): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) # get info and sustainability - data = utils.get_json(ticker_url, proxy, self.session) - data = data['context']['dispatcher']['stores']['QuoteSummaryStore'] + data = utils.get_json_data_stores(ticker_url, proxy, self.session)['QuoteSummaryStore'] - #------------------ Sustainability ------------------ - data = utils.get_json(ticker_url, proxy, self.session) - data = data['context']['dispatcher']['stores']['QuoteSummaryStore'] + # sustainability d = 
{} try: if isinstance(data.get('esgScores'), dict): @@ -389,7 +382,7 @@ def _get_info(self, proxy=None): except Exception: pass - #------------------ Info (be nice to python 2) ------------------ + # info (be nice to python 2) self._info = {} try: items = ['summaryProfile', 'financialData', 'quoteType', @@ -435,7 +428,7 @@ def _get_info(self, proxy=None): except Exception: pass - #------------------ Events ------------------ + # events try: cal = _pd.DataFrame( data['calendarEvents']['earnings']) @@ -447,7 +440,7 @@ def _get_info(self, proxy=None): except Exception: pass - #------------------ Long Term Analyst Recommendations ------------------ + # analyst recommendations try: rec = _pd.DataFrame( data['upgradeDowngradeHistory']['history']) @@ -486,7 +479,7 @@ def cleanup(data): df.index = utils.camel2title(df.index) return df - #------------------ Setup Proxy in Requests Format ------------------ + # setup proxy in requests format if proxy is not None: if isinstance(proxy, dict) and "https" in proxy: proxy = proxy["https"] @@ -497,7 +490,7 @@ def cleanup(data): ticker_url = "{}/{}".format(self._scrape_url, self.ticker) - # ------------------ Holders ------------------ + # holders try: resp = utils.get_html(ticker_url + '/holders', proxy, self.session) holders = _pd.read_html(resp) @@ -533,39 +526,26 @@ def cleanup(data): self._get_info(proxy) # get fundamentals - financials_data = utils.get_json(ticker_url + '/financials', proxy, self.session) - data = financials_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + fin_data = utils.get_json_data_stores(ticker_url + '/financials', proxy, self.session) + fin_data_quote = fin_data['QuoteSummaryStore'] # generic patterns self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._cashflow = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._balance_sheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} - self._income = {"yearly": utils.empty_df(), 
"quarterly": utils.empty_df()} - for key in ( - (self._cashflow, 'cashflowStatement', 'cashflowStatements'), - (self._balance_sheet, 'balanceSheet', 'balanceSheetStatements'), - (self._income, 'incomeStatement', 'incomeStatementHistory') - ): - item = key[1] + 'History' - if isinstance(data.get(item), dict): - try: - # key[0]['yearly'] = cleanup(data[item][key[2]]) - key[0]['yearly_legacy'] = cleanup(data[item][key[2]]) - except Exception: - pass - - item = key[1] + 'HistoryQuarterly' - if isinstance(data.get(item), dict): - try: - key[0]['quarterly'] = cleanup(data[item][key[2]]) - except Exception: - pass - - #------------------ Earnings ------------------ - if isinstance(data.get('earnings'), dict): + self._financials = {} + for name in ["income", "balance-sheet", "cash-flow"]: + self._financials[name] = {"yearly":utils.empty_df(), "quarterly":utils.empty_df()} + for name in ["income", "balance-sheet", "cash-flow"]: + annual, qtr = self._create_financials_table(name, proxy) + if annual is not None: + self._financials[name]["yearly"] = annual + if qtr is not None: + self._financials[name]["quarterly"] = qtr + + # earnings + if isinstance(fin_data_quote.get('earnings'), dict): try: - earnings = data['earnings']['financialsChart'] - earnings['financialCurrency'] = 'USD' if 'financialCurrency' not in data['earnings'] else data['earnings']['financialCurrency'] + earnings = fin_data_quote['earnings']['financialsChart'] + earnings['financialCurrency'] = 'USD' if 'financialCurrency' not in fin_data_quote['earnings'] else fin_data_quote['earnings']['financialCurrency'] self._earnings['financialCurrency'] = earnings['financialCurrency'] df = _pd.DataFrame(earnings['yearly']).set_index('date') df.columns = utils.camel2title(df.columns) @@ -579,69 +559,10 @@ def cleanup(data): except Exception: pass - #------------------ Income Statement ------------------ - try: - # Grab the financial template store. This details the order in which the financials should be presented. 
- data = financials_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - financials_template_ttm_order, financials_template_annual_order, financials_template_order, financials_level_detail = utils.build_template(data) - - # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). - data = financials_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - _income_stmt = utils.format_annual_financial_statement(financials_level_detail, Annual_dicts, financials_template_annual_order, TTM_dicts, financials_template_ttm_order) - self._income["yearly"] = _income_stmt - - _income_stmt_qtr = utils.get_financials_time_series(self.ticker, "financials", "quarterly", ticker_url, proxy, self.session) - _income_stmt_qtr = utils.format_quarterly_financial_statement(_income_stmt_qtr, financials_level_detail, financials_template_order) - self._income["quarterly"] = _income_stmt_qtr - - except Exception as e: - self._income["yearly"] = _pd.DataFrame() - self._income["quarterly"] = _pd.DataFrame() - - #------------------ Balance Sheet ------------------ - try: - balance_sheet_data = utils.get_json(ticker_url+'/balance-sheet', proxy, self.session) - data = balance_sheet_data['context']['dispatcher']['stores']['FinancialTemplateStore'] - balance_sheet_template_ttm_order, balance_sheet_template_annual_order, balance_sheet_template_order, balance_sheet_level_detail = utils.build_template(data) - - data = balance_sheet_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - _balance_sheet = utils.format_annual_financial_statement(balance_sheet_level_detail, Annual_dicts, balance_sheet_template_annual_order) - self._balance_sheet["yearly"] = _balance_sheet - - _balance_sheet_qtr = utils.get_financials_time_series(self.ticker, 
"balance-sheet", "quarterly", ticker_url, proxy, self.session) - _balance_sheet_qtr = utils.format_quarterly_financial_statement(_balance_sheet_qtr, balance_sheet_level_detail, balance_sheet_template_order) - self._balance_sheet["quarterly"] = _balance_sheet_qtr - - except Exception as e: - self._balance_sheet["yearly"] = _pd.DataFrame() - self._balance_sheet["quarterly"] = _pd.DataFrame() - #------------------ Cash Flow Statement ------------------ - try: - cash_flow_data = utils.get_json(ticker_url+'/cash-flow', proxy, self.session) - data = cash_flow_data['context']['dispatcher']['stores']['FinancialTemplateStore'] # Grab the financial template store. This details the order in which the financials should be presented. - cash_flow_template_ttm_order, cash_flow_template_annual_order, cash_flow_template_order, cash_flow_level_detail = utils.build_template(data) - - data = cash_flow_data['context']['dispatcher']['stores']['QuoteTimeSeriesStore'] # Grab the raw financial details (this can be later combined with the financial template store detail to correctly order and present the data). 
- TTM_dicts, Annual_dicts = utils.retreive_financial_details(data) - - _cashflow = utils.format_annual_financial_statement(cash_flow_level_detail, Annual_dicts, cash_flow_template_annual_order, TTM_dicts, cash_flow_template_ttm_order) - self._cashflow["yearly"] = _cashflow - - _cashflow_qtr = utils.get_financials_time_series(self.ticker, "cash-flow", "quarterly", ticker_url, proxy, self.session) - _cashflow_qtr = utils.format_quarterly_financial_statement(_cashflow_qtr, cash_flow_level_detail, cash_flow_template_order) - self._cashflow["quarterly"] = _cashflow_qtr - except Exception as e: - self._cashflow["yearly"] = _pd.DataFrame() - self._cashflow["quarterly"] = _pd.DataFrame() - - #------------------ Shares Outstanding ------------------ + # shares outstanding try: # keep only years with non None data - available_shares = [shares_data for shares_data in data['annualBasicAverageShares'] if shares_data] + available_shares = [shares_data for shares_data in fin_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] if shares_data] shares = _pd.DataFrame(available_shares) shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4])) shares.set_index('Year', inplace=True) @@ -653,8 +574,8 @@ def cleanup(data): except Exception: pass - #------------------ Analysis ------------------ - data = utils.get_json(ticker_url + '/analysis', proxy, self.session) + # Analysis + data = utils.get_json_data_stores(ticker_url + '/analysis', proxy, self.session)["QuoteSummaryStore"] if isinstance(data.get('earningsTrend'), dict): try: @@ -676,12 +597,12 @@ def cleanup(data): utils.camel2title([k])[0] analysis.loc[idx, new_colname] = v - self._analysis = analysis[[ + self._earnings_trend = analysis[[ c for c in analysis.columns if c not in dict_cols]] except Exception: pass - #------------------ Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website) ------------------ + # Complementary key-statistics 
(currently fetching the important trailingPegRatio which is the value shown in the website) res = {} try: my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json', @@ -723,20 +644,20 @@ def cleanup(data): if 'trailingPegRatio' in res: self._info['trailingPegRatio'] = res['trailingPegRatio'] - #------------------ Analysis Data/Analyst Forecasts ------------------ + # Analysis Data/Analyst Forecasts try: - analysis_data = utils.get_json(ticker_url+'/analysis',proxy,self.session) - analysis_data = analysis_data['context']['dispatcher']['stores']['QuoteSummaryStore'] + analysis_data = utils.get_json_data_stores(ticker_url+'/analysis',proxy,self.session) + analysis_data = analysis_data['QuoteSummaryStore'] except Exception as e: analysis_data = {} try: self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) except Exception as e: - self._analyst_trend_details = _pd.DataFrame() + self._analyst_trend_details = utils.empty_df() try: self._analyst_price_target = _pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T except Exception as e: - self._analyst_price_target = _pd.DataFrame() + self._analyst_price_target = utils.empty_df() earnings_estimate = [] revenue_estimate = [] if len(self._analyst_trend_details) != 0: @@ -761,6 +682,39 @@ def cleanup(data): self._fundamentals = True + def _create_financials_table(self, name, proxy): + acceptable_names = ["income", "balance-sheet", "cash-flow"] + if not name in acceptable_names: + raise Exception("name '{}' must be one of: {}".format(name, acceptable_names)) + + if name == "income": + # Yahoo stores the 'income' table internally under 'financials' key + name = "financials" + + ticker_url = "{}/{}".format(self._scrape_url, self.ticker) + data_store = utils.get_json_data_stores(ticker_url+'/'+name, proxy, self.session) + + _stmt_annual = None + 
_stmt_qtr = None + try: + template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) + TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) + + if name == "balance-sheet": + _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order) + else: + _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order, TTM_dicts, template_ttm_order) + + # Data store doesn't contain quarterly data, so retrieve using different url: + _qtr_data = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) + + _stmt_qtr = utils.format_quarterly_financial_statement(_qtr_data, level_detail, template_order) + except: + pass + + return _stmt_annual, _stmt_qtr + + def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): self._get_info(proxy) data = self._recommendations @@ -812,7 +766,7 @@ def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs): return data.to_dict() return data - def get_current_recommendations(self, proxy=None, as_dict=False, *args, **kwargs): + def get_recommendations_summary(self, proxy=None, as_dict=False, *args, **kwargs): self._get_fundamentals(proxy=proxy) data = self._analyst_trend_details if as_dict: @@ -840,6 +794,13 @@ def get_earnings_forecast(self, proxy=None, as_dict=False, *args, **kwargs): return data.to_dict() return data + def get_earnings_trend(self, proxy=None, as_dict=False, *args, **kwargs): + self._get_fundamentals(proxy=proxy) + data = self._earnings_trend + if as_dict: + return data.to_dict() + return data + def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) data = self._earnings[freq] @@ -849,58 +810,23 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): return dict_data return data - def 
get_analysis(self, proxy=None, as_dict=False, *args, **kwargs): - self._get_fundamentals(proxy=proxy) - data = self._analysis - if as_dict: - return data.to_dict() - return data - def get_income_stmt(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) - data = self._income[freq] - if as_dict: - return data.to_dict() - return data - - def get_income_stmt_legacy(self, proxy=None, as_dict=False, freq="yearly"): - self._get_fundamentals(proxy=proxy) - data = self._income[freq+"_legacy"] + data = self._financials["income"][freq] if as_dict: return data.to_dict() return data def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) - data = self._balance_sheet[freq] + data = self._financials["balance-sheet"][freq] if as_dict: return data.to_dict() return data - def get_balance_sheet_legacy(self, proxy=None, as_dict=False, freq="yearly"): - self._get_fundamentals(proxy=proxy) - data = self._balance_sheet[freq+"_legacy"] - if as_dict: - return data.to_dict() - return data - - def get_quarterly_cash_flow_statement(self, proxy=None, as_dict=False): # Could still be used for quarterly - self._get_fundamentals(proxy=proxy) - data = self._quarterly_cash_flow["quarterly"] - if as_dict: - return data.to_dict() - return data - def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"): self._get_fundamentals(proxy=proxy) - data = self._cashflow[freq] - if as_dict: - return data.to_dict() - return data - - def get_cashflow_legacy(self, proxy=None, as_dict=False, freq="yearly"): - self._get_fundamentals(proxy=proxy) - data = self._cashflow[freq+"_legacy"] + data = self._financials["cash-flow"][freq] if as_dict: return data.to_dict() return data @@ -1124,6 +1050,6 @@ def get_earnings_history(self, proxy=None): self._earnings_history = data # if no tables are found a ValueError is thrown except ValueError: - print("Could not find data for {}.".format(self.ticker)) + print("Could not find earnings 
history data for {}.".format(self.ticker)) return return data diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 2908032d7..d14af9599 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -166,10 +166,6 @@ def quarterly_earnings(self): def income_stmt(self): return self.get_income_stmt() - @property - def income_stmt_legacy(self): - return self.get_income_stmt_legacy() - @property def quarterly_income_stmt(self): return self.get_income_stmt(freq='quarterly') @@ -178,29 +174,29 @@ def quarterly_income_stmt(self): def balance_sheet(self): return self.get_balance_sheet() - @property - def balance_sheet_legacy(self): - return self.get_balance_sheet_legacy() - @property def quarterly_balance_sheet(self): return self.get_balance_sheet(freq='quarterly') @property - def cashflow(self): - return self.get_cashflow() + def balancesheet(self): + return self.balance_sheet + + @property + def quarterly_balancesheet(self): + return self.quarterly_balance_sheet @property - def cashflow_legacy(self): - return self.get_cashflow_legacy() + def cashflow(self): + return self.get_cashflow(freq="yearly") @property def quarterly_cashflow(self): return self.get_cashflow(freq='quarterly') @property - def current_recommendations(self): - return self.get_current_recommendations() + def recommendations_summary(self): + return self.get_recommendations_summary() @property def analyst_price_target(self): @@ -225,8 +221,8 @@ def news(self): return self.get_news() @property - def analysis(self): - return self.get_analysis() + def earnings_trend(self): + return self.get_earnings_trend() @property def earnings_history(self): @@ -238,4 +234,4 @@ def earnings_dates(self): @property def earnings_forecasts(self): - return self.get_earnings_forecast() \ No newline at end of file + return self.get_earnings_forecast() diff --git a/yfinance/utils.py b/yfinance/utils.py index 8115d74cb..6c4cf0a47 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -21,7 +21,6 @@ from __future__ import 
print_function -import time as _time import datetime as _datetime import pytz as _tz import requests as _requests @@ -107,27 +106,17 @@ def get_html(url, proxy=None, session=None): return html -def get_json(url, proxy=None, session=None): +def get_json_data_stores(url, proxy=None, session=None): ''' - get_json returns a python dictionary of the store detail for yahoo finance web pages. + get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page. ''' session = session or _requests html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text json_str = html.split('root.App.main =')[1].split( '(this)')[0].split(';\n}')[0].strip() - # data = _json.loads(json_str)['context']['dispatcher']['stores']['QuoteSummaryStore'] - data = _json.loads(json_str) + data = _json.loads(json_str)['context']['dispatcher']['stores'] - # add data about Shares Outstanding for companies' tickers if they are available - try: - data['annualBasicAverageShares'] = _json.loads( - json_str)['context']['dispatcher']['stores'][ - 'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares'] - except Exception: - pass - - ## TODO: Why dumping and parsing again? # return data new_data = _json.dumps(data).replace('{}', 'null') new_data = _re.sub( @@ -135,6 +124,7 @@ def get_json(url, proxy=None, session=None): return _json.loads(new_data) + def build_template(data): ''' build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". 
@@ -187,6 +177,7 @@ def build_template(data): level_detail.append(5) return template_ttm_order, template_annual_order, template_order, level_detail + def retreive_financial_details(data): ''' retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet". @@ -216,6 +207,7 @@ def retreive_financial_details(data): pass return TTM_dicts, Annual_dicts + def format_annual_financial_statement(level_detail, annual_dicts, annual_order, ttm_dicts=None, ttm_order=None): ''' format_annual_financial_statement formats any annual financial statement @@ -243,6 +235,7 @@ def format_annual_financial_statement(level_detail, annual_dicts, annual_order, _statement = _statement.dropna(how='all') return _statement + def format_quarterly_financial_statement(_statement, level_detail, order): ''' format_quarterly_financial_statements formats any quarterly financial statement @@ -259,6 +252,7 @@ def format_quarterly_financial_statement(_statement, level_detail, order): _statement.columns = _pd.to_datetime(_statement.columns).date return _statement + def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session=None): acceptable_names = ["financials", "balance-sheet", "cash-flow"] if not name in acceptable_names: @@ -269,7 +263,7 @@ def get_financials_time_series(ticker, name, timescale, ticker_url, proxy=None, session = session or _requests - financials_data = get_json(ticker_url+'/'+name, proxy, session) + financials_data = get_json_data_stores(ticker_url+'/'+name, proxy, session) # Step 1: get the keys: def _finditem1(key, obj): @@ -283,15 +277,12 @@ def _finditem1(key, obj): for v in obj: values += _finditem1(key,v) return values - keys = _finditem1("key",financials_data['context']['dispatcher']['stores']['FinancialTemplateStore']) + keys = _finditem1("key",financials_data['FinancialTemplateStore']) # Step 2: 
construct url: ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(ticker) if len(keys) == 0: raise Exception("Fetching keys failed") - # yr_url = ts_url_base + "&type=" + ",".join(["annual"+k for k in keys]) - # qtr_url = ts_url_base + "&type=" + ",".join(["quarterly"+k for k in keys]) - # url = qtr_url url = ts_url_base + "&type=" + ",".join([timescale+k for k in keys]) # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt: start_dt = _datetime.datetime(2016, 12, 31) From 730afda4a728f3106e15ce619fd404db7b0098ac Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Thu, 20 Oct 2022 22:19:08 +0100 Subject: [PATCH 17/20] Fix financials placeholders --- yfinance/base.py | 44 +++++++++---- yfinance/utils.py | 163 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 15 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 10d7a61e4..b346922c6 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -304,13 +304,34 @@ def history(self, period="1mo", interval="1d", tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"] - # combine - df = _pd.concat([quotes, dividends, splits], axis=1, sort=True) - df["Dividends"].fillna(0, inplace=True) - df["Stock Splits"].fillna(0, inplace=True) + quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange) + + # prepare index for combine: + quotes.index = quotes.index.tz_localize("UTC").tz_convert(tz_exchange) + splits.index = splits.index.tz_localize("UTC").tz_convert(tz_exchange) + dividends.index = dividends.index.tz_localize("UTC").tz_convert(tz_exchange) + if params["interval"] in ["1d","1w","1wk","1mo","3mo"]: + # Converting datetime->date should improve merge performance + quotes.index = _pd.to_datetime(quotes.index.date) + splits.index = _pd.to_datetime(splits.index.date) + dividends.index = _pd.to_datetime(dividends.index.date) - # index eod/intraday - 
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange) + # combine + df = quotes + if actions: + df = df.sort_index() + if dividends.shape[0] > 0: + df = utils.safe_merge_dfs(df, dividends, interval) + if "Dividends" in df.columns: + df.loc[df["Dividends"].isna(),"Dividends"] = 0 + else: + df["Dividends"] = 0.0 + if splits.shape[0] > 0: + df = utils.safe_merge_dfs(df, splits, interval) + if "Stock Splits" in df.columns: + df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0 + else: + df["Stock Splits"] = 0.0 df = utils.fix_Yahoo_dst_issue(df, params["interval"]) @@ -319,7 +340,6 @@ def history(self, period="1mo", interval="1d", elif params["interval"] == "1h": pass else: - df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange) df.index.name = "Date" # duplicates and missing rows cleanup @@ -328,8 +348,6 @@ def history(self, period="1mo", interval="1d", self._history = df.copy() - if not actions: - df.drop(columns=["Dividends", "Stock Splits"], inplace=True) return df def _get_ticker_tz(self): @@ -530,10 +548,10 @@ def cleanup(data): fin_data_quote = fin_data['QuoteSummaryStore'] # generic patterns - self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()} + self._earnings = {"yearly": utils.pd.DataFrame(), "quarterly": utils.pd.DataFrame()} self._financials = {} for name in ["income", "balance-sheet", "cash-flow"]: - self._financials[name] = {"yearly":utils.empty_df(), "quarterly":utils.empty_df()} + self._financials[name] = {"yearly":utils.pd.DataFrame(), "quarterly":utils.pd.DataFrame()} for name in ["income", "balance-sheet", "cash-flow"]: annual, qtr = self._create_financials_table(name, proxy) if annual is not None: @@ -653,11 +671,11 @@ def cleanup(data): try: self._analyst_trend_details = _pd.DataFrame(analysis_data['recommendationTrend']['trend']) except Exception as e: - self._analyst_trend_details = utils.empty_df() + self._analyst_trend_details = None try: self._analyst_price_target = 
_pd.DataFrame(analysis_data['financialData'], index=[0])[['targetLowPrice','currentPrice','targetMeanPrice','targetHighPrice','numberOfAnalystOpinions']].T except Exception as e: - self._analyst_price_target = utils.empty_df() + self._analyst_price_target = None earnings_estimate = [] revenue_estimate = [] if len(self._analyst_trend_details) != 0: diff --git a/yfinance/utils.py b/yfinance/utils.py index 6c4cf0a47..5d07fe3a6 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -241,7 +241,7 @@ def format_quarterly_financial_statement(_statement, level_detail, order): format_quarterly_financial_statements formats any quarterly financial statement Returns: - - _statement: A fully formatted annual financial statement in pandas dataframe. + - _statement: A fully formatted quarterly financial statement in pandas dataframe. ''' _statement = _statement.reindex(order) _statement.index = camel2title(_statement.T) @@ -434,11 +434,170 @@ def parse_actions(data): splits.sort_index(inplace=True) splits["Stock Splits"] = splits["numerator"] / \ splits["denominator"] - splits = splits["Stock Splits"] + splits = splits[["Stock Splits"]] return dividends, splits +def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange): + # Yahoo bug fix. If market is open today then Yahoo normally returns + # todays data as a separate row from rest-of week/month interval in above row. + # Seems to depend on what exchange e.g. crypto OK. 
+ # Fix = merge them together + n = quotes.shape[0] + if n > 1: + dt1 = quotes.index[n-1].tz_localize("UTC").tz_convert(tz_exchange) + dt2 = quotes.index[n-2].tz_localize("UTC").tz_convert(tz_exchange) + if interval in ["1wk", "1mo", "3mo"]: + if interval == "1wk": + last_rows_same_interval = dt1.year==dt2.year and dt1.week==dt2.week + elif interval == "1mo": + last_rows_same_interval = dt1.month==dt2.month + elif interval == "3mo": + last_rows_same_interval = dt1.year==dt2.year and dt1.quarter==dt2.quarter + if last_rows_same_interval: + # Last two rows are within same interval + idx1 = quotes.index[n-1] + idx2 = quotes.index[n-2] + if _np.isnan(quotes.loc[idx2,"Open"]): + quotes.loc[idx2,"Open"] = quotes["Open"][n-1] + # Note: nanmax() & nanmin() ignores NaNs + quotes.loc[idx2,"High"] = _np.nanmax([quotes["High"][n-1], quotes["High"][n-2]]) + quotes.loc[idx2,"Low"] = _np.nanmin([quotes["Low"][n-1], quotes["Low"][n-2]]) + quotes.loc[idx2,"Close"] = quotes["Close"][n-1] + if "Adj High" in quotes.columns: + quotes.loc[idx2,"Adj High"] = _np.nanmax([quotes["Adj High"][n-1], quotes["Adj High"][n-2]]) + if "Adj Low" in quotes.columns: + quotes.loc[idx2,"Adj Low"] = _np.nanmin([quotes["Adj Low"][n-1], quotes["Adj Low"][n-2]]) + if "Adj Close" in quotes.columns: + quotes.loc[idx2,"Adj Close"] = quotes["Adj Close"][n-1] + quotes.loc[idx2,"Volume"] += quotes["Volume"][n-1] + quotes = quotes.drop(quotes.index[n-1]) + + # Similar bug in daily data except most data is simply duplicated + # - exception is volume, *slightly* greater on final row (and matches website) + elif interval=="1d": + if dt1.date() == dt2.date(): + # Last two rows are on same day. 
Drop second-to-last row + quotes = quotes.drop(quotes.index[n-2]) + + return quotes + + +def safe_merge_dfs(df_main, df_sub, interval): + # Carefully merge 'df_sub' onto 'df_main' + # If naive merge fails, try again with reindexing df_sub: + # 1) if interval is weekly or monthly, then try with index set to start of week/month + # 2) if still failing then manually search through df_main.index to reindex df_sub + + if df_sub.shape[0] == 0: + raise Exception("No data to merge") + + df_sub_backup = df_sub.copy() + data_cols = [c for c in df_sub.columns if not c in df_main] + if len(data_cols) > 1: + raise Exception("Expected 1 data col") + data_col = data_cols[0] + + def _reindex_events(df, new_index, data_col_name): + if len(new_index) == len(set(new_index)): + # No duplicates, easy + df.index = new_index + return df + + df["_NewIndex"] = new_index + # Duplicates present within periods but can aggregate + if data_col_name == "Dividends": + # Add + df = df.groupby("_NewIndex").sum() + df.index.name = None + elif data_col_name == "Stock Splits": + # Product + df = df.groupby("_NewIndex").prod() + df.index.name = None + else: + raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name)) + if "_NewIndex" in df.columns: + df = df.drop("_NewIndex",axis=1) + return df + + df = df_main.join(df_sub) + + f_na = df[data_col].isna() + data_lost = sum(~f_na) < df_sub.shape[0] + if not data_lost: + return df + # Lost data during join() + if interval in ["1wk","1mo","3mo"]: + # Backdate all df_sub.index dates to start of week/month + if interval == "1wk": + new_index = _pd.PeriodIndex(df_sub.index, freq='W').to_timestamp() + elif interval == "1mo": + new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp() + elif interval == "3mo": + new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp() + df_sub = _reindex_events(df_sub, new_index, data_col) + df = df_main.join(df_sub) + + f_na = df[data_col].isna() + data_lost = 
sum(~f_na) < df_sub.shape[0] + if not data_lost: + return df + # Lost data during join(). Manually check each df_sub.index date against df_main.index to + # find matching interval + df_sub = df_sub_backup.copy() + new_index = [-1]*df_sub.shape[0] + for i in range(df_sub.shape[0]): + dt_sub_i = df_sub.index[i] + if dt_sub_i in df_main.index: + new_index[i] = dt_sub_i ; continue + # Found a bad index date, need to search for near-match in df_main (same week/month) + fixed = False + for j in range(df_main.shape[0]-1): + dt_main_j0 = df_main.index[j] + dt_main_j1 = df_main.index[j+1] + if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1): + fixed = True + if interval.endswith('h') or interval.endswith('m'): + # Must also be same day + fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date()) + if fixed: + dt_sub_i = dt_main_j0 ; break + if not fixed: + last_main_dt = df_main.index[df_main.shape[0]-1] + diff = dt_sub_i - last_main_dt + if interval == "1mo" and last_main_dt.month == dt_sub_i.month: + dt_sub_i = last_main_dt ; fixed = True + elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter: + dt_sub_i = last_main_dt ; fixed = True + elif interval == "1wk" and last_main_dt.week == dt_sub_i.week: + dt_sub_i = last_main_dt ; fixed = True + elif interval == "1d" and last_main_dt.day == dt_sub_i.day: + dt_sub_i = last_main_dt ; fixed = True + elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour: + dt_sub_i = last_main_dt ; fixed = True + else: + td = _pd.to_timedelta(interval) + if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td): + dt_sub_i = last_main_dt ; fixed = True + new_index[i] = dt_sub_i + df_sub = _reindex_events(df_sub, new_index, data_col) + df = df_main.join(df_sub) + + f_na = df[data_col].isna() + data_lost = sum(~f_na) < df_sub.shape[0] + if data_lost: + ## Not always possible to match events with trading, e.g. when released pre-market. 
+ ## So have to append to bottom with nan prices. + f_missing = ~df_sub.index.isin(df.index) + df_sub_missing = df_sub[f_missing] + keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns) + df_sub_missing[list(keys)] = _np.nan + df = _pd.concat([df, df_sub_missing], sort=True) + + return df + + def fix_Yahoo_dst_issue(df, interval): if interval in ["1d","1w","1wk"]: # These intervals should start at time 00:00. But for some combinations of date and timezone, From 053e0b9abb722e93728aca1ca670c4d6e8d3aeb8 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Thu, 20 Oct 2022 22:24:24 +0100 Subject: [PATCH 18/20] Port in @git-shogg fix, & fix typos --- yfinance/base.py | 5 +++-- yfinance/utils.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index b346922c6..5a28d0ae3 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -548,10 +548,10 @@ def cleanup(data): fin_data_quote = fin_data['QuoteSummaryStore'] # generic patterns - self._earnings = {"yearly": utils.pd.DataFrame(), "quarterly": utils.pd.DataFrame()} + self._earnings = {"yearly": utils._pd.DataFrame(), "quarterly": utils._pd.DataFrame()} self._financials = {} for name in ["income", "balance-sheet", "cash-flow"]: - self._financials[name] = {"yearly":utils.pd.DataFrame(), "quarterly":utils.pd.DataFrame()} + self._financials[name] = {"yearly":utils._pd.DataFrame(), "quarterly":utils._pd.DataFrame()} for name in ["income", "balance-sheet", "cash-flow"]: annual, qtr = self._create_financials_table(name, proxy) if annual is not None: @@ -719,6 +719,7 @@ def _create_financials_table(self, name, proxy): TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) if name == "balance-sheet": + # Note: balance sheet is the only financial statement with no ttm detail _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order) 
else: _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order, TTM_dicts, template_ttm_order) diff --git a/yfinance/utils.py b/yfinance/utils.py index 5d07fe3a6..6debbf724 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -219,7 +219,8 @@ def format_annual_financial_statement(level_detail, annual_dicts, annual_order, Annual = Annual.reindex(annual_order) Annual.index = Annual.index.str.replace(r'annual','') - if ttm_dicts != None or ttm_order != None: # Balance sheet is the only financial statement with no ttm detail. + # Note: balance sheet is the only financial statement with no ttm detail + if (ttm_dicts not in [[], None]) and (ttm_order not in [[], None]): TTM = _pd.DataFrame.from_dict(ttm_dicts).set_index("index") TTM = TTM.reindex(ttm_order) TTM.columns = ['TTM ' + str(col) for col in TTM.columns] # Add 'TTM' prefix to all column names, so if combined we can tell the difference between actuals and TTM (similar to yahoo finance). 
From e29df56253f3fec087d545a2964e13ecd3e69dd2 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 28 Oct 2022 14:16:54 +0100 Subject: [PATCH 19/20] Financials - reorder rows to match website, disable MultiIndex --- yfinance/base.py | 28 +++++++++++++++++----------- yfinance/utils.py | 2 ++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 5a28d0ae3..fa070f1b8 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -715,19 +715,25 @@ def _create_financials_table(self, name, proxy): _stmt_annual = None _stmt_qtr = None try: - template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) - TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) - - if name == "balance-sheet": - # Note: balance sheet is the only financial statement with no ttm detail - _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order) - else: - _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order, TTM_dicts, template_ttm_order) + # Developers note: TTM and template stuff allows for reproducing the nested structure + # visible on Yahoo website. But more work needed to make it user-friendly! 
Ideally + # return a tree data structure instead of Pandas MultiIndex + # So until this is implemented, just return simple tables + _stmt_annual = utils.get_financials_time_series(self.ticker, name, "annual", ticker_url, proxy, self.session) + _stmt_qtr = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) + + # template_ttm_order, template_annual_order, template_order, level_detail = utils.build_template(data_store["FinancialTemplateStore"]) + # TTM_dicts, Annual_dicts = utils.retreive_financial_details(data_store['QuoteTimeSeriesStore']) + # if name == "balance-sheet": + # # Note: balance sheet is the only financial statement with no ttm detail + # _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order) + # else: + # _stmt_annual = utils.format_annual_financial_statement(level_detail, Annual_dicts, template_annual_order, TTM_dicts, template_ttm_order) # Data store doesn't contain quarterly data, so retrieve using different url: - _qtr_data = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) - - _stmt_qtr = utils.format_quarterly_financial_statement(_qtr_data, level_detail, template_order) + # _qtr_data = utils.get_financials_time_series(self.ticker, name, "quarterly", ticker_url, proxy, self.session) + # _stmt_qtr = utils.format_quarterly_financial_statement(_qtr_data, level_detail, template_order) + except: pass diff --git a/yfinance/utils.py b/yfinance/utils.py index 6debbf724..62bc5e5b7 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -318,6 +318,8 @@ def _finditem1(key, obj): df.index = df.index.str.replace("^"+timescale, "", regex=True) + # Reorder table to match order on Yahoo website + df = df.reindex([k for k in keys if k in df.index]) df = df[sorted(df.columns, reverse=True)] return df From 9e529f3c8f14370307cb6414ed74f22d742b4b60 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 28 Oct 2022 14:18:13 
+0100 Subject: [PATCH 20/20] Revert version bump --- CHANGELOG.rst | 4 ---- yfinance/version.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d36c16cb4..7ac846ab7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,9 +1,5 @@ Change Log =========== -0.1.78 ------- -- Updated the way in which the annual financial data is pieced together (income statement, balance sheet and cash flow statement) this now more accurately reflects what is presented in Yahoo Finance. -- Added functionality to enable users to pull in detail from the Yahoo Finance Analysis page. 0.1.77 ------ diff --git a/yfinance/version.py b/yfinance/version.py index cbf0ad46c..ecb526e9f 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.1.78" +version = "0.1.77"