From c76bf0128fcac6c660ef7cf5931005880fc95cb5 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sat, 22 Oct 2022 22:56:09 +0200 Subject: [PATCH 1/5] Improve timezone cache to make it more reliable when using threads by using SQLLite. --- yfinance/utils.py | 70 +++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/yfinance/utils.py b/yfinance/utils.py index 64426603c..14d5cc203 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -30,6 +30,7 @@ import sys as _sys import os as _os import appdirs as _ad +import sqlite3 from threading import Lock cache_mutex = Lock() @@ -88,7 +89,9 @@ def get_news_by_isin(isin, proxy=None, session=None): return data.get('news', {}) -def empty_df(index=[]): +def empty_df(index=None): + if index is None: + index = [] empty = _pd.DataFrame(index=index, data={ 'Open': _np.nan, 'High': _np.nan, 'Low': _np.nan, 'Close': _np.nan, 'Adj Close': _np.nan, 'Volume': _np.nan}) @@ -489,43 +492,44 @@ def __str__(self): # Simple file cache of ticker->timezone: +class KVStore(): + + def __init__(self, filename): + self.conn = sqlite3.connect(filename, timeout=10, check_same_thread=False) + self.conn.execute('pragma journal_mode=wal') + self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid') + self.conn.commit() + + def get(self, key): + item = self.conn.execute('select value from "kv" where key=?', (key,)) + if item: + return next(item, (None,))[0] + + def set(self, key, value): + self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value)) + self.conn.commit() + + def delete(self, key): + self.conn.execute('delete from "kv" where key=?', (key,)) + self.conn.commit() + + def get_cache_dirpath(): return _os.path.join(_ad.user_cache_dir(), "py-yfinance") -def cache_lookup_tkr_tz(tkr): - fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv") - if not _os.path.isfile(fp): - return None - df = _pd.read_csv(fp, 
index_col="Ticker") - if not tkr in df.index: - return None - return df.loc[tkr,"Tz"] -def cache_store_tkr_tz(tkr, tz): - dp = get_cache_dirpath() - fp = _os.path.join(dp, "tkr-tz.csv") +tz_db = KVStore(_os.path.join(get_cache_dirpath(), "tkr-tz.db")) - cache_mutex.acquire() - if not _os.path.isdir(dp): - _os.makedirs(dp) - if (not _os.path.isfile(fp)) and (tz is not None): - # Initialise CSV file with first entry - df = _pd.DataFrame({"Tz":[tz]}, index=[tkr]) - df.index.name = "Ticker" - df.to_csv(fp) +def cache_lookup_tkr_tz(tkr): + with cache_mutex: + return tz_db.get(tkr) - else: - df = _pd.read_csv(fp, index_col="Ticker") - if tz is None: - # Delete if in cache: - if tkr in df.index: - df = df.drop(tkr) - df.to_csv(fp) - else: - if tkr in df.index: - raise Exception("Tkr {} tz already in cache".format(tkr)) - df.loc[tkr,"Tz"] = tz - df.to_csv(fp) - cache_mutex.release() +def cache_store_tkr_tz(tkr, tz): + with cache_mutex: + if tz is None: + tz_db.delete(tkr) + elif tz_db.get(tkr) is not None: + raise Exception("Tkr {} tz already in cache".format(tkr)) + return tz_db.set(tkr, tz) From 783df549780d37231d494d8c445270e9ddc5937a Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sat, 22 Oct 2022 23:56:50 +0200 Subject: [PATCH 2/5] Bugfix, do not set tz in cache if it is None, just delete it. --- yfinance/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yfinance/utils.py b/yfinance/utils.py index 14d5cc203..fc003bf3e 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -532,4 +532,5 @@ def cache_store_tkr_tz(tkr, tz): tz_db.delete(tkr) elif tz_db.get(tkr) is not None: raise Exception("Tkr {} tz already in cache".format(tkr)) - return tz_db.set(tkr, tz) + else: + tz_db.set(tkr, tz) From 422a50672d914f55c1a5263c644d9b3adc005603 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 23 Oct 2022 13:43:40 +0200 Subject: [PATCH 3/5] Lazy init of cache db and added migration of data from old CSV cache. 
--- yfinance/base.py | 6 +-- yfinance/utils.py | 99 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 73 insertions(+), 32 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 62482dd4f..683e1e218 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -379,7 +379,7 @@ def _get_ticker_tz(self): if not self._tz is None: return self._tz - tz = utils.cache_lookup_tkr_tz(self.ticker) + tz = utils.tz_cache.lookup(self.ticker) if tz is not None: invalid_value = not isinstance(tz, str) @@ -391,7 +391,7 @@ def _get_ticker_tz(self): if invalid_value: # Clear from cache and force re-fetch - utils.cache_store_tkr_tz(self.ticker, None) + utils.tz_cache.store(self.ticker, None) tz = None if tz is None: @@ -407,7 +407,7 @@ def _get_ticker_tz(self): tz = None if tz is not None: # info fetch is relatively slow so cache timezone - utils.cache_store_tkr_tz(self.ticker, tz) + utils.tz_cache.store(self.ticker, tz) self._tz = tz return tz diff --git a/yfinance/utils.py b/yfinance/utils.py index fc003bf3e..39dbb8d16 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -22,6 +22,8 @@ from __future__ import print_function import datetime as _datetime +from typing import Dict, Union + import pytz as _tz import requests as _requests import re as _re @@ -30,10 +32,10 @@ import sys as _sys import os as _os import appdirs as _ad -import sqlite3 +import sqlite3 as _sqlite3 +import atexit as _atexit from threading import Lock -cache_mutex = Lock() try: import ujson as _json @@ -491,46 +493,85 @@ def __str__(self): return str(self.prog_bar) -# Simple file cache of ticker->timezone: -class KVStore(): +class _KVStore: + """Simpel Sqlite backed key/value store, key and value are strings. 
Should be thread safe.""" def __init__(self, filename): - self.conn = sqlite3.connect(filename, timeout=10, check_same_thread=False) - self.conn.execute('pragma journal_mode=wal') - self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid') - self.conn.commit() - - def get(self, key): + self._cache_mutex = Lock() + with self._cache_mutex: + self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False) + self.conn.execute('pragma journal_mode=wal') + self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid') + self.conn.commit() + _atexit.register(self.close) + + def close(self): + if self.conn is not None: + with self._cache_mutex: + self.conn.close() + self.conn = None + + def get(self, key: str) -> Union[str, None]: + """Get value for key if it exists else returns None""" item = self.conn.execute('select value from "kv" where key=?', (key,)) if item: return next(item, (None,))[0] - def set(self, key, value): - self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value)) - self.conn.commit() - - def delete(self, key): - self.conn.execute('delete from "kv" where key=?', (key,)) - self.conn.commit() + def set(self, key: str, value) -> str: + with self._cache_mutex: + self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value)) + self.conn.commit() + def bulk_set(self, kvdata: Dict[str, str]): + records = tuple(i for i in kvdata.items()) + with self._cache_mutex: + self.conn.executemany('replace into "kv" (key, value) values (?,?)', records) + self.conn.commit() -def get_cache_dirpath(): - return _os.path.join(_ad.user_cache_dir(), "py-yfinance") + def delete(self, key: str): + with self._cache_mutex: + self.conn.execute('delete from "kv" where key=?', (key,)) + self.conn.commit() -tz_db = KVStore(_os.path.join(get_cache_dirpath(), "tkr-tz.db")) +class _TzCache: + """Simple sqllite file cache of ticker->timezone""" + def 
__init__(self): + self._tz_db = None -def cache_lookup_tkr_tz(tkr): - with cache_mutex: - return tz_db.get(tkr) + def lookup(self, tkr): + return self.tz_db.get(tkr) - -def cache_store_tkr_tz(tkr, tz): - with cache_mutex: + def store(self, tkr, tz): if tz is None: - tz_db.delete(tkr) - elif tz_db.get(tkr) is not None: + self.tz_db.delete(tkr) + elif self.tz_db.get(tkr) is not None: raise Exception("Tkr {} tz already in cache".format(tkr)) else: - tz_db.set(tkr, tz) + self.tz_db.set(tkr, tz) + + @property + def cache_dirpath(self): + return _os.path.join(_ad.user_cache_dir(), "py-yfinance") + + @property + def tz_db(self): + # lazy init + if self._tz_db is None: + self._tz_db = _KVStore(_os.path.join(self.cache_dirpath, "tkr-tz.db")) + self._migrate_cache_tkr_tz() + + return self._tz_db + + def _migrate_cache_tkr_tz(self): + """Migrate contents from old ticker CSV-cache to SQLite db""" + fp = _os.path.join(self.cache_dirpath, "tkr-tz.csv") + if not _os.path.isfile(fp): + return None + df = _pd.read_csv(fp, index_col="Ticker") + self.tz_db.bulk_set(df.to_dict()['Tz']) + _os.remove(fp) + + +tz_cache = _TzCache() From d24a25f57922c1116a5f7f833cbeb6e9c087de80 Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 23 Oct 2022 13:59:48 +0200 Subject: [PATCH 4/5] Add missing typehint --- yfinance/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yfinance/utils.py b/yfinance/utils.py index 39dbb8d16..a3e924535 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -517,7 +517,7 @@ def get(self, key: str) -> Union[str, None]: if item: return next(item, (None,))[0] - def set(self, key: str, value) -> str: + def set(self, key: str, value: str) -> str: with self._cache_mutex: self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value)) self.conn.commit() @@ -570,6 +570,7 @@ def _migrate_cache_tkr_tz(self): if not _os.path.isfile(fp): return None df = _pd.read_csv(fp, index_col="Ticker") + print(df.to_dict()['Tz']) 
self.tz_db.bulk_set(df.to_dict()['Tz']) _os.remove(fp) From 6c21c1994ed34267d60c6f60d3ff1f88ba57683f Mon Sep 17 00:00:00 2001 From: Fredrik Corneliusson Date: Sun, 23 Oct 2022 15:27:41 +0200 Subject: [PATCH 5/5] Fix bug, create cache directory if it does not exist. --- yfinance/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yfinance/utils.py b/yfinance/utils.py index a3e924535..5a7a033b7 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -559,6 +559,8 @@ def cache_dirpath(self): def tz_db(self): # lazy init if self._tz_db is None: + if not _os.path.isdir(self.cache_dirpath): + _os.makedirs(self.cache_dirpath) self._tz_db = _KVStore(_os.path.join(self.cache_dirpath, "tkr-tz.db")) self._migrate_cache_tkr_tz()