diff --git a/README.md b/README.md index 2e2fb0f88..8c5733087 100644 --- a/README.md +++ b/README.md @@ -306,6 +306,7 @@ To install `yfinance` using `conda`, see - [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4 - [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1 - [html5lib](https://pypi.org/project/html5lib) \>= 1.1 +- [cryptography](https://pypi.org/project/cryptography) \>= 3.3.2 ### Optional (if you want to use `pandas_datareader`) diff --git a/meta.yaml b/meta.yaml index 6d3345fdf..b53a9196a 100644 --- a/meta.yaml +++ b/meta.yaml @@ -26,6 +26,8 @@ requirements: - frozendict >=2.3.4 - beautifulsoup4 >=4.11.1 - html5lib >=1.1 + # - pycryptodome >=3.6.6 + - cryptography >=3.3.2 - pip - python @@ -40,6 +42,8 @@ requirements: - frozendict >=2.3.4 - beautifulsoup4 >=4.11.1 - html5lib >=1.1 + # - pycryptodome >=3.6.6 + - cryptography >=3.3.2 - python test: diff --git a/requirements.txt b/requirements.txt index 5f467b322..08eceafa5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ pytz>=2022.5 frozendict>=2.3.4 beautifulsoup4>=4.11.1 html5lib>=1.1 +cryptography>=3.3.2 diff --git a/setup.py b/setup.py index 3261b09ef..6d532654c 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,9 @@ install_requires=['pandas>=1.3.0', 'numpy>=1.16.5', 'requests>=2.26', 'multitasking>=0.0.7', 'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5', - 'frozendict>=2.3.4', + 'frozendict>=2.3.4', + # 'pycryptodome>=3.6.6', + 'cryptography>=3.3.2', 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], entry_points={ 'console_scripts': [ diff --git a/yfinance/data.py b/yfinance/data.py index b6069cc6d..ed4657697 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -1,6 +1,17 @@ import functools from functools import lru_cache +import hashlib +from base64 import b64decode +usePycryptodome = False # slightly faster +# usePycryptodome = True +if usePycryptodome: + from Crypto.Cipher import AES + from Crypto.Util.Padding import unpad +else: + from 
def _evp_bytes_to_key(password, salt, key_size=32, iv_size=16, iterations=1,
                      hash_algorithm="md5"):
    """Derive an AES key and IV the way OpenSSL's EVP_BytesToKey does.

    Each round hashes ``previous_block + password + salt`` and appends the
    digest to the output until ``key_size + iv_size`` bytes are available.

    Args:
        password (Union[str, bytes, bytearray]): Password to derive from;
            a ``str`` is encoded as UTF-8 first.
        salt (Union[bytes, bytearray]): Salt mixed into every round.
        key_size (int, optional): Key length in bytes. Defaults to 32.
        iv_size (int, optional): IV length in bytes. Defaults to 16.
        iterations (int, optional): Hash applications per block. Defaults to 1.
        hash_algorithm (str, optional): Name accepted by ``hashlib.new``.
            Defaults to 'md5'.

    Returns:
        tuple: ``(key, iv)`` as bytes.

    Raises:
        ValueError: If ``iterations`` is less than 1.

    Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
    OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
    """
    # Raise instead of assert: asserts vanish under ``python -O``.
    if iterations < 1:
        raise ValueError("Iterations can not be less than 1.")

    if isinstance(password, str):
        password = password.encode("utf-8")

    total_length = key_size + iv_size
    key_iv = b""
    block = b""  # empty on the first round, so it contributes nothing

    while len(key_iv) < total_length:
        block = hashlib.new(hash_algorithm, block + password + salt).digest()
        # Extra iterations re-hash the digest itself.
        for _ in range(1, iterations):
            block = hashlib.new(hash_algorithm, block).digest()
        key_iv += block

    return key_iv[:key_size], key_iv[key_size:total_length]


def decrypt_cryptojs_aes(data):
    """Decrypt the encrypted ``stores`` payload of a parsed page JSON blob.

    Steps performed:
      1. The password is the hex digest of PBKDF2-HMAC-SHA1 over ``data["_cs"]``
         with the salt built from the 32-bit signed big-endian ``words`` in
         ``data["_cr"]`` (a JSON string), 1 iteration, 32 bytes.
      2. ``data["context"]["dispatcher"]["stores"]`` is base64-decoded and must
         start with ``b"Salted__"`` followed by an 8-byte salt.
      3. Key and IV come from the EVP key derivation (MD5, 1 iteration).
      4. The remainder is AES-CBC decrypted, PKCS7-unpadded, decoded as UTF-8
         and parsed as JSON.

    Args:
        data (dict): Parsed JSON containing ``_cs``, ``_cr`` and
            ``context.dispatcher.stores``.

    Returns:
        The object produced by ``json.loads`` on the decrypted plaintext.

    Raises:
        KeyError: If one of the expected keys is missing.
        ValueError: If the payload does not start with the ``Salted__`` header
            (previously an ``assert``, which is skipped under ``python -O``).
    """
    encrypted_stores = data['context']['dispatcher']['stores']
    _cs = data["_cs"]
    _cr = data["_cr"]

    # "words" holds signed 32-bit integers; pack them back into raw bytes.
    pbkdf2_salt = b"".join(
        word.to_bytes(4, byteorder="big", signed=True)
        for word in json.loads(_cr)["words"]
    )
    password = hashlib.pbkdf2_hmac(
        "sha1", _cs.encode("utf8"), pbkdf2_salt, 1, dklen=32
    ).hex()

    encrypted_stores = b64decode(encrypted_stores)
    if encrypted_stores[:8] != b"Salted__":
        raise ValueError(
            "Encrypted stores do not start with the expected 'Salted__' header"
        )
    salt = encrypted_stores[8:16]
    ciphertext = encrypted_stores[16:]

    key, iv = _evp_bytes_to_key(
        password, salt, key_size=32, iv_size=16, iterations=1,
        hash_algorithm="md5",
    )

    if usePycryptodome:
        cipher = AES.new(key, AES.MODE_CBC, iv=iv)
        plaintext = unpad(cipher.decrypt(ciphertext), 16, style="pkcs7")
    else:
        decryptor = Cipher(algorithms.AES(key), modes.CBC(iv)).decryptor()
        padded = decryptor.update(ciphertext) + decryptor.finalize()
        # PKCS7 takes the block size in bits: 128 bits == 16-byte AES block.
        unpadder = padding.PKCS7(128).unpadder()
        plaintext = unpadder.update(padded) + unpadder.finalize()

    return json.loads(plaintext.decode("utf-8"))