From 7de905de13f94ab07aa2b655181d0d7c90b19577 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Tue, 13 Jul 2021 11:12:28 +0100
Subject: [PATCH] MAINT: Final fixes

Fix all broken Yahoo readers
Test against pandas 1
Change minimums
---
 pandas_datareader/_utils.py                   |  5 +-
 pandas_datareader/base.py                     |  8 +--
 pandas_datareader/compat/__init__.py          | 15 ++--
 pandas_datareader/naver.py                    |  2 +-
 pandas_datareader/tests/test_econdb.py        |  2 +-
 pandas_datareader/tests/test_famafrench.py    | 66 +++++++++--------
 pandas_datareader/tests/test_fred.py          |  4 +-
 pandas_datareader/tests/yahoo/test_options.py |  2 +-
 pandas_datareader/tests/yahoo/test_yahoo.py   | 70 +++++++++----------
 pandas_datareader/yahoo/_headers.py           | 13 ++++
 pandas_datareader/yahoo/actions.py            |  2 +-
 pandas_datareader/yahoo/daily.py              | 29 ++------
 pandas_datareader/yahoo/quotes.py             | 23 ++++++
 setup.cfg                                     | 13 ++++
 14 files changed, 149 insertions(+), 105 deletions(-)
 create mode 100644 pandas_datareader/yahoo/_headers.py

diff --git a/pandas_datareader/_utils.py b/pandas_datareader/_utils.py
index 28870f0b..245cb33a 100644
--- a/pandas_datareader/_utils.py
+++ b/pandas_datareader/_utils.py
@@ -53,8 +53,11 @@ def _sanitize_dates(start, end):
     return start, end
 
 
-def _init_session(session, retry_count=3):
+def _init_session(session):
     if session is None:
         session = requests.Session()
         # do not set requests max_retries here to support arbitrary pause
+    else:
+        if not isinstance(session, requests.Session):
+            raise TypeError("session must be a requests.Session")
     return session
diff --git a/pandas_datareader/base.py b/pandas_datareader/base.py
index f589c73b..f60d2665 100644
--- a/pandas_datareader/base.py
+++ b/pandas_datareader/base.py
@@ -70,7 +70,7 @@ def __init__(
         self.pause = pause
         self.timeout = timeout
         self.pause_multiplier = 1
-        self.session = _init_session(session, retry_count)
+        self.session = _init_session(session)
         self.freq = freq
         self.headers = None
 
@@ -148,11 +148,7 @@ def _get_response(self, url, params=None, headers=None):
         params : dict or None
             parameters passed to the URL
         """
-
-        # Use default headers if not passes and not using a user session
-        if headers is None:
-            headers = self.headers
-
+        headers = headers or self.headers
         pause = self.pause
         last_response_text = ""
         for _ in range(self.retry_count + 1):
diff --git a/pandas_datareader/compat/__init__.py b/pandas_datareader/compat/__init__.py
index 26d502a5..5eba99d8 100644
--- a/pandas_datareader/compat/__init__.py
+++ b/pandas_datareader/compat/__init__.py
@@ -39,11 +39,16 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
     # for "get_filepath_or_buffer" starting in pandas >= 0.20.0
     if isinstance(filepath_or_buffer, dict):
         return filepath_or_buffer, encoding, compression
-
-    tmp = com._get_filepath_or_buffer(
-        filepath_or_buffer, encoding=encoding, compression=None
-    )
-    return tmp.filepath_or_buffer, tmp.encoding, tmp.compression
+    try:
+        tmp = com._get_filepath_or_buffer(
+            filepath_or_buffer, encoding=encoding, compression=None
+        )
+        return tmp.filepath_or_buffer, tmp.encoding, tmp.compression
+    except AttributeError:
+        tmp = com.get_filepath_or_buffer(
+            filepath_or_buffer, encoding=encoding, compression=None
+        )
+        return tmp
 
 
 string_types = (str,)
diff --git a/pandas_datareader/naver.py b/pandas_datareader/naver.py
index 0c45ae0c..7187456b 100644
--- a/pandas_datareader/naver.py
+++ b/pandas_datareader/naver.py
@@ -35,7 +35,7 @@ def __init__(
         if not isinstance(symbols, string_types):
             raise NotImplementedError("Bulk-fetching is not implemented")
 
-        super(NaverDailyReader, self).__init__(
+        super().__init__(
             symbols=symbols,
             start=start,
             end=end,
diff --git a/pandas_datareader/tests/test_econdb.py b/pandas_datareader/tests/test_econdb.py
index f8891444..dadec47a 100644
--- a/pandas_datareader/tests/test_econdb.py
+++ b/pandas_datareader/tests/test_econdb.py
@@ -58,7 +58,7 @@ def test_get_tourism(self):
             start=pd.Timestamp("2008-01-01"),
             end=pd.Timestamp("2012-01-01"),
         )
-        df = df.astype(np.float)
+        df = df.astype(float)
         jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
         us = np.array(
             [175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
diff --git a/pandas_datareader/tests/test_famafrench.py b/pandas_datareader/tests/test_famafrench.py
index 201a5367..4259a454 100644
--- a/pandas_datareader/tests/test_famafrench.py
+++ b/pandas_datareader/tests/test_famafrench.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 from pandas import testing as tm
 import pytest
@@ -48,49 +49,49 @@ def test_f_f_research(self):
             {
                 "Mkt-RF": [
                     -3.36,
-                    3.4,
+                    3.40,
                     6.31,
-                    2.0,
+                    2.00,
                     -7.89,
-                    -5.56,
+                    -5.57,
                     6.93,
                     -4.77,
                     9.54,
                     3.88,
-                    0.6,
+                    0.60,
                     6.82,
                 ],
                 "SMB": [
-                    0.38,
-                    1.2,
-                    1.42,
-                    4.98,
-                    0.05,
-                    -1.97,
-                    0.16,
-                    -3.00,
-                    3.92,
-                    1.15,
-                    3.70,
-                    0.7,
+                    0.37,
+                    1.19,
+                    1.44,
+                    4.86,
+                    0.14,
+                    -1.84,
+                    0.18,
+                    -3.02,
+                    3.93,
+                    1.07,
+                    3.78,
+                    0.68,
                 ],
                 "HML": [
-                    0.31,
-                    3.16,
-                    2.1,
-                    2.81,
-                    -2.38,
-                    -4.5,
-                    -0.27,
-                    -1.95,
-                    -3.12,
-                    -2.59,
-                    -0.9,
-                    3.81,
+                    0.33,
+                    3.19,
+                    2.11,
+                    2.91,
+                    -2.39,
+                    -4.52,
+                    -0.36,
+                    -1.90,
+                    -3.23,
+                    -2.46,
+                    -0.95,
+                    3.64,
                 ],
                 "RF": [
-                    0.0,
-                    0.0,
+                    0.00,
+                    0.00,
                     0.01,
                     0.01,
                     0.01,
@@ -106,7 +107,10 @@ def test_f_f_research(self):
             index=pd.period_range("2010-01-01", "2010-12-01", freq="M", name="Date"),
             columns=["Mkt-RF", "SMB", "HML", "RF"],
         )
-        tm.assert_frame_equal(results[0], exp, check_less_precise=0)
+        received = results[0]
+        np.testing.assert_allclose(received, exp)
+        tm.assert_index_equal(received.index, exp.index)
+        tm.assert_index_equal(received.columns, exp.columns)
 
     def test_me_breakpoints(self):
         results = web.DataReader(
diff --git a/pandas_datareader/tests/test_fred.py b/pandas_datareader/tests/test_fred.py
index 3029ee18..7776afc7 100644
--- a/pandas_datareader/tests/test_fred.py
+++ b/pandas_datareader/tests/test_fred.py
@@ -72,7 +72,9 @@ def test_fred_multi(self):  # pragma: no cover
         )
         expected.index.rename("DATE", inplace=True)
         expected.index.freq = "MS"
-        tm.assert_frame_equal(received, expected, check_less_precise=True)
+        np.testing.assert_allclose(received, expected)
+        tm.assert_index_equal(received.index, expected.index)
+        tm.assert_index_equal(received.columns, expected.columns)
 
     def test_fred_multi_bad_series(self):
         names = ["NOTAREALSERIES", "CPIAUCSL", "ALSO FAKE"]
diff --git a/pandas_datareader/tests/yahoo/test_options.py b/pandas_datareader/tests/yahoo/test_options.py
index 3c4c726d..d37839d8 100644
--- a/pandas_datareader/tests/yahoo/test_options.py
+++ b/pandas_datareader/tests/yahoo/test_options.py
@@ -9,7 +9,7 @@
 from pandas_datareader import data as web
 
 
-@pytest.yield_fixture
+@pytest.fixture
 def aapl():
     aapl = web.Options("aapl", "yahoo")
     yield aapl
diff --git a/pandas_datareader/tests/yahoo/test_yahoo.py b/pandas_datareader/tests/yahoo/test_yahoo.py
index 80ad951f..4fa7e524 100644
--- a/pandas_datareader/tests/yahoo/test_yahoo.py
+++ b/pandas_datareader/tests/yahoo/test_yahoo.py
@@ -159,7 +159,7 @@ def test_get_data_null_as_missing_data(self, adj_pr):
         else:
             floats.append("Adj Close")
 
-        assert result[floats].dtypes.all() == np.floating
+        assert result[floats].dtypes.all() == np.float64
 
     @skip_on_exception(RemoteDataError)
     def test_get_data_multiple_symbols_two_dates(self):
@@ -168,7 +168,7 @@ def test_get_data_multiple_symbols_two_dates(self):
         assert result.size == 3
 
         # sanity checking
-        assert result.dtypes == np.floating
+        assert result.dtypes == np.float64
 
         expected = np.array(
             [
@@ -207,12 +207,12 @@ def test_get_data_yahoo_actions(self):
         assert actions.loc["2005-02-28", "value"][0] == 1 / 2.0
 
         assert actions.loc["1995-11-21", "action"][0] == "DIVIDEND"
-        assert round(actions.loc["1995-11-21", "value"][0], 3) == 0.120
+        assert round(actions.loc["1995-11-21", "value"][0], 3) == 0.030
 
         actions = web.get_data_yahoo_actions("AAPL", start, end, adjust_dividends=True)
 
         assert actions.loc["1995-11-21", "action"][0] == "DIVIDEND"
-        assert round(actions.loc["1995-11-21", "value"][0], 4) == 0.0043
+        assert round(actions.loc["1995-11-21", "value"][0], 4) == 0.0011
 
     def test_get_data_yahoo_actions_invalid_symbol(self):
         start = datetime(1990, 1, 1)
@@ -226,14 +226,14 @@ def test_yahoo_reader_class(self):
         r = YahooDailyReader("GOOG", start="JAN-01-2015")
         df = r.read()
 
-        assert df.Volume.loc["JAN-02-2015"] == 1447500
+        assert df.Volume.loc["JAN-02-2015"] == 1447563
 
         session = requests.Session()
 
         r = YahooDailyReader("GOOG", session=session)
         assert r.session is session
 
-    def test_yahoo_DataReader(self):
+    def test_yahoo_datareader(self):
         start = datetime(2010, 1, 1)
         end = datetime(2015, 5, 9)
         # yahoo will adjust for dividends by default
@@ -275,19 +275,19 @@ def test_yahoo_DataReader(self):
                     "DIVIDEND",
                 ],
                 "value": [
-                    0.52,
-                    0.47,
-                    0.47,
-                    0.47,
-                    0.14285714,
-                    0.47,
-                    0.43571,
-                    0.43571,
-                    0.43571,
-                    0.43571,
-                    0.37857,
-                    0.37857,
-                    0.37857,
+                    0.130000,
+                    0.117500,
+                    0.117500,
+                    0.117500,
+                    0.142857,
+                    0.117500,
+                    0.108929,
+                    0.108929,
+                    0.108929,
+                    0.108929,
+                    0.094643,
+                    0.094643,
+                    0.094643,
                 ],
             },
             index=exp_idx,
@@ -316,19 +316,19 @@ def test_yahoo_DataReader(self):
                     "DIVIDEND",
                 ],
                 "value": [
-                    0.52,
-                    0.47,
-                    0.47,
-                    0.47,
-                    0.14285714,
-                    3.29,
-                    3.05,
-                    3.05,
-                    3.05,
-                    3.05,
-                    2.65,
-                    2.65,
-                    2.65,
+                    0.1300,
+                    0.1175,
+                    0.1175,
+                    0.1175,
+                    0.1429,
+                    0.8225,
+                    0.7625,
+                    0.7625,
+                    0.7625,
+                    0.7625,
+                    0.6625,
+                    0.6625,
+                    0.6625,
                 ],
             },
             index=exp_idx,
@@ -344,13 +344,13 @@ def test_yahoo_DataReader(self):
         result = web.DataReader("NTR", "yahoo-actions", start, end)
 
         exp_idx = pd.DatetimeIndex(
-            ["2018-12-28", "2018-09-27", "2018-06-28", "2018-03-28", "2018-01-02"]
+            ["2018-12-28", "2018-09-27", "2018-06-28", "2018-03-28"]
         )
 
         exp = pd.DataFrame(
             {
-                "action": ["DIVIDEND", "DIVIDEND", "DIVIDEND", "DIVIDEND", "SPLIT"],
-                "value": [0.43, 0.40, 0.40, 0.40, 1.00],
+                "action": ["DIVIDEND", "DIVIDEND", "DIVIDEND", "DIVIDEND"],
+                "value": [0.43, 0.40, 0.40, 0.40],
             },
             index=exp_idx,
         )
diff --git a/pandas_datareader/yahoo/_headers.py b/pandas_datareader/yahoo/_headers.py
new file mode 100644
index 00000000..dbc8932b
--- /dev/null
+++ b/pandas_datareader/yahoo/_headers.py
@@ -0,0 +1,13 @@
+"""
+Default HTTP headers for Yahoo readers when no session is supplied
+"""
+DEFAULT_HEADERS = {
+    "Connection": "keep-alive",
+    "Expires": str(-1),
+    "Upgrade-Insecure-Requests": str(1),
+    # Google Chrome:
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    ),
+}
diff --git a/pandas_datareader/yahoo/actions.py b/pandas_datareader/yahoo/actions.py
index 48a0f549..f4a81ee8 100644
--- a/pandas_datareader/yahoo/actions.py
+++ b/pandas_datareader/yahoo/actions.py
@@ -12,7 +12,7 @@ class YahooActionReader(YahooDailyReader):
     """
 
     def read(self):
-        data = super(YahooActionReader, self).read()
+        data = super().read()
         actions = {}
         if isinstance(data.columns, MultiIndex):
             data = data.swaplevel(0, 1, axis=1)
diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py
index 0ade5e59..eda47402 100644
--- a/pandas_datareader/yahoo/daily.py
+++ b/pandas_datareader/yahoo/daily.py
@@ -8,6 +8,7 @@
 
 from pandas_datareader._utils import RemoteDataError
 from pandas_datareader.base import _DailyBaseReader
+from pandas_datareader.yahoo._headers import DEFAULT_HEADERS
 
 
 class YahooDailyReader(_DailyBaseReader):
@@ -35,9 +36,8 @@ class YahooDailyReader(_DailyBaseReader):
         single value given for symbol, represents the pause between retries.
     session : Session, default None
         requests.sessions.Session instance to be used. Passing a session
-        is an advanced usage and you must either set the required
-        headers in the session directly or explicitly override
-        using the ``headers`` argument.
+        is an advanced usage and you must set any required
+        headers in the session directly.
     adjust_price : bool, default False
         If True, adjusts all prices in hist_data ('Open', 'High', 'Low',
         'Close') based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
@@ -53,9 +53,6 @@ class YahooDailyReader(_DailyBaseReader):
         If True, adds Dividend and Split columns to dataframe.
     adjust_dividends: bool, default true
         If True, adjusts dividends for splits.
-    headers : dict, optional
-        Headers to use when reading data. If None (the default), a
-        standard set of headers is used.
     """
 
     def __init__(
@@ -72,9 +69,8 @@ def __init__(
         interval="d",
         get_actions=False,
         adjust_dividends=True,
-        headers=None,
     ):
-        super(YahooDailyReader, self).__init__(
+        super().__init__(
             symbols=symbols,
             start=start,
             end=end,
@@ -87,21 +83,10 @@ def __init__(
         # Ladder up the wait time between subsequent requests to improve
         # probability of a successful retry
         self.pause_multiplier = 2.5
-        if headers is not None:
-            self.headers = headers
-        elif session is None:
-            self.headers = {
-                "Connection": "keep-alive",
-                "Expires": str(-1),
-                "Upgrade-Insecure-Requests": str(1),
-                # Google Chrome:
-                "User-Agent": (
-                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-                    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-                ),
-            }
+        if session is None:
+            self.headers = DEFAULT_HEADERS
         else:
-            self.headers = None
+            self.headers = session.headers
 
         self.adjust_price = adjust_price
         self.ret_index = ret_index
diff --git a/pandas_datareader/yahoo/quotes.py b/pandas_datareader/yahoo/quotes.py
index 4dddff3b..fba12548 100644
--- a/pandas_datareader/yahoo/quotes.py
+++ b/pandas_datareader/yahoo/quotes.py
@@ -5,6 +5,7 @@
 
 from pandas_datareader.base import _BaseReader
 from pandas_datareader.compat import string_types
+from pandas_datareader.yahoo._headers import DEFAULT_HEADERS
 
 _DEFAULT_PARAMS = {
     "lang": "en-US",
@@ -17,6 +18,28 @@ class YahooQuotesReader(_BaseReader):
 
     """Get current yahoo quote"""
 
+    def __init__(
+        self,
+        symbols=None,
+        start=None,
+        end=None,
+        retry_count=3,
+        pause=0.1,
+        session=None,
+    ):
+        super().__init__(
+            symbols=symbols,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+        )
+        if session is not None:
+            self.headers = session.headers
+        else:
+            self.headers = DEFAULT_HEADERS
+
     @property
     def url(self):
         return "https://query1.finance.yahoo.com/v7/finance/quote"
diff --git a/setup.cfg b/setup.cfg
index 704fd846..6118ea57 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,6 +24,19 @@ markers =
     requires_api_key: mark a test as requiring an API key
     alpha_vantage: mark a test of the AlphaVantage reader
     quandl: mark a test of the Quandl reader
+filterwarnings =
+    ignore:`np.bool` is a deprecated alias:DeprecationWarning:pandas.core.indexes
+    ignore:`np.object` is a deprecated alias:DeprecationWarning:pandas.core.indexes
+    ignore:`np.float` is a deprecated alias:DeprecationWarning:pandas.core.indexes
+    ignore:`np.complex` is a deprecated alias:DeprecationWarning:pandas.core.indexes
+    ignore:`np.bool` is a deprecated alias:DeprecationWarning:pandas.core.internals.blocks
+    ignore:`np.object` is a deprecated alias:DeprecationWarning:pandas.core.internals.blocks
+    ignore:`np.object` is a deprecated alias:DeprecationWarning:pandas.core.internals.construction
+    ignore:`np.object` is a deprecated alias:DeprecationWarning:pandas.io.parsers
+    ignore:`np.object` is a deprecated alias:DeprecationWarning:pandas.core.dtypes.cast
+    ignore:`np.float` is a deprecated alias:DeprecationWarning:pandas.core.internals.blocks
+    ignore:`np.complex` is a deprecated alias:DeprecationWarning:pandas.core.internals.blocks
+    ignore:Converting `np.inexact` or `np.floating` to a dtype:DeprecationWarning:pandas.core.indexes
 
 [flake8]
 ignore = E203, E266, E501, W503