Add alpha parameter to linear_predictor and predict (#388)

Quantco · Jul 3, 2021 · 863a44c · 863a44c
1 parent 2f341fb
commit 863a44c
Show file tree

Hide file tree

Showing 3 changed files with 174 additions and 42 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -10,6 +10,11 @@ Changelog
 X.X.X - 2021-XX-XX
 ------------------
 
+**New features:**
+
+* The :meth:`linear_predictor` and :meth:`predict` methods of :class:`~quantcore.glm.GeneralizedLinearRegressor` and :class:`~quantcore.glm.GeneralizedLinearRegressorCV`
+  gain an ``alpha`` parameter (in complement to ``alpha_index``). Moreover, they are now able to predict for multiple penalties.
+
 **Other:**
 
 * Methods of :class:`~quantcore.glm._link.Link` now consistently return NumPy arrays, whereas they used to preserve pandas series in special cases.
@@ -27,16 +32,16 @@ X.X.X - 2021-XX-XX
 
 **Tutorials and documentation improvements:**
 
-- Adding tutorials to the documentation
-- Additional documentation improvements
+- Adding tutorials to the documentation.
+- Additional documentation improvements.
 
 **Bug fix:**
 
 - Verbose progress bar now working again.
 
 **Other:**
 
-- Small improvement in documentation for the ``alpha_index`` argument to :func:`quantcore.glm.GeneralizedLinearRegressor.predict`.
+- Small improvement in documentation for the ``alpha_index`` argument to :meth:`~quantcore.glm.GeneralizedLinearRegressor.predict`.
 - Pinned pre-commit hooks versions.
 
 1.4.1 - 2021-05-01
@@ -49,7 +54,7 @@ We now have Windows builds!
 
 **Deprecations:**
 
-- Fusing the ``alpha`` and ``alphas`` arguments for :func:`quantcore.glm.GeneralizedLinearRegressor`. ``alpha`` now also accepts array like inputs. ``alphas`` is now deprecated but can still be used for backward compatibility. The ``alphas`` argument will be removed with the next major version.
+- Fusing the ``alpha`` and ``alphas`` arguments for :class:`~quantcore.glm.GeneralizedLinearRegressor`. ``alpha`` now also accepts array like inputs. ``alphas`` is now deprecated but can still be used for backward compatibility. The ``alphas`` argument will be removed with the next major version.
 
 **Bug fix:**
 
@@ -86,27 +91,27 @@ Maintenance release to get a fresh build for OSX.
 
 **New feature:**
 
-- Direct support for pandas categorical types in ``fit`` and ``predict``. These will be converted into a ``CategoricalMatrix``.
+- Direct support for pandas categorical types in ``fit`` and ``predict``. These will be converted into a :class:`CategoricalMatrix`.
 
 1.0.1 - 2020-11-12
 ------------------
 
-This is a maintenance release to be compatible with `quantcore.matrix>=1.0.0`.
+This is a maintenance release to be compatible with ``quantcore.matrix>=1.0.0``.
 
 1.0.0 - 2020-11-11
 ------------------
 
 **Other:**
 
-- Renamed `alpha_level` attribute of :func:`quantcore.glm.GeneralizedLinearRegressor` and :func:`quantcore.glm.GeneralizedLinearRegressorCV` to `alpha_index`.
-- Clarified behavior of `scale_predictors`.
+- Renamed ``alpha_level`` attribute of :class:`~quantcore.glm.GeneralizedLinearRegressor` and :class:`~quantcore.glm.GeneralizedLinearRegressorCV` to ``alpha_index``.
+- Clarified behavior of ``scale_predictors``.
 
 0.0.15 - 2020-11-11
 -------------------
 
 **Other:**
 
-- Pin quantcore.matrix < 1.0.0 as we are expecting a breaking change with version 1.0.0.
+- Pin ``quantcore.matrix<1.0.0`` as we are expecting a breaking change with version 1.0.0.
 
 0.0.14 - 2020-08-06
 -------------------

diff --git a/src/quantcore/glm/_glm.py b/src/quantcore/glm/_glm.py
@@ -10,7 +10,7 @@
 import warnings
 from collections.abc import Iterable
 from itertools import chain
-from typing import Any, Optional, Tuple, Union, cast
+from typing import Any, Optional, Sequence, Tuple, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -1028,12 +1028,12 @@ def _report_diagnostics(
 
         Parameters
         ----------
-        full_report: bool (default=False)
+        full_report : bool, optional (default=False)
             Print all available information. When ``False`` and
             ``custom_columns`` is ``None``, a restricted set of columns is
             printed out.
 
-        custom_columns: Iterable (optional, default=None)
+        custom_columns : iterable, optional (default=None)
             Print only the specified columns.
         """
         diagnostics = self._get_formatted_diagnostics(full_report, custom_columns)
@@ -1054,12 +1054,12 @@ def _get_formatted_diagnostics(
 
         Parameters
         ----------
-        full_report: bool (default=False)
+        full_report : bool, optional (default=False)
             Print all available information. When ``False`` and
             ``custom_columns`` is ``None``, a restricted set of columns is
             printed out.
 
-        custom_columns: Iterable (optional, default=None)
+        custom_columns : iterable, optional (default=None)
             Print only the specified columns.
         """
         if not hasattr(self, "diagnostics_"):
@@ -1090,30 +1090,63 @@ def _get_formatted_diagnostics(
             ]
         return df[keep_cols]
 
+    def _find_alpha_index(self, alpha):
+        if alpha is None:
+            return None
+        if not self.alpha_search:
+            raise ValueError
+        # `np.isclose` because comparing floats is difficult
+        isclose = np.isclose(self._alphas, alpha)
+        if np.sum(isclose) == 1:
+            return np.argmax(isclose)  # cf. stackoverflow.com/a/61117770
+        raise IndexError(
+            f"Could not determine a unique index for alpha {alpha}. Available values: "
+            f"{self._alphas}. Consider specifying the index directly via 'alpha_index'."
+        )
+
     def linear_predictor(
-        self, X: ArrayLike, offset: Optional[ArrayLike] = None, alpha_index: int = None
+        self,
+        X: ArrayLike,
+        offset: Optional[ArrayLike] = None,
+        alpha_index: Optional[Union[int, Sequence[int]]] = None,
+        alpha: Optional[Union[float, Sequence[float]]] = None,
     ):
         """Compute the linear predictor, ``X * coef_ + intercept_``.
 
+        If ``alpha_search`` is ``True``, but ``alpha_index`` and ``alpha`` are
+        both ``None``, we use the last alpha value ``self._alphas[-1]``.
+
         Parameters
         ----------
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Samples. This may be a Pandas data frame with categorical dtypes.
             In that case the user must ensure that the categories are exactly
             the same (including the order) as during fit.
 
-        offset: {None, array-like}, shape (n_samples,) optional (default=None)
+        offset : array-like, shape (n_samples,), optional (default=None)
+
+        alpha_index : int or list[int], optional (default=None)
+            Sets the index of the alpha(s) to use in case ``alpha_search`` is
+            ``True``. Incompatible with ``alpha`` (see below).
 
-        alpha_index: int, optional (default=None)
-            Sets the index of the alpha to use in case ``alpha_search`` is
-            ``True``.
+        alpha : float or list[float], optional (default=None)
+            Sets the alpha(s) to use in case ``alpha_search`` is ``True``.
+            Incompatible with ``alpha_index`` (see above).
 
         Returns
         -------
-        C : array, shape (n_samples,)
+        array, shape (n_samples, n_alphas)
             The linear predictor.
         """
         check_is_fitted(self, "coef_")
+
+        if (alpha is not None) and (alpha_index is not None):
+            raise ValueError("Please specify only one of {alpha_index, alpha}.")
+        elif np.isscalar(alpha):  # `None` doesn't qualify
+            alpha_index = self._find_alpha_index(alpha)
+        elif alpha is not None:
+            alpha_index = [self._find_alpha_index(a) for a in alpha]  # type: ignore
+
         X = check_array_matrix_compliant(
             X,
             accept_sparse=["csr", "csc", "coo"],
@@ -1122,43 +1155,61 @@ def linear_predictor(
             ensure_2d=True,
             allow_nd=False,
         )
-        xb = (
-            X @ self.coef_ + self.intercept_
-            if alpha_index is None
-            else X @ self.coef_path_[alpha_index] + self.intercept_path_[alpha_index]
-        )
-        if offset is None:
-            return xb
-        return xb + offset
+
+        if alpha_index is None:
+            xb = X @ self.coef_ + self.intercept_
+        elif np.isscalar(alpha_index):  # `None` doesn't qualify
+            xb = X @ self.coef_path_[alpha_index] + self.intercept_path_[alpha_index]
+            if offset is not None:
+                xb += offset
+        else:  # hopefully a list or some such
+            xb = np.stack(
+                [
+                    X @ self.coef_path_[idx] + self.intercept_path_[idx]
+                    for idx in alpha_index  # type: ignore
+                ],
+                axis=1,
+            )
+            if offset is not None:
+                xb += np.asanyarray(offset)[:, np.newaxis]
+
+        return xb
 
     def predict(
         self,
         X: ShapedArrayLike,
         sample_weight: Optional[ArrayLike] = None,
         offset: Optional[ArrayLike] = None,
-        alpha_index: int = None,
+        alpha_index: Optional[Union[int, Sequence[int]]] = None,
+        alpha: Optional[Union[float, Sequence[float]]] = None,
     ):
         """Predict using GLM with feature matrix ``X``.
 
+        If ``alpha_search`` is ``True``, but ``alpha_index`` and ``alpha`` are
+        both ``None``, we use the last alpha value ``self._alphas[-1]``.
+
         Parameters
         ----------
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Samples. This may be a Pandas data frame with categorical dtypes.
             In that case the user must ensure that the categories are exactly
             the same (including the order) as during fit.
 
-        sample_weight : {None, array-like}, shape (n_samples,), optional (default=None)
+        sample_weight : array-like, shape (n_samples,), optional (default=None)
+
+        offset : array-like, shape (n_samples,), optional (default=None)
 
-        offset: {None, array-like}, shape (n_samples,), optional (default=None)
+        alpha_index : int or list[int], optional (default=None)
+            Sets the index of the alpha(s) to use in case ``alpha_search`` is
+            ``True``. Incompatible with ``alpha`` (see below).
 
-        alpha_index: int, optional (default=None)
-            Sets the index of the alpha to use in case ``alpha_search`` is
-            ``True``. If ``alpha_search`` is ``True``, but ``alpha_index``
-            is ``None``, we use the last alpha value ``self._alphas[-1]``.
+        alpha : float or list[float], optional (default=None)
+            Sets the alpha(s) to use in case ``alpha_search`` is ``True``.
+            Incompatible with ``alpha_index`` (see above).
 
         Returns
         -------
-        C : array, shape (n_samples,)
+        array, shape (n_samples, n_alphas)
             Predicted values times ``sample_weight``.
         """
         X = check_array_matrix_compliant(
@@ -1169,10 +1220,15 @@ def predict(
             ensure_2d=True,
             allow_nd=False,
         )
-        eta = self.linear_predictor(X, offset=offset, alpha_index=alpha_index)
+        eta = self.linear_predictor(
+            X, offset=offset, alpha_index=alpha_index, alpha=alpha
+        )
         mu = get_link(self.link, get_family(self.family)).inverse(eta)
-        weights = _check_weights(sample_weight, X.shape[0], X.dtype)
 
+        if sample_weight is None:
+            return mu
+
+        weights = _check_weights(sample_weight, X.shape[0], X.dtype)
         return mu * weights
 
     def estimate_phi(
@@ -1188,12 +1244,12 @@ def estimate_phi(
         y : array-like, shape (n_samples,)
             Target values.
 
-        sample_weight : {None, array-like}, shape (n_samples,), optional (default=None)
+        sample_weight : array-like, shape (n_samples,), optional (default=None)
             Sample weights.
 
         Returns
         -------
-        phi : float
+        float
             Dispersion parameter.
         """
         check_is_fitted(self, "coef_")
@@ -1256,7 +1312,7 @@ def score(
         y : array-like, shape (n_samples,)
             True values of target.
 
-        sample_weight : {None, array-like}, shape (n_samples,), optional (default=None)
+        sample_weight :array-like, shape (n_samples,), optional (default=None)
             Sample weights.
 
         Returns

diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
@@ -1544,7 +1544,7 @@ def test_clonable(estimator):
 def test_get_best_intercept(
     link: Link, distribution: ExponentialDispersionModel, tol: float, offset
 ):
-    y = np.array([1, 1, 1, 2], dtype=np.float)
+    y = np.array([1, 1, 1, 2], dtype=np.float_)
     if isinstance(distribution, BinomialDistribution):
         y -= 1
 
@@ -1620,6 +1620,77 @@ def test_alpha_search(regression_data):
     assert_allclose(mdl_path.intercept_, mdl_no_path.intercept_)
 
 
+@pytest.mark.parametrize("alpha, alpha_index", [(0.5, 0), (0.75, 1), (None, 1)])
+def test_predict_scalar(regression_data, alpha, alpha_index):
+
+    X, y = regression_data
+    offset = np.zeros_like(y)
+
+    estimator = GeneralizedLinearRegressor(alpha=[0.5, 0.75], alpha_search=True)
+    estimator.fit(X, y)
+
+    target = estimator.predict(X, alpha_index=alpha_index)
+
+    candidate = estimator.predict(X, alpha=alpha, offset=offset)
+    np.testing.assert_allclose(candidate, target)
+
+
+@pytest.mark.parametrize(
+    "alpha, alpha_index",
+    [([0.5, 0.75], [0, 1]), ([0.75, 0.5], [1, 0]), ([0.5, 0.5], [0, 0])],
+)
+def test_predict_list(regression_data, alpha, alpha_index):
+
+    X, y = regression_data
+    offset = np.zeros_like(y)
+
+    estimator = GeneralizedLinearRegressor(alpha=[0.5, 0.75], alpha_search=True)
+    estimator.fit(X, y)
+
+    target = np.stack(
+        [
+            estimator.predict(X, alpha_index=alpha_index[0]),
+            estimator.predict(X, alpha_index=alpha_index[1]),
+        ],
+        axis=1,
+    )
+
+    candidate = estimator.predict(X, alpha=alpha, offset=offset)
+    np.testing.assert_allclose(candidate, target)
+
+    candidate = estimator.predict(X, alpha_index=alpha_index, offset=offset)
+    np.testing.assert_allclose(candidate, target)
+
+
+def test_predict_error(regression_data):
+
+    X, y = regression_data
+
+    estimator = GeneralizedLinearRegressor(alpha=0.5, alpha_search=False).fit(X, y)
+
+    with pytest.raises(ValueError):
+        estimator.predict(X, alpha=0.5)
+    with pytest.raises(ValueError):
+        estimator.predict(X, alpha=[0.5])
+    with pytest.raises(AttributeError):
+        estimator.predict(X, alpha_index=0)
+    with pytest.raises(AttributeError):
+        estimator.predict(X, alpha_index=[0])
+
+    estimator.set_params(alpha=[0.5, 0.75], alpha_search=True).fit(X, y)
+
+    with pytest.raises(IndexError):
+        estimator.predict(X, y, alpha=0.25)
+    with pytest.raises(IndexError):
+        estimator.predict(X, y, alpha=[0.25, 0.5])
+    with pytest.raises(IndexError):
+        estimator.predict(X, y, alpha_index=2)
+    with pytest.raises(IndexError):
+        estimator.predict(X, y, alpha_index=[2, 0])
+    with pytest.raises(ValueError):
+        estimator.predict(X, y, alpha_index=0, alpha=0.5)
+
+
 def test_very_large_initial_gradient():
     # this is a problem where 0 is a starting value that produces
     # a very large gradient initially