Skip to content

Commit

Permalink
Merge pull request #53 from gradientinstitute/sphinx
Browse files Browse the repository at this point in the history
Sphinx setup, documentation for estimators.py, evaluators.py
  • Loading branch information
dsteinberg committed Apr 6, 2023
2 parents e88fb7a + 6a4555c commit 574c66e
Show file tree
Hide file tree
Showing 18 changed files with 692 additions and 110 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
.*
*__pycache__*
*.egg*


# Sphinx
docs/build
42 changes: 38 additions & 4 deletions cinspect/dependence.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Partial dependence and individual conditional expectation functions."""

import numbers
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -193,10 +193,44 @@ def construct_grid(

def plot_partial_dependence_density(
ax, grid, density, feature_name, categorical, color="black", alpha=0.5
):
"""Plot partial dependency on ax.
) -> Tuple[np.ndarray, Union[np.ndarray, List[np.ndarray]]]:
"""
Plot partial dependency on axes ax.
TODO next
Parameters
----------
ax : _type_
_description_
grid : _type_
_description_
density : _type_
_description_
feature_name : _type_
_description_
categorical : _type_
_description_
color : str, optional
_description_, by default "black"
alpha : float, optional
_description_, by default 0.5
Returns
-------
bins : np.ndarray
The edges of the bins. Length nbins + 1 (nbins left edges and right edge of last bin).
Always a single array even when multiple data sets are passed in.
n : Union[np.ndarray, List[np.ndarray]]
The values of the histogram bins.
If input x is an array, then this is an array of length nbins.
If input is a sequence of arrays [data1, data2, ...],
then this is a list of arrays with the values
of the histograms for each of the arrays in the same order.
The dtype of the array n (or of its element arrays) will always be float
even if no weighting or normalization is used.
TODO: proper docstring
"""
# plot the distribution of the variable on the second axis
if categorical:
Expand Down
146 changes: 121 additions & 25 deletions cinspect/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Licensed under the Apache 2.0 License.
"""Convenience estimators for causal estimation."""

from typing import NamedTuple, Union
from typing import Any, NamedTuple, Optional, Union # , Self

import numpy as np

import numpy.typing as npt
from scipy import linalg, stats
from sklearn.base import BaseEstimator, RegressorMixin, clone, check_X_y
from sklearn.base import BaseEstimator, RegressorMixin, check_X_y, clone
from sklearn.linear_model import BayesianRidge, LinearRegression
from sklearn.utils.validation import check_is_fitted

Expand All @@ -17,6 +17,9 @@
class RegressionStatisticalResults(NamedTuple):
"""Statistical results object for linear regressors.
TODO should this be private? Only used internally
TODO sphinx parses attributes directly
Attributes
----------
beta: float or ndarray
Expand Down Expand Up @@ -55,8 +58,18 @@ def __repr__(self) -> str:


class _StatMixin:
def model_statistics(self):
"""Get the coefficient statistics for this estimator."""
"""Mixin to get linear coefficient statistics for an estimator."""

def model_statistics(self) -> RegressionStatisticalResults:
"""
Get the linear coefficient statistics for this estimator.
Returns
-------
RegressionStatisticalResults
Linear coefficient statistics for this estimator.
"""
# TODO should we not check that dof_, t_, p_ are fitted as well?
check_is_fitted(self, attributes=["coef_", "coef_se_"])
stats = RegressionStatisticalResults(
beta=self.coef_,
Expand All @@ -74,7 +87,20 @@ class LinearRegressionStat(LinearRegression, _StatMixin):
def fit(self, X, y, sample_weight=None):
"""Fit linear regression model to data.
TODO: complete docstring
Parameters
----------
X : npt.ArrayLike
Training features, of shape (n_samples, n_features)
TODO I believe ArrayLike includes dataframes: verify
y : npt.ArrayLike
Training targets, of shape (n_samples, n_targets)
sample_weight : npt.ArrayLike, optional
Weights for each sample, of shape (n_samples, ), by default None
Returns
-------
self : LinearRegressionStat
The fitted object
"""
super().fit(X, y, sample_weight)
X, y = check_X_y(X, y)
Expand All @@ -95,7 +121,19 @@ class BayesianRidgeStat(BayesianRidge, _StatMixin):
def fit(self, X, y, sample_weight=None):
"""Fit bayesian ridge estimator to data.
TODO: complete docstring
Parameters
----------
X : npt.ArrayLike
Training features, of shape (n_samples, n_features).
y : npt.ArrayLike
Training targets, of shape (n_samples, n_targets)
sample_weight : npt.ArrayLike, optional
Weights for each sample, of shape (n_samples, ), by default None
Returns
-------
self : BayesianRidgeStat
The fitted object
"""
super().fit(X, y, sample_weight)
X, y = check_X_y(X, y)
Expand All @@ -118,24 +156,29 @@ class BinaryTreatmentRegressor(BaseEstimator, RegressorMixin):
depending on the value of the treatment.
NOTE: This can be used in conjunction with the
`evaluators.BinaryTreatmentEffect` evaluator to obtain statistics of a
:class:`cinspect.evaluators.BinaryTreatmentEffect` evaluator to obtain statistics of a
binary treatment.
Parameters
----------
estimator: scikit learn compatible estimator
TODO
treatment_column: str or int
TODO
treatment_val: any
TODO
estimator : BaseEstimator
scikit learn compatible estimator,
from which separate treatment and control estimators will be generated
TODO perhaps allow separate estimators for treatment and control;
treatment_column: Union[str, int]
Treatment column index
TODO: str only if it's a dataframe
treatment_val: Optional[Any], default 1
Constant value of treatment column
which denotes that the current row is in the treatment cohort
TODO example
"""

def __init__(
self,
estimator,
treatment_column,
treatment_val=1,
estimator : BaseEstimator,
treatment_column : Union[str, int],
treatment_val : Optional[Any] = 1,
):
"""Construct a new instance of a BinaryTreatmentRegressor."""
self.estimator = estimator
Expand All @@ -148,9 +191,9 @@ def fit(self, X, y, groups=None):
Parameters
----------
X: ndarray or DataFrame
TODO
Training features, of shape (n_samples, n_features)
y: ndarray or DataFrame
TODO
Training targets, of shape (n_samples, n_targets)
groups: ndarray, optional
Group labels for the samples used while splitting the dataset into
train/test set. Only used in conjunction with a parameter search
Expand All @@ -175,8 +218,20 @@ def fit(self, X, y, groups=None):
self.c_estimator_.fit(Xc, yc)
return self

def predict(self, X):
"""Predict the outcomes."""
def predict(self, X: npt.ArrayLike) -> np.ndarray:
"""
Predict the outcomes, choosing the estimator based on the value of treatment column.
Parameters
----------
X : npt.ArrayLike
Features, of shape (n_prediction_samples, n_features)
Returns
-------
y : np.ndarray
Predicted outcomes, of shape (n_prediction_samples, n_targets)
"""
check_is_fitted(self, attributes=["t_estimator_", "c_estimator_"])
Xt, Xc, t_mask = _treatment_split(X, self.treatment_column, self.treatment_val)
Ey = np.zeros(len(X))
Expand All @@ -188,16 +243,57 @@ def predict(self, X):
return Ey

def get_params(self, deep: bool = True) -> dict:
"""Get this estimator's initialisation parameters."""
"""
Get parameters for this estimator.
This is a method of :class:`~sklearn.base.BaseEstimator`.
TODO make deep argument functional.
I believe this implementation could be replaced with the class's default implementation
Parameters
----------
deep : bool, optional
If True, will return the parameters for this estimator and contained subobjects
that are estimators.
By default True
Returns
-------
params : dict
Parameter names mapped to their values.
"""
return {
"estimator": self.estimator,
"treatment_column": self.treatment_column,
"treatment_val": self.treatment_val,
}

def set_params(self, **parameters: dict):
"""Set this estimator's initialisation parameters."""
for parameter, value in parameters.items():
def set_params(self, **params: dict) -> Any: # TODO use Self: PEP 673, Python 3.11
"""
Set the parameters of this estimator.
This is a method of :class:`~sklearn.base.BaseEstimator`.
TODO satisfy the following:
The method works on simple estimators as well as on nested objects (such as Pipeline).
The latter have parameters of the form <component>__<parameter>
so that it's possible to update each component of a nested object.
TODO I believe this could be replaced with the base class's implementation
Parameters
----------
**params : dict
BinaryTreatmentRegressor parameters.
Returns
-------
self: BinaryTreatmentRegressor
BinaryTreatmentRegressor instance.
"""
for parameter, value in params.items():
setattr(self, parameter, value)
return self

Expand Down

0 comments on commit 574c66e

Please sign in to comment.