[ENH] merge metric files into _base_metrics.py, rename _disparities.py to fairness_metrics.py, and move _input_manipulations.py to the utils module #1202

Merged 15 commits on Feb 7, 2023

Changes shown from 10 commits
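The changes below are an internal reorganization; the public fairlearn.metrics namespace re-exports the same functions. A minimal sketch of what stays stable versus what moves (private paths are shown for illustration only and carry no compatibility guarantee):

# Public imports are unaffected by the renames:
from fairlearn.metrics import selection_rate, demographic_parity_difference

# Private modules moved or renamed; only code reaching into them changes:
#   fairlearn.metrics._extra_metrics       -> fairlearn.metrics._base_metrics
#   fairlearn.metrics._disparities         -> fairlearn.metrics._fairness
#   fairlearn.metrics._input_manipulations -> fairlearn.utils._input_manipulations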
26 changes: 13 additions & 13 deletions fairlearn/metrics/__init__.py
@@ -25,16 +25,16 @@

import sys as _sys

from ._disparities import demographic_parity_difference # noqa: F401
from ._disparities import demographic_parity_ratio # noqa: F401
from ._disparities import equalized_odds_difference, equalized_odds_ratio # noqa: F401
from ._extra_metrics import count # noqa: F401
from ._extra_metrics import false_negative_rate # noqa: F401
from ._extra_metrics import false_positive_rate # noqa: F401
from ._extra_metrics import mean_prediction # noqa: F401
from ._extra_metrics import selection_rate # noqa: F401
from ._extra_metrics import true_negative_rate # noqa: F401
from ._extra_metrics import true_positive_rate  # noqa: F401
from ._base_metrics import count # noqa: F401
from ._base_metrics import false_negative_rate # noqa: F401
from ._base_metrics import false_positive_rate # noqa: F401
from ._base_metrics import mean_prediction # noqa: F401
from ._base_metrics import selection_rate # noqa: F401
from ._base_metrics import true_negative_rate # noqa: F401
from ._base_metrics import true_positive_rate  # noqa: F401
from ._fairness import demographic_parity_difference # noqa: F401
from ._fairness import demographic_parity_ratio # noqa: F401
from ._fairness import equalized_odds_difference, equalized_odds_ratio # noqa: F401
from ._generated_metrics import _generated_metric_dict # noqa: F401
from ._make_derived_metric import make_derived_metric # noqa: F401
from ._metric_frame import MetricFrame # noqa: F401
@@ -55,14 +55,14 @@
"plot_model_comparison"
]

_disparities = [
_fairness = [
"demographic_parity_difference",
"demographic_parity_ratio",
"equalized_odds_difference",
"equalized_odds_ratio",
]

_extra_metrics = [
_base_metrics = [
"true_positive_rate",
"true_negative_rate",
"false_positive_rate",
@@ -73,5 +73,5 @@
]

__all__ = (
    _core + _disparities + _extra_metrics + list(sorted(_generated_metric_dict.keys()))
    _core + _fairness + _base_metrics + list(sorted(_generated_metric_dict.keys()))
)
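Since the public names are re-exported unchanged, the renames are invisible at the package root. A quick sanity check (hypothetical snippet, not part of this PR):

import fairlearn.metrics as metrics

# __all__ is assembled from the renamed lists plus the generated metrics.
assert "demographic_parity_difference" in metrics.__all__
assert "selection_rate" in metrics.__all__
assert "selection_rate_difference" in metrics.__all__  # generated at import time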
fairlearn/metrics/{_extra_metrics.py → _base_metrics.py}
@@ -1,18 +1,21 @@
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

"""A variety of extra metrics useful for assessing fairness.
"""A variety of base metrics useful for assessing fairness.

These are metrics which are not part of `scikit-learn`.
"""

import numpy as np
import sklearn.metrics as skm
from typing import Any

from ._mean_predictions import mean_prediction # noqa: F401
from ._metric_frame import check_consistent_length
from ._selection_rate import selection_rate # noqa: F401,E501
from fairlearn.utils._input_manipulations import _convert_to_ndarray_and_squeeze

_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE = (
    "Empty y_pred passed to selection_rate function."
)
_TOO_MANY_UNIQUE_Y_VALS = "Must have no more than two unique y values"
_RESTRICTED_VALS_IF_POS_LABEL_NONE = (
    "If pos_label is not specified, values must be from {0, 1} or {-1, 1}"  # noqa: E501
@@ -257,3 +260,122 @@ def count(y_true, y_pred) -> int:
"""
check_consistent_length(y_true, y_pred)
return len(y_true)
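# Illustrative (not part of the PR): count ignores the values themselves and
# returns the shared length, e.g. count([1, 0, 1], [1, 1, 0]) == 3.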


def mean_prediction(y_true, y_pred, sample_weight=None) -> float:
    r"""Calculate the (weighted) mean prediction.

    The true values are ignored, but required as an argument in order
    to maintain a consistent interface.

    Parameters
    ----------
    y_true : array_like
        The true labels (ignored)

    y_pred : array_like
        The predicted labels

    sample_weight : array_like
        Optional array of sample weights
    """
    y_p = _convert_to_ndarray_and_squeeze(y_pred)
    s_w = np.ones(len(y_p))
    if sample_weight is not None:
        s_w = _convert_to_ndarray_and_squeeze(sample_weight)

    return np.dot(y_p, s_w) / s_w.sum()
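# Illustrative check (not part of the PR): y_true is ignored and weights pull
# the mean toward the heavier-weighted predictions, e.g.
#   mean_prediction([0, 0, 0], [1.0, 2.0, 3.0], sample_weight=[1, 1, 2])
# returns (1*1 + 2*1 + 3*2) / 4 = 2.25.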


def _mean_overprediction(y_true, y_pred, sample_weight=None) -> float:
    """Calculate the (weighted) mean overprediction.

    This is the (weighted) mean of the error where any negative
    errors (i.e. underpredictions) are set to zero.

    Parameters
    ----------
    y_true : array_like
        The true values

    y_pred : array_like
        The predicted values

    sample_weight : array_like
        Optional array of sample weights
    """
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)
    s_w = np.ones(len(y_p))
    if sample_weight is not None:
        s_w = _convert_to_ndarray_and_squeeze(sample_weight)

    err = y_p - y_t
    err[err < 0] = 0

    return np.dot(err, s_w) / s_w.sum()
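# Worked example (illustrative): for y_true=[1, 2, 3], y_pred=[2, 1, 5] the
# errors are [1, -1, 2]; zeroing the negative error leaves [1, 0, 2], so the
# unweighted mean overprediction is (1 + 0 + 2) / 3 = 1.0.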


def _mean_underprediction(y_true, y_pred, sample_weight=None) -> float:
    """Calculate the (weighted) mean underprediction.

    This is the (weighted) mean of the error where any
    positive errors (i.e. overpredictions) are set to zero.
    The absolute value of the underpredictions is used, so the
    returned value is always positive.

    Parameters
    ----------
    y_true : array_like
        The true values

    y_pred : array_like
        The predicted values

    sample_weight : array_like
        Optional array of sample weights
    """
    y_t = _convert_to_ndarray_and_squeeze(y_true)
    y_p = _convert_to_ndarray_and_squeeze(y_pred)
    s_w = np.ones(len(y_p))
    if sample_weight is not None:
        s_w = _convert_to_ndarray_and_squeeze(sample_weight)

    err = y_p - y_t
    err[err > 0] = 0

    # Error metrics should decrease to 0, so flip the sign
    return -np.dot(err, s_w) / s_w.sum()
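# Worked example (illustrative): with the same inputs as above the errors are
# [1, -1, 2]; zeroing the positive errors leaves [0, -1, 0], and the sign
# flip makes the result positive: -(-1) / 3 = 0.333...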


def selection_rate(y_true, y_pred, *, pos_label: Any = 1, sample_weight=None) -> float:
    """Calculate the fraction of predicted labels matching the 'good' outcome.

    The argument `pos_label` specifies the 'good' outcome. For consistency with
    other metric functions, the ``y_true`` argument is required, but ignored.

    Read more in the :ref:`User Guide <custom_fairness_metrics>`.

    Parameters
    ----------
    y_true : array_like
        The true labels (ignored)

    y_pred : array_like
        The predicted labels

    pos_label : Scalar
        The label to treat as the 'good' outcome

    sample_weight : array_like
        Optional array of sample weights
    """
    selected = _convert_to_ndarray_and_squeeze(y_pred) == pos_label
    if len(selected) == 0:
        raise ValueError(_EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE)

    s_w = np.ones(len(selected))
    if sample_weight is not None:
        s_w = np.squeeze(np.asarray(sample_weight))

    return np.dot(selected, s_w) / s_w.sum()
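A short usage sketch of the relocated selection_rate (data illustrative): with the default pos_label of 1, the result is the weighted fraction of predictions equal to 1.

from fairlearn.metrics import selection_rate

y_pred = [1, 0, 1, 1]
# y_true is required for interface consistency but ignored.
print(selection_rate([0, 0, 0, 0], y_pred))  # 0.75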
fairlearn/metrics/{_disparities.py → _fairness.py}
@@ -1,9 +1,9 @@
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

"""Metrics for measuring disparity."""
"""Metrics for measuring fairness."""

from ._extra_metrics import false_positive_rate, selection_rate, true_positive_rate
from ._base_metrics import false_positive_rate, selection_rate, true_positive_rate
from ._metric_frame import MetricFrame


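The renamed module still builds its fairness metrics from MetricFrame over the base metrics; for example, demographic_parity_difference is the between-group difference in selection_rate. An illustrative usage sketch:

from fairlearn.metrics import demographic_parity_difference

y_true = [0, 1, 1, 0]  # required by the API; selection_rate ignores it
y_pred = [1, 1, 0, 0]
sf = ["a", "a", "b", "b"]

# Group "a" selects at 1.0 and group "b" at 0.0, so the difference is 1.0.
print(demographic_parity_difference(y_true, y_pred, sensitive_features=sf))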
4 changes: 2 additions & 2 deletions fairlearn/metrics/_generated_metrics.py
@@ -3,7 +3,7 @@

import sklearn.metrics as skm

from ._extra_metrics import (
from ._base_metrics import (
    false_negative_rate,
    false_positive_rate,
    selection_rate,
@@ -13,7 +13,7 @@
from ._make_derived_metric import make_derived_metric

METRICS_SPEC = [
    # base metrics from _extra_metrics
    # base metrics from _base_metrics
    (true_positive_rate, ["difference", "ratio"]),
    (true_negative_rate, ["difference", "ratio"]),
    (false_positive_rate, ["difference", "ratio"]),
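Each (metric, transforms) pair in METRICS_SPEC is expanded by make_derived_metric into group-aggregated variants. A hedged usage sketch of one generated metric, with illustrative data:

import fairlearn.metrics as metrics

y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 1, 1, 1]
group = ["a", "a", "a", "b", "b", "b"]

# Generated from the spec above: per-group false positive rate, then the
# between-group difference (here 1.0 - 0.0 = 1.0).
print(metrics.false_positive_rate_difference(
    y_true, y_pred, sensitive_features=group))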
92 changes: 0 additions & 92 deletions fairlearn/metrics/_mean_predictions.py

This file was deleted.

2 changes: 1 addition & 1 deletion fairlearn/metrics/_metric_frame.py
@@ -10,7 +10,7 @@
import pandas as pd
from sklearn.utils import check_consistent_length

from fairlearn.metrics._input_manipulations import _convert_to_ndarray_and_squeeze
from fairlearn.utils._input_manipulations import _convert_to_ndarray_and_squeeze

from ._annotated_metric_function import AnnotatedMetricFunction
from ._disaggregated_result import DisaggregatedResult
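Judging by its name and call sites, the relocated helper coerces inputs to a squeezed 1-D ndarray; a sketch assuming that contract is unchanged by the move to fairlearn.utils:

import numpy as np

from fairlearn.utils._input_manipulations import _convert_to_ndarray_and_squeeze

# A column vector and a flat list normalize to the same 1-D shape.
print(_convert_to_ndarray_and_squeeze(np.array([[1], [2], [3]])).shape)  # (3,)
print(_convert_to_ndarray_and_squeeze([4, 5, 6]).shape)                  # (3,)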
45 changes: 0 additions & 45 deletions fairlearn/metrics/_selection_rate.py

This file was deleted.

@@ -6,7 +6,7 @@
import sklearn.metrics as skm

import fairlearn.metrics as metrics
from fairlearn.metrics._extra_metrics import _get_labels_for_confusion_matrix
from fairlearn.metrics._base_metrics import _get_labels_for_confusion_matrix

# =============================================

2 changes: 1 addition & 1 deletion test/unit/metrics/test_selection_rate.py
@@ -4,7 +4,7 @@
import pytest

import fairlearn.metrics as metrics
from fairlearn.metrics._selection_rate import _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE
from fairlearn.metrics._base_metrics import _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE


def test_selection_rate_empty():
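The truncated test presumably exercises the empty-input guard that now lives in _base_metrics; a hedged reconstruction of its body:

import pytest

import fairlearn.metrics as metrics
from fairlearn.metrics._base_metrics import _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE

def test_selection_rate_empty():
    # Empty predictions must raise with the module's dedicated message.
    with pytest.raises(ValueError) as exc:
        metrics.selection_rate([], [])
    assert exc.value.args[0] == _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE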