[MRG] Regression metrics return arrays for multi-output cases #2493

Closed · wants to merge 14 commits

sklearn/metrics/metrics.py (81 changes: 60 additions & 21 deletions)
@@ -1883,7 +1883,7 @@ def hamming_loss(y_true, y_pred, classes=None):
###############################################################################
# Regression loss functions
###############################################################################
def mean_absolute_error(y_true, y_pred):
def mean_absolute_error(y_true, y_pred, average='micro'):
Member: It should be output_weights and not average.

Member Author: Yes, that is the reason for most of the test failures; I shall fix it.

"""Mean absolute error regression loss

Parameters
@@ -1894,10 +1894,16 @@ def mean_absolute_error(y_true, y_pred):
y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
Estimated target values.

average : 'micro' or False
Member: To be consistent with the other metrics, it should be None instead of False; see precision_score, for instance.
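For reference, precision_score with average=None returns per-class scores instead of a single float; a minimal sketch of that convention (illustrative values, not part of this diff):

from sklearn.metrics import precision_score

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]

# average=None returns one precision per class rather than a single float
precision_score(y_true, y_pred, average=None)  # array([ 0.66...,  1.  ])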

If 'micro', returns a float.
If False, returns an array (multi-output).
(default: 'micro')

Returns
-------
loss : float
A positive floating point value (the best value is 0.0).
loss : float or a numpy array of shape [n_outputs]
If average is 'micro', a positive floating point value (the best value is 0.0).
Else, a numpy array of positive floating points is returned.

Examples
--------
@@ -1910,13 +1916,18 @@ def mean_absolute_error(y_true, y_pred):
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
>>> mean_absolute_error(y_true, y_pred)
0.75

>>> mean_absolute_error(y_true, y_pred, average=False)
array([ 0.5, 1. ])
"""
if not average:
axis = 0
else:
axis = None
y_type, y_true, y_pred = _check_reg_targets(y_true, y_pred)
return np.mean(np.abs(y_pred - y_true))
return np.mean(np.abs(y_pred - y_true), axis=axis)
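The axis switch is the whole mechanism here: axis=None averages over every entry ('micro'), while axis=0 averages each output column separately. A minimal numpy sketch of the two behaviours (values are illustrative only, not part of the diff):

import numpy as np

errors = np.abs(np.array([[0., 1.], [1., 1.], [1., 1.]]))
np.mean(errors)          # 0.833... : one float over all entries
np.mean(errors, axis=0)  # array([ 0.66...,  1.]) : one mean per output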


def mean_squared_error(y_true, y_pred):
def mean_squared_error(y_true, y_pred, average='micro'):
"""Mean squared error regression loss

Parameters
Expand All @@ -1927,10 +1938,16 @@ def mean_squared_error(y_true, y_pred):
y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
Estimated target values.

average : 'micro' or False
If 'micro', returns a float.
If False, returns an array (multi-output).
(default: 'micro')

Returns
-------
loss : float
A positive floating point value (the best value is 0.0).
loss : float or a numpy array of shape[n_outputs]
Member: nitpick: can you add a space between shape and [n_outputs]?

If average is "micro", a positive floating point value (the best value is 0.0).
Else, a numpy array of positive floating points is returned.

Member: Here it is the mean_squared_error.

Examples
--------
@@ -1943,10 +1960,15 @@ def mean_squared_error(y_true, y_pred):
>>> y_pred = [[0, 2],[-1, 2],[8, -5]]
>>> mean_squared_error(y_true, y_pred) # doctest: +ELLIPSIS
0.708...

>>> mean_squared_error(y_true, y_pred, average=False)
array([ 0.41666667, 1. ])
"""
if not average:
axis = 0
else:
axis = None
y_type, y_true, y_pred = _check_reg_targets(y_true, y_pred)
return np.mean((y_pred - y_true) ** 2)
return np.mean((y_pred - y_true) ** 2, axis=axis)


###############################################################################
@@ -2000,7 +2022,7 @@ def explained_variance_score(y_true, y_pred):
return 1 - numerator / denominator


def r2_score(y_true, y_pred):
def r2_score(y_true, y_pred, average='micro'):
"""R^2 (coefficient of determination) regression score function.

Best possible score is 1.0, lower values are worse.
@@ -2013,10 +2035,17 @@ def r2_score(y_true, y_pred):
y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
Estimated target values.

average : 'micro' or False
If 'micro', returns a float.
If False, returns an array (multi-output).
(default: 'micro')

Returns
-------
z : float
The R^2 score.
z : float or a numpy array of shape[n_outputs]
Member: There is a space missing: shape [n_outputs].

If average is 'micro', it returns a single R^2 score computed over the
flattened arrays.
If average is False, it returns an array of floats, one R^2 score per
output dimension.

Notes
-----
@@ -2041,23 +2070,33 @@ def r2_score(y_true, y_pred):
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
>>> r2_score(y_true, y_pred) # doctest: +ELLIPSIS
0.938...

>>> r2_score(y_true, y_pred, average=False) # doctest: +ELLIPSIS
array([ 0.96543779, 0.90816327])
"""
y_type, y_true, y_pred = _check_reg_targets(y_true, y_pred)

if len(y_true) == 1:
raise ValueError("r2_score can only be computed given more than one"
" sample.")
numerator = ((y_true - y_pred) ** 2).sum(dtype=np.float64)
denominator = ((y_true - y_true.mean(axis=0)) ** 2).sum(dtype=np.float64)

if denominator == 0.0:
if numerator == 0.0:
return 1.0
if not average:
axis = 0
else:
axis = None
numerator = ((y_true - y_pred) ** 2).sum(dtype=np.float64, axis=axis)
denominator = ((y_true - y_true.mean(axis=0)) ** 2).sum(dtype=np.float64, axis=axis)
if denominator.sum() == 0.0:
if numerator.sum() == 0.0:
if average:
return 1.0
else:
return np.ones(y_true.shape[1], dtype=np.float64)
Member: I think there is a little mistake here, since you can have a defined r2_score for some outputs but not all. A test would be needed for that case.

Member Author: Would something like this do?

y_true = [[1, 1], [1, 1]]
y_pred = [[1, 1], [1, 1]]
assert np.array_equal(r2_score(y_true, y_pred, average=None), np.array([1., 1.]))

Member: Consider, for instance:

In [2]: from sklearn.metrics import r2_score

In [3]: r2_score([0, 0], [2, 1])
Out[3]: 0.0

In [4]: r2_score([-1, 1], [2, 1])
Out[4]: -3.5

I expect r2_score([[0, -1],[0, 1]], [[2, 2],[1, 1]], average=None) to be equal to np.array([0, -3.5]).

Member Author: Ah, okay.

else:
# arbitrary set to zero to avoid -inf scores, having a constant
# y_true is not interesting for scoring a regression anyway
return 0.0
if average:
return 0.0
else:
return np.zeros(y_true.shape[1], dtype=np.float64)

return 1 - numerator / denominator
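The mixed case the reviewers discuss above is worth pinning down. Here is a hedged sketch of one per-output treatment consistent with the expectation that r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], average=None) equals array([0., -3.5]); the helper name and fallback rules are illustrative assumptions, not the PR's final code:

import numpy as np

def r2_per_output(y_true, y_pred):
    # Per-output numerators and denominators, as in the diff above.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    numerator = ((y_true - y_pred) ** 2).sum(axis=0)
    denominator = ((y_true - y_true.mean(axis=0)) ** 2).sum(axis=0)
    # Start from 1.0, which covers denominator == 0 with numerator == 0.
    scores = np.ones(y_true.shape[1])
    defined = denominator != 0
    scores[defined] = 1 - numerator[defined] / denominator[defined]
    # Constant y_true with nonzero error: arbitrarily 0.0, as in the diff.
    scores[~defined & (numerator != 0)] = 0.0
    return scores

r2_per_output([[0, -1], [0, 1]], [[2, 2], [1, 1]])  # array([ 0. , -3.5])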

sklearn/metrics/tests/test_metrics.py (22 changes: 22 additions & 0 deletions)
@@ -1992,3 +1992,25 @@ def test_log_loss():
y_pred = np.asarray(y_pred) > .5
loss = log_loss(y_true, y_pred, normalize=True, eps=.1)
assert_almost_equal(loss, log_loss(y_true, np.clip(y_pred, .1, .9)))

def test_regression_multioutput_array():
y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

mse = list(mean_squared_error(y_true, y_pred, average=False))
mae = list(mean_absolute_error(y_true, y_pred, average=False))
r = list(r2_score(y_true, y_pred, average=False))
assert_equal(mse, [0.125, 0.5625])
assert_equal(mae, [0.25, 0.625])
assert_almost_equal(r, [0.95, 0.93], decimal=2)
Member: can you use assert_array_almost_equal?
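One way to apply that suggestion (a sketch only; the diff above is unchanged, and assert_array_almost_equal is assumed to come from numpy.testing):

from numpy.testing import assert_array_almost_equal

assert_array_almost_equal(mean_squared_error(y_true, y_pred, average=False),
                          [0.125, 0.5625])
assert_array_almost_equal(mean_absolute_error(y_true, y_pred, average=False),
                          [0.25, 0.625])
assert_array_almost_equal(r2_score(y_true, y_pred, average=False),
                          [0.95, 0.93], decimal=2)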


# mean_absolute_error and mean_squared_error are equal here because
# every error is exactly 1 (|e| == e**2 when e is 0 or 1).
y_true = [[0, 0]]*4
y_pred = [[1, 1]]*4
mse = list(mean_squared_error(y_true, y_pred, average=False))
mae = list(mean_absolute_error(y_true, y_pred, average=False))
r = list(r2_score(y_true, y_pred, average=False))
assert_equal(mse, [1., 1.])
assert_equal(mae, [1., 1.])
assert_equal(r, [0., 0.])