From 816331634983303510a74038663218ec1995a74c Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 14 Jul 2020 18:02:53 +0900 Subject: [PATCH 01/30] mrr implementation --- catalyst/utils/metrics/mrr.py | 26 ++++++++++++++++++++++++ catalyst/utils/metrics/tests/test_mrr.py | 12 +++++++++++ 2 files changed, 38 insertions(+) create mode 100644 catalyst/utils/metrics/mrr.py create mode 100644 catalyst/utils/metrics/tests/test_mrr.py diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py new file mode 100644 index 0000000000..2125d37285 --- /dev/null +++ b/catalyst/utils/metrics/mrr.py @@ -0,0 +1,26 @@ +import torch + +def mrr( + outputs: torch.Tensor, + targets: torch.Tensor +) + + """ + Calculate the MRR score given model ouptputs and targets + Args: + outputs [batch_size, slate_length] (torch.Tensor): model outputs, logits + targets [batch_szie, slate_length] (torch.Tensor): ground truth, labels + Returns: + mrr (float): the mrr score + """ + outputs = outputs.clone() + targets = targets.clone() + + tmp = targets.view(-1, 1) + targets = tmp.expand_as(indices) + hits = (targets == indices).nonzero() + ranks = hits[:, -1] + 1 + ranks = ranks.float() + rranks = torch.reciprocal(ranks) + mrr = torch.sum(rranks).data / targets.size(0) + return mrr \ No newline at end of file diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py new file mode 100644 index 0000000000..f9a5489e6e --- /dev/null +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -0,0 +1,12 @@ +import torch + +from catalyst.utils import metrics + +def test_mrr(): + """ + Tests for catalyst.utils.metrics.mrr metric. + """ + + y_pred = [0.5, 0.2] + y_true = [1.0, 0.0] + \ No newline at end of file From 17a4b0344479f582d7eedfa67560316d3aa84381 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Wed, 15 Jul 2020 03:57:00 +0900 Subject: [PATCH 02/30] add mrr --- catalyst/utils/metrics/mrr.py | 29 ++++++++++++++++-------- catalyst/utils/metrics/tests/test_mrr.py | 12 +++++++++- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 2125d37285..51b964d127 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -3,7 +3,7 @@ def mrr( outputs: torch.Tensor, targets: torch.Tensor -) +): """ Calculate the MRR score given model ouptputs and targets @@ -15,12 +15,21 @@ def mrr( """ outputs = outputs.clone() targets = targets.clone() - - tmp = targets.view(-1, 1) - targets = tmp.expand_as(indices) - hits = (targets == indices).nonzero() - ranks = hits[:, -1] + 1 - ranks = ranks.float() - rranks = torch.reciprocal(ranks) - mrr = torch.sum(rranks).data / targets.size(0) - return mrr \ No newline at end of file + max_rank = targets.shape[0] + + _, indices = outputs.sort(descending=True, dim=-1) + true_sorted_by_preds = torch.gather(targets, dim=0, index=indices) + values, indices = torch.max(true_sorted_by_preds, dim=0) + indices = indices.type_as(values).unsqueeze(dim=0).t() + ats_rep = torch.tensor(data=max_rank, device=indices.device, dtype=torch.float32) + within_at_mask = (indices < ats_rep).type(torch.float32) + + result = torch.tensor(1.0) / (indices + torch.tensor(1.0)) + + zero_sum_mask = torch.sum(values) == 0.0 + result[zero_sum_mask] = 0.0 + + mrr = result * within_at_mask + return mrr[0] + +__all__ = ['mrr'] \ No newline at end of file diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index f9a5489e6e..e03c97c6e2 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -7,6 +7,16 @@ def test_mrr(): Tests for catalyst.utils.metrics.mrr metric. """ + # check 0 simple case y_pred = [0.5, 0.2] y_true = [1.0, 0.0] - \ No newline at end of file + + mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) + assert mrr == 1 + + # check 1 simple case + y_pred = [0.5, 0.2] + y_true = [0.0, 1.0] + + mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) + assert mrr == 0.5 From 0f1151a14881b43fe3c7fed1aae6434c3b663920 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Wed, 15 Jul 2020 16:36:54 +0900 Subject: [PATCH 03/30] edit codestyle --- catalyst/utils/metrics/mrr.py | 13 +++++++------ catalyst/utils/metrics/tests/test_mrr.py | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 51b964d127..d99345bdc3 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -1,9 +1,7 @@ import torch -def mrr( - outputs: torch.Tensor, - targets: torch.Tensor -): + +def mrr(outputs: torch.Tensor, targets: torch.Tensor): """ Calculate the MRR score given model ouptputs and targets @@ -21,7 +19,9 @@ def mrr( true_sorted_by_preds = torch.gather(targets, dim=0, index=indices) values, indices = torch.max(true_sorted_by_preds, dim=0) indices = indices.type_as(values).unsqueeze(dim=0).t() - ats_rep = torch.tensor(data=max_rank, device=indices.device, dtype=torch.float32) + ats_rep = torch.tensor( + data=max_rank, device=indices.device, dtype=torch.float32 + ) within_at_mask = (indices < ats_rep).type(torch.float32) result = torch.tensor(1.0) / (indices + torch.tensor(1.0)) @@ -32,4 +32,5 @@ def mrr( mrr = result * within_at_mask return mrr[0] -__all__ = ['mrr'] \ No newline at end of file + +__all__ = ["mrr"] diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index e03c97c6e2..b49a56dcc8 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -2,6 +2,7 @@ from catalyst.utils import metrics + def test_mrr(): """ Tests for catalyst.utils.metrics.mrr metric. From 93c637ef308332744198149850c8be258a3ca1b9 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Wed, 15 Jul 2020 21:40:29 +0900 Subject: [PATCH 04/30] Add changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c244686c00..59a7ba6712 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [20.07.1] - YYYY-MM-DD ### Added - +- MRR metrics calculation - ### Changed From f6d7496bd229e219b0b2f736091a174cc45c4438 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Wed, 15 Jul 2020 21:58:39 +0900 Subject: [PATCH 05/30] updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59a7ba6712..113eec5d05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [20.07.1] - YYYY-MM-DD ### Added -- MRR metrics calculation +- MRR metrics calculation ([#886](https://github.com/catalyst-team/catalyst/pull/886)) - ### Changed From 4c46f25ed9782afebe572fab93a24d1d5cbca3b2 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 16 Jul 2020 15:57:15 +0900 Subject: [PATCH 06/30] add docstring to mrr --- catalyst/utils/metrics/mrr.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index d99345bdc3..b175ccffa7 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -1,3 +1,7 @@ +""" +MRR metric. +""" + import torch From ae9089c5ac4e8032c9c3c35948f1526784508db2 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 25 Jul 2020 21:48:45 +0900 Subject: [PATCH 07/30] removed clones --- catalyst/utils/metrics/mrr.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index b1ef09fdb3..ad8e151a97 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -17,8 +17,6 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor): Returns: mrr (float): the mrr score """ - outputs = outputs.clone() - targets = targets.clone() max_rank = targets.shape[0] _, indices = outputs.sort(descending=True, dim=-1) From 6b765aa57855551197ab34cf484a1dc3069ee917 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 25 Jul 2020 22:31:07 +0900 Subject: [PATCH 08/30] Add batch tests --- catalyst/utils/metrics/__init__.py | 1 + catalyst/utils/metrics/mrr.py | 20 +++++++++++++------- catalyst/utils/metrics/tests/test_mrr.py | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/catalyst/utils/metrics/__init__.py b/catalyst/utils/metrics/__init__.py index e594d33b03..3e3ddfd357 100644 --- a/catalyst/utils/metrics/__init__.py +++ b/catalyst/utils/metrics/__init__.py @@ -9,3 +9,4 @@ from .focal import reduced_focal_loss, sigmoid_focal_loss from .iou import iou, jaccard from .precision import average_precision, mean_average_precision +from .mrr import mrr diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index ad8e151a97..8dfec68824 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -5,7 +5,9 @@ import torch -def mrr(outputs: torch.Tensor, targets: torch.Tensor): +def mrr(outputs: torch.Tensor, + targets: torch.Tensor + ) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets @@ -15,26 +17,30 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor): targets [batch_szie, slate_length] (torch.Tensor): ground truth, labels Returns: - mrr (float): the mrr score + mrr (float): the mrr score for each slate """ max_rank = targets.shape[0] - _, indices = outputs.sort(descending=True, dim=-1) - true_sorted_by_preds = torch.gather(targets, dim=0, index=indices) + _, indices_for_sort = outputs.sort(descending=True, dim=-1) + true_sorted_by_preds = torch.gather(targets, dim=-1, index=indices_for_sort) + print(true_sorted_by_preds) values, indices = torch.max(true_sorted_by_preds, dim=0) + # print(values) + # print(indices) indices = indices.type_as(values).unsqueeze(dim=0).t() - ats_rep = torch.tensor( + max_rank_rep = torch.tensor( data=max_rank, device=indices.device, dtype=torch.float32 ) - within_at_mask = (indices < ats_rep).type(torch.float32) + within_at_mask = (indices < max_rank_rep).type(torch.float32) result = torch.tensor(1.0) / (indices + torch.tensor(1.0)) + # print(result) zero_sum_mask = torch.sum(values) == 0.0 result[zero_sum_mask] = 0.0 mrr = result * within_at_mask - return mrr[0] + return mrr __all__ = ["mrr"] diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index b49a56dcc8..a4422c1d9f 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -21,3 +21,19 @@ def test_mrr(): mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) assert mrr == 0.5 + + # check 2 simple case + y_pred = [0.2, 0.5] + y_true = [0.0, 1.0] + + mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) + assert mrr == 1.0 + + #test batched slates + y_pred_1 = [0.2, 0.5] + y_pred_05 = [0.5, 0.2] + y_true = [0.0, 1.0] + + mrr = metrics.mrr(torch.Tensor([y_pred_1, y_pred_05]), torch.Tensor([y_true, y_true])) + assert mrr[0][0] == 1.0 + assert mrr[1][0] == 0.5 From a69273fbcd3f12aa39b78c34a519f8cb6a1a11fa Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 25 Jul 2020 22:33:23 +0900 Subject: [PATCH 09/30] edit changelog --- CHANGELOG.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 813f238c4c..6318441995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [20.07.1] - YYYY-MM-DD ### Added -<<<<<<< HEAD - MRR metrics calculation ([#886](https://github.com/catalyst-team/catalyst/pull/886)) -- -======= - - `CMCScoreCallback` ([#880](https://github.com/catalyst-team/catalyst/pull/880)) - kornia augmentations `BatchTransformCallback` ([#862](https://github.com/catalyst-team/catalyst/issues/862)) - `average_precision` and `mean_average_precision` metrics ([#883](https://github.com/catalyst-team/catalyst/pull/883)) - `MultiLabelAccuracyCallback`, `AveragePrecisionCallback` and `MeanAveragePrecisionCallback` callbacks ([#883](https://github.com/catalyst-team/catalyst/pull/883)) - minimal examples for multi-class and milti-label classification ([#883](https://github.com/catalyst-team/catalyst/pull/883)) ->>>>>>> master ### Changed From c425ccf2b775bf632d36440a5ce32dbaeec75ac6 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 25 Jul 2020 22:57:38 +0900 Subject: [PATCH 10/30] Add callbacks --- catalyst/dl/callbacks/metrics/__init__.py | 2 ++ catalyst/dl/callbacks/metrics/mrr.py | 40 +++++++++++++++++++++++ catalyst/utils/metrics/mrr.py | 4 --- 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 catalyst/dl/callbacks/metrics/mrr.py diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index adf6daa9b5..b927d443e4 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -26,3 +26,5 @@ AveragePrecisionCallback, MeanAveragePrecisionCallback, ) + +from catalyst.dl.callbacks.metrics.mrr import MRRCallback \ No newline at end of file diff --git a/catalyst/dl/callbacks/metrics/mrr.py b/catalyst/dl/callbacks/metrics/mrr.py new file mode 100644 index 0000000000..a18a1e0353 --- /dev/null +++ b/catalyst/dl/callbacks/metrics/mrr.py @@ -0,0 +1,40 @@ +from typing import List + +from catalyst.core import MetricCallback +from catalyst.utils import metrics + + +class MRRCallback(MetricCallback): + """Calculates the AUC per class for each loader. + + .. note:: + Currently, supports binary and multi-label cases. + """ + + def __init__( + self, + input_key: str = "targets", + output_key: str = "logits", + prefix: str = "mrr", + activation: str = "none", + ): + """ + Args: + input_key (str): input key to use for auc calculation + specifies our ``y_true`` + output_key (str): output key to use for auc calculation; + specifies our ``y_pred`` + prefix (str): name to display for mrr when printing + activation (str): An torch.nn activation applied to the outputs. + Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax2d'`` + """ + super().__init__( + prefix=prefix, + metric_fn=metrics.mrr, + input_key=input_key, + output_key=output_key, + activation=activation, + ) + + +__all__ = ["MRRCallback"] diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 8dfec68824..d3304d4786 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -23,10 +23,7 @@ def mrr(outputs: torch.Tensor, _, indices_for_sort = outputs.sort(descending=True, dim=-1) true_sorted_by_preds = torch.gather(targets, dim=-1, index=indices_for_sort) - print(true_sorted_by_preds) values, indices = torch.max(true_sorted_by_preds, dim=0) - # print(values) - # print(indices) indices = indices.type_as(values).unsqueeze(dim=0).t() max_rank_rep = torch.tensor( data=max_rank, device=indices.device, dtype=torch.float32 @@ -34,7 +31,6 @@ def mrr(outputs: torch.Tensor, within_at_mask = (indices < max_rank_rep).type(torch.float32) result = torch.tensor(1.0) / (indices + torch.tensor(1.0)) - # print(result) zero_sum_mask = torch.sum(values) == 0.0 result[zero_sum_mask] = 0.0 From 668196f521e47ebfd6632d4fd82f76ab76773841 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Mon, 27 Jul 2020 20:54:25 +0900 Subject: [PATCH 11/30] make codestyle --- catalyst/dl/callbacks/metrics/__init__.py | 2 +- catalyst/utils/metrics/mrr.py | 8 ++++---- catalyst/utils/metrics/tests/test_mrr.py | 6 ++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index b927d443e4..8b3b7e9ce0 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -27,4 +27,4 @@ MeanAveragePrecisionCallback, ) -from catalyst.dl.callbacks.metrics.mrr import MRRCallback \ No newline at end of file +from catalyst.dl.callbacks.metrics.mrr import MRRCallback diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index d3304d4786..eb2234dd32 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -5,9 +5,7 @@ import torch -def mrr(outputs: torch.Tensor, - targets: torch.Tensor - ) -> torch.Tensor: +def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets @@ -22,7 +20,9 @@ def mrr(outputs: torch.Tensor, max_rank = targets.shape[0] _, indices_for_sort = outputs.sort(descending=True, dim=-1) - true_sorted_by_preds = torch.gather(targets, dim=-1, index=indices_for_sort) + true_sorted_by_preds = torch.gather( + targets, dim=-1, index=indices_for_sort + ) values, indices = torch.max(true_sorted_by_preds, dim=0) indices = indices.type_as(values).unsqueeze(dim=0).t() max_rank_rep = torch.tensor( diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index a4422c1d9f..bc13fff0d1 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -29,11 +29,13 @@ def test_mrr(): mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) assert mrr == 1.0 - #test batched slates + # test batched slates y_pred_1 = [0.2, 0.5] y_pred_05 = [0.5, 0.2] y_true = [0.0, 1.0] - mrr = metrics.mrr(torch.Tensor([y_pred_1, y_pred_05]), torch.Tensor([y_true, y_true])) + mrr = metrics.mrr( + torch.Tensor([y_pred_1, y_pred_05]), torch.Tensor([y_true, y_true]) + ) assert mrr[0][0] == 1.0 assert mrr[1][0] == 0.5 From b626eeb09af0640a42cb2141066e7752618d1182 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Mon, 27 Jul 2020 21:12:26 +0900 Subject: [PATCH 12/30] small issues --- catalyst/dl/callbacks/metrics/__init__.py | 2 +- catalyst/utils/metrics/mrr.py | 4 ++-- docs/api/dl.rst | 7 +++++++ docs/api/utils.rst | 7 +++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index 8b3b7e9ce0..b927d443e4 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -27,4 +27,4 @@ MeanAveragePrecisionCallback, ) -from catalyst.dl.callbacks.metrics.mrr import MRRCallback +from catalyst.dl.callbacks.metrics.mrr import MRRCallback \ No newline at end of file diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index eb2234dd32..ba03e896af 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -10,9 +10,9 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets Args: - outputs [batch_size, slate_length] (torch.Tensor): + outputs [batch_size, slate_length] (torch.Tensor): model outputs, logits - targets [batch_szie, slate_length] (torch.Tensor): + targets [batch_szie, slate_length] (torch.Tensor): ground truth, labels Returns: mrr (float): the mrr score for each slate diff --git a/docs/api/dl.rst b/docs/api/dl.rst index 4f6c2ffb0d..3d4c77be0e 100644 --- a/docs/api/dl.rst +++ b/docs/api/dl.rst @@ -139,6 +139,13 @@ Global precision, recall and F1-score :undoc-members: :show-inheritance: +MRR +~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: catalyst.dl.callbacks.metrics.mrr + :members: + :undoc-members: + :show-inheritance: + Utils -------------------- diff --git a/docs/api/utils.rst b/docs/api/utils.rst index 660a549458..4ab4cbe41b 100644 --- a/docs/api/utils.rst +++ b/docs/api/utils.rst @@ -280,3 +280,10 @@ Precision :members: :undoc-members: :show-inheritance: + +MRR +~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: catalyst.utils.metrics.mrr + :members: + :undoc-members: + :show-inheritance: From 7f33538ee63cab8f99c4707f4d8192e49daaedc1 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 28 Jul 2020 14:35:20 +0900 Subject: [PATCH 13/30] add newline at the end of the file --- catalyst/dl/callbacks/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index b927d443e4..8b3b7e9ce0 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -27,4 +27,4 @@ MeanAveragePrecisionCallback, ) -from catalyst.dl.callbacks.metrics.mrr import MRRCallback \ No newline at end of file +from catalyst.dl.callbacks.metrics.mrr import MRRCallback From 06e67616d868a6043140716c72ef4467e75d0c62 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 28 Jul 2020 14:41:21 +0900 Subject: [PATCH 14/30] small issues --- catalyst/dl/callbacks/metrics/mrr.py | 2 -- catalyst/utils/metrics/mrr.py | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/catalyst/dl/callbacks/metrics/mrr.py b/catalyst/dl/callbacks/metrics/mrr.py index a18a1e0353..46b6c761b0 100644 --- a/catalyst/dl/callbacks/metrics/mrr.py +++ b/catalyst/dl/callbacks/metrics/mrr.py @@ -1,5 +1,3 @@ -from typing import List - from catalyst.core import MetricCallback from catalyst.utils import metrics diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index ba03e896af..5ffede4cba 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -14,6 +14,7 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: model outputs, logits targets [batch_szie, slate_length] (torch.Tensor): ground truth, labels + Returns: mrr (float): the mrr score for each slate """ From 04caa3114f27053a690fe08bf1915d0554c1bf0f Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 28 Jul 2020 19:44:04 +0900 Subject: [PATCH 15/30] add movielens --- catalyst/contrib/datasets/movie_lens.py | 110 ++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 catalyst/contrib/datasets/movie_lens.py diff --git a/catalyst/contrib/datasets/movie_lens.py b/catalyst/contrib/datasets/movie_lens.py new file mode 100644 index 0000000000..5263350dd2 --- /dev/null +++ b/catalyst/contrib/datasets/movie_lens.py @@ -0,0 +1,110 @@ +import zipfile +import os + +import numpy as np + +import torch +from torch.utils.data import Dataset + +from catalyst.contrib.datasets.utils import download_and_extract_archive + +class MovieLens(Dataset): + ''' + MovieLens data sets were collected by the GroupLens Research Project + at the University of Minnesota. + + This data set consists of: + * 100,000 ratings (1-5) from 943 users on 1682 movies. + * Each user has rated at least 20 movies. + * Simple demographic info for the users (age, gender, occupation, zip) + + The data was collected through the MovieLens web site + (movielens.umn.edu) during the seven-month period from September 19th, + 1997 through April 22nd, 1998. This data has been cleaned up - users + who had less than 20 ratings or did not have complete demographic + information were removed from this data set. Detailed descriptions of + the data file can be found at the end of this file. + + Neither the University of Minnesota nor any of the researchers + involved can guarantee the correctness of the data, its suitability + for any particular purpose, or the validity of results based on the + use of the data set. The data set may be used for any research + purposes under the following conditions: + + * The user may not state or imply any endorsement from the + University of Minnesota or the GroupLens Research Group. + + * The user must acknowledge the use of the data set in + publications resulting from the use of the data set + (see below for citation information). + + * The user may not redistribute the data without separate + permission. + + * The user may not use this information for any commercial or + revenue-bearing purposes without first obtaining permission + from a faculty member of the GroupLens Research Project at the + University of Minnesota. + + If you have any further questions or comments, please contact GroupLens + . + http://files.grouplens.org/datasets/movielens/ml-100k-README.txt + ''' + + resources = ( + "http://files.grouplens.org/datasets/movielens/ml-100k.zip", + "6f5ca7e518b6970ec2265ce66a80ffdc" + ) + filename = 'ml-100k' + + + def __init__(self): + pass + + @property + def raw_folder(self): + """ + Create raw folder for data download + """ + return os.path.join(self.root, self.__class__.__name__, "raw") + + @property + def processed_folder(self): + """@TODO: Docs. Contribution is welcome.""" + return os.path.join(self.root, self.__class__.__name__, "processed") + + def _check_exists(self): + + return os.path.exists( + os.path.join(self.processed_folder, self.training_file) + ) and os.path.exists( + os.path.join(self.processed_folder, self.test_file) + ) + + def _downlaod(self): + if self._check_exists(): + return + + os.makedirs(self.raw_folder, exist_ok=True) + os.makedirs(self.processed_folder, exist_ok=True) + + def _get_raw_movielens_data(self): + """ + Download movielens data if it doesn't exsit + """ + + path = self._get_movielens_path() + + if not os.path.isfile(path): + download_and_extract_archive( + url = self.resources[0] + download_root = + ) + + with zipfile.ZipFile(path) as datafile: + return (datafile.read('ml-100k/ua.base').decode().split('\n'), + datafile.read('ml-100k/ua.test').decode().split('\n')) + + if self._check_exist(): + return + \ No newline at end of file From d9d776402b13b37f7e5f50401f99ef543f63e92c Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 30 Jul 2020 14:47:18 +0900 Subject: [PATCH 16/30] minor improvements --- catalyst/utils/metrics/tests/test_mrr.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index bc13fff0d1..995f67bce9 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -7,7 +7,6 @@ def test_mrr(): """ Tests for catalyst.utils.metrics.mrr metric. """ - # check 0 simple case y_pred = [0.5, 0.2] y_true = [1.0, 0.0] @@ -30,12 +29,12 @@ def test_mrr(): assert mrr == 1.0 # test batched slates - y_pred_1 = [0.2, 0.5] - y_pred_05 = [0.5, 0.2] + y_pred1 = [0.2, 0.5] + y_pred05 = [0.5, 0.2] y_true = [0.0, 1.0] mrr = metrics.mrr( - torch.Tensor([y_pred_1, y_pred_05]), torch.Tensor([y_true, y_true]) + torch.Tensor([y_pred1, y_pred05]), torch.Tensor([y_true, y_true]) ) assert mrr[0][0] == 1.0 assert mrr[1][0] == 0.5 From 8bf67009c6d6194e8556e9bff1d5fe8ca4f2d99e Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 30 Jul 2020 15:18:32 +0900 Subject: [PATCH 17/30] minor improvements --- catalyst/utils/metrics/mrr.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 5ffede4cba..3907434558 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -10,11 +10,13 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets Args: - outputs [batch_size, slate_length] (torch.Tensor): + outputs (torch.Tensor): + size: [batch_size, slate_length] model outputs, logits - targets [batch_szie, slate_length] (torch.Tensor): + targets (torch.Tensor): + size: [batch_szie, slate_length] ground truth, labels - + Returns: mrr (float): the mrr score for each slate """ @@ -36,8 +38,7 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: zero_sum_mask = torch.sum(values) == 0.0 result[zero_sum_mask] = 0.0 - mrr = result * within_at_mask - return mrr + return result * within_at_mask __all__ = ["mrr"] From 49b15a4c27e923d523a848f2e1123fb464d87d3d Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Mon, 3 Aug 2020 01:01:59 +0900 Subject: [PATCH 18/30] removed activation --- catalyst/dl/callbacks/metrics/mrr.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/catalyst/dl/callbacks/metrics/mrr.py b/catalyst/dl/callbacks/metrics/mrr.py index 46b6c761b0..295d1f1762 100644 --- a/catalyst/dl/callbacks/metrics/mrr.py +++ b/catalyst/dl/callbacks/metrics/mrr.py @@ -13,8 +13,7 @@ def __init__( self, input_key: str = "targets", output_key: str = "logits", - prefix: str = "mrr", - activation: str = "none", + prefix: str = "mrr" ): """ Args: @@ -23,15 +22,12 @@ def __init__( output_key (str): output key to use for auc calculation; specifies our ``y_pred`` prefix (str): name to display for mrr when printing - activation (str): An torch.nn activation applied to the outputs. - Must be one of ``'none'``, ``'Sigmoid'``, or ``'Softmax2d'`` """ super().__init__( prefix=prefix, metric_fn=metrics.mrr, input_key=input_key, - output_key=output_key, - activation=activation, + output_key=output_key ) From 80920f06b00acc6497b0c7717cdfa231d3b067fa Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Wed, 2 Sep 2020 20:56:53 +0900 Subject: [PATCH 19/30] add at k support --- catalyst/utils/metrics/__init__.py | 14 +++----------- catalyst/utils/metrics/mrr.py | 7 +++++-- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/catalyst/utils/metrics/__init__.py b/catalyst/utils/metrics/__init__.py index fca093d4e7..67d3a79f2c 100644 --- a/catalyst/utils/metrics/__init__.py +++ b/catalyst/utils/metrics/__init__.py @@ -3,19 +3,12 @@ accuracy, multi_label_accuracy, ) -<<<<<<< HEAD -from .cmc_score import cmc_score_count, cmc_score -from .dice import dice -from .f1_score import f1_score -from .focal import reduced_focal_loss, sigmoid_focal_loss -from .iou import iou, jaccard -from .precision import average_precision, mean_average_precision -from .mrr import mrr -======= + from catalyst.utils.metrics.auc import auc from catalyst.utils.metrics.cmc_score import cmc_score_count, cmc_score from catalyst.utils.metrics.dice import dice, calculate_dice from catalyst.utils.metrics.f1_score import f1_score +from catalyst.utils.metrics.mrr import mrr from catalyst.utils.metrics.focal import reduced_focal_loss, sigmoid_focal_loss from catalyst.utils.metrics.iou import iou, jaccard from catalyst.utils.metrics.precision import average_precision @@ -23,5 +16,4 @@ get_default_topk_args, wrap_class_metric2dict, wrap_topk_metric2dict, -) ->>>>>>> master +) \ No newline at end of file diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 3907434558..d6d6c7bdb2 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -5,7 +5,7 @@ import torch -def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: +def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets @@ -21,6 +21,9 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: mrr (float): the mrr score for each slate """ max_rank = targets.shape[0] + # print(targets.size()) + # if len(targets.size()) > 2: + # k = min(targets.size()[1], k) _, indices_for_sort = outputs.sort(descending=True, dim=-1) true_sorted_by_preds = torch.gather( @@ -37,7 +40,7 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: zero_sum_mask = torch.sum(values) == 0.0 result[zero_sum_mask] = 0.0 - + # print(result*within_at_mask) return result * within_at_mask From 84b7b5239e8265d39b902e2cdfc5f48db76a61e0 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sun, 6 Sep 2020 16:11:16 +0900 Subject: [PATCH 20/30] add mrr computations --- catalyst/contrib/datasets/movie_lens.py | 4 +- catalyst/dl/callbacks/metrics/__init__.py | 9 ----- catalyst/dl/callbacks/metrics/mrr.py | 4 +- catalyst/utils/metrics/__init__.py | 2 +- catalyst/utils/metrics/mrr.py | 24 +++++------- catalyst/utils/metrics/tests/test_mrr.py | 47 +++++++++++++++++++---- docs/api/dl.rst | 6 --- 7 files changed, 54 insertions(+), 42 deletions(-) diff --git a/catalyst/contrib/datasets/movie_lens.py b/catalyst/contrib/datasets/movie_lens.py index 5263350dd2..be7f2d77b7 100644 --- a/catalyst/contrib/datasets/movie_lens.py +++ b/catalyst/contrib/datasets/movie_lens.py @@ -1,5 +1,5 @@ -import zipfile import os +import zipfile import numpy as np @@ -8,6 +8,7 @@ from catalyst.contrib.datasets.utils import download_and_extract_archive + class MovieLens(Dataset): ''' MovieLens data sets were collected by the GroupLens Research Project @@ -107,4 +108,3 @@ def _get_raw_movielens_data(self): if self._check_exist(): return - \ No newline at end of file diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index e523606def..fd61543a97 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -22,13 +22,4 @@ PrecisionRecallF1ScoreCallback, ) -<<<<<<< HEAD -from catalyst.dl.callbacks.metrics.precision import ( - AveragePrecisionCallback, - MeanAveragePrecisionCallback, -) - -from catalyst.dl.callbacks.metrics.mrr import MRRCallback -======= from catalyst.dl.callbacks.metrics.precision import AveragePrecisionCallback ->>>>>>> master diff --git a/catalyst/dl/callbacks/metrics/mrr.py b/catalyst/dl/callbacks/metrics/mrr.py index 295d1f1762..7f72ffc1fd 100644 --- a/catalyst/dl/callbacks/metrics/mrr.py +++ b/catalyst/dl/callbacks/metrics/mrr.py @@ -13,7 +13,7 @@ def __init__( self, input_key: str = "targets", output_key: str = "logits", - prefix: str = "mrr" + prefix: str = "mrr", ): """ Args: @@ -27,7 +27,7 @@ def __init__( prefix=prefix, metric_fn=metrics.mrr, input_key=input_key, - output_key=output_key + output_key=output_key, ) diff --git a/catalyst/utils/metrics/__init__.py b/catalyst/utils/metrics/__init__.py index 67d3a79f2c..97bbd49daf 100644 --- a/catalyst/utils/metrics/__init__.py +++ b/catalyst/utils/metrics/__init__.py @@ -16,4 +16,4 @@ get_default_topk_args, wrap_class_metric2dict, wrap_topk_metric2dict, -) \ No newline at end of file +) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index d6d6c7bdb2..1423321fc3 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -9,39 +9,35 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets + Users data represnted via batches. Args: outputs (torch.Tensor): + Tensor weith predicted score size: [batch_size, slate_length] model outputs, logits targets (torch.Tensor): + Binary tensor with ground truth. 1 means the item is relevant + for the user and 0 not relevant size: [batch_szie, slate_length] ground truth, labels Returns: mrr (float): the mrr score for each slate """ - max_rank = targets.shape[0] - # print(targets.size()) - # if len(targets.size()) > 2: - # k = min(targets.size()[1], k) - + k = min(outputs.size()[1], k) _, indices_for_sort = outputs.sort(descending=True, dim=-1) true_sorted_by_preds = torch.gather( targets, dim=-1, index=indices_for_sort ) - values, indices = torch.max(true_sorted_by_preds, dim=0) - indices = indices.type_as(values).unsqueeze(dim=0).t() - max_rank_rep = torch.tensor( - data=max_rank, device=indices.device, dtype=torch.float32 - ) - within_at_mask = (indices < max_rank_rep).type(torch.float32) + true_sorted_by_pred_shrink = true_sorted_by_preds[:, :k] + values, indices = torch.max(true_sorted_by_pred_shrink, dim=1) + indices = indices.type_as(values).unsqueeze(dim=0).t() result = torch.tensor(1.0) / (indices + torch.tensor(1.0)) - zero_sum_mask = torch.sum(values) == 0.0 + zero_sum_mask = values == 0.0 result[zero_sum_mask] = 0.0 - # print(result*within_at_mask) - return result * within_at_mask + return result __all__ = ["mrr"] diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/utils/metrics/tests/test_mrr.py index 995f67bce9..b60e787e22 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/utils/metrics/tests/test_mrr.py @@ -7,28 +7,30 @@ def test_mrr(): """ Tests for catalyst.utils.metrics.mrr metric. """ - # check 0 simple case + # # check 0 simple case y_pred = [0.5, 0.2] y_true = [1.0, 0.0] - mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) - assert mrr == 1 + mrr = metrics.mrr(torch.Tensor([y_pred]), torch.Tensor([y_true])) + assert mrr[0][0] == 1 # check 1 simple case y_pred = [0.5, 0.2] y_true = [0.0, 1.0] - mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) - assert mrr == 0.5 + mrr = metrics.mrr(torch.Tensor([y_pred]), torch.Tensor([y_true])) + # mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) + assert mrr[0][0] == 0.5 + # assert mrr == 0.5 # check 2 simple case y_pred = [0.2, 0.5] y_true = [0.0, 1.0] - mrr = metrics.mrr(torch.Tensor(y_pred), torch.Tensor(y_true)) - assert mrr == 1.0 + mrr = metrics.mrr(torch.Tensor([y_pred]), torch.Tensor([y_true])) + assert mrr[0][0] == 1.0 - # test batched slates + # check 3 test multiple users y_pred1 = [0.2, 0.5] y_pred05 = [0.5, 0.2] y_true = [0.0, 1.0] @@ -38,3 +40,32 @@ def test_mrr(): ) assert mrr[0][0] == 1.0 assert mrr[1][0] == 0.5 + + # check 4 test with k + y_pred1 = [4.0, 2.0, 3.0, 1.0] + y_pred2 = [1.0, 2.0, 3.0, 4.0] + y_true1 = [0, 0, 1.0, 1.0] + y_true2 = [0, 0, 1.0, 1.0] + + y_pred_torch = torch.Tensor([y_pred1, y_pred2]) + y_true_torch = torch.Tensor([y_true1, y_true2]) + + mrr = metrics.mrr(y_pred_torch, y_true_torch, k=3) + + assert mrr[0][0] == 0.5 + assert mrr[1][0] == 1.0 + + # check 5 test with k + + y_pred1 = [4.0, 2.0, 3.0, 1.0] + y_pred2 = [1.0, 2.0, 3.0, 4.0] + y_true1 = [0, 0, 1.0, 1.0] + y_true2 = [0, 0, 1.0, 1.0] + + y_pred_torch = torch.Tensor([y_pred1, y_pred2]) + y_true_torch = torch.Tensor([y_true1, y_true2]) + + mrr = metrics.mrr(y_pred_torch, y_true_torch, k=1) + + assert mrr[0][0] == 0.0 + assert mrr[1][0] == 1.0 diff --git a/docs/api/dl.rst b/docs/api/dl.rst index 549628fe1c..6d155ca28c 100644 --- a/docs/api/dl.rst +++ b/docs/api/dl.rst @@ -159,15 +159,9 @@ Global precision, recall and F1-score :undoc-members: :show-inheritance: -<<<<<<< HEAD -MRR -~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.dl.callbacks.metrics.mrr -======= Precision ~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: catalyst.dl.callbacks.metrics.precision ->>>>>>> master :members: :undoc-members: :show-inheritance: From 3acb341964c2ca06674e73c078ee2df54d41646a Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sun, 6 Sep 2020 16:22:44 +0900 Subject: [PATCH 21/30] deleted dataset from another branch --- catalyst/contrib/datasets/movie_lens.py | 110 ------------------------ 1 file changed, 110 deletions(-) delete mode 100644 catalyst/contrib/datasets/movie_lens.py diff --git a/catalyst/contrib/datasets/movie_lens.py b/catalyst/contrib/datasets/movie_lens.py deleted file mode 100644 index be7f2d77b7..0000000000 --- a/catalyst/contrib/datasets/movie_lens.py +++ /dev/null @@ -1,110 +0,0 @@ -import os -import zipfile - -import numpy as np - -import torch -from torch.utils.data import Dataset - -from catalyst.contrib.datasets.utils import download_and_extract_archive - - -class MovieLens(Dataset): - ''' - MovieLens data sets were collected by the GroupLens Research Project - at the University of Minnesota. - - This data set consists of: - * 100,000 ratings (1-5) from 943 users on 1682 movies. - * Each user has rated at least 20 movies. - * Simple demographic info for the users (age, gender, occupation, zip) - - The data was collected through the MovieLens web site - (movielens.umn.edu) during the seven-month period from September 19th, - 1997 through April 22nd, 1998. This data has been cleaned up - users - who had less than 20 ratings or did not have complete demographic - information were removed from this data set. Detailed descriptions of - the data file can be found at the end of this file. - - Neither the University of Minnesota nor any of the researchers - involved can guarantee the correctness of the data, its suitability - for any particular purpose, or the validity of results based on the - use of the data set. The data set may be used for any research - purposes under the following conditions: - - * The user may not state or imply any endorsement from the - University of Minnesota or the GroupLens Research Group. - - * The user must acknowledge the use of the data set in - publications resulting from the use of the data set - (see below for citation information). - - * The user may not redistribute the data without separate - permission. - - * The user may not use this information for any commercial or - revenue-bearing purposes without first obtaining permission - from a faculty member of the GroupLens Research Project at the - University of Minnesota. - - If you have any further questions or comments, please contact GroupLens - . - http://files.grouplens.org/datasets/movielens/ml-100k-README.txt - ''' - - resources = ( - "http://files.grouplens.org/datasets/movielens/ml-100k.zip", - "6f5ca7e518b6970ec2265ce66a80ffdc" - ) - filename = 'ml-100k' - - - def __init__(self): - pass - - @property - def raw_folder(self): - """ - Create raw folder for data download - """ - return os.path.join(self.root, self.__class__.__name__, "raw") - - @property - def processed_folder(self): - """@TODO: Docs. Contribution is welcome.""" - return os.path.join(self.root, self.__class__.__name__, "processed") - - def _check_exists(self): - - return os.path.exists( - os.path.join(self.processed_folder, self.training_file) - ) and os.path.exists( - os.path.join(self.processed_folder, self.test_file) - ) - - def _downlaod(self): - if self._check_exists(): - return - - os.makedirs(self.raw_folder, exist_ok=True) - os.makedirs(self.processed_folder, exist_ok=True) - - def _get_raw_movielens_data(self): - """ - Download movielens data if it doesn't exsit - """ - - path = self._get_movielens_path() - - if not os.path.isfile(path): - download_and_extract_archive( - url = self.resources[0] - download_root = - ) - - with zipfile.ZipFile(path) as datafile: - return (datafile.read('ml-100k/ua.base').decode().split('\n'), - datafile.read('ml-100k/ua.test').decode().split('\n')) - - if self._check_exist(): - return From 5a16261d600afab0a86b64470a9d25194f0b7786 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sun, 6 Sep 2020 17:15:30 +0900 Subject: [PATCH 22/30] WIP merr calcback tests --- .../dl/callbacks/metrics/tests/___init__.py | 0 .../dl/callbacks/metrics/tests/test_mrr.py | 34 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 catalyst/dl/callbacks/metrics/tests/___init__.py create mode 100644 catalyst/dl/callbacks/metrics/tests/test_mrr.py diff --git a/catalyst/dl/callbacks/metrics/tests/___init__.py b/catalyst/dl/callbacks/metrics/tests/___init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/catalyst/dl/callbacks/metrics/tests/test_mrr.py b/catalyst/dl/callbacks/metrics/tests/test_mrr.py new file mode 100644 index 0000000000..06c22ee28d --- /dev/null +++ b/catalyst/dl/callbacks/metrics/tests/test_mrr.py @@ -0,0 +1,34 @@ +import torch +from torch.utils.data import DataLoader, TensorDataset + +from catalyst.dl import ( + MRRCallback, SupervisedRunner, + SchedulerCallback +) + +num_samples, num_features = 10_000, 10 +n_classes = 10 +X = torch.rand(num_samples, num_features) +y = torch.randint(0, n_classes, [num_samples]) +loader = DataLoader(TensorDataset(X, y), batch_size=32, num_workers=1) +loaders = {"train": loader, "valid": loader} + +# mdeol +model = torch.nn.Linear(num_features, n_classes) +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters()) +scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + +# model training +runner = SupervisedRunner() +runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=2, + verbose=True, + callbacks=[MRRCallback, SchedulerCallback(reduced_metric="loss")] +) \ No newline at end of file From e192837810a249d2381adaf1cd46b22edb18cab0 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 10 Sep 2020 09:57:10 +0900 Subject: [PATCH 23/30] minor changes --- catalyst/utils/metrics/mrr.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 1423321fc3..9913858975 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -9,20 +9,23 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: """ Calculate the MRR score given model ouptputs and targets - Users data represnted via batches. + Users data aggreagtesd in batches. Args: outputs (torch.Tensor): Tensor weith predicted score size: [batch_size, slate_length] model outputs, logits targets (torch.Tensor): - Binary tensor with ground truth. 1 means the item is relevant + Binary tensor with ground truth. + 1 means the item is relevant for the user and 0 not relevant size: [batch_szie, slate_length] ground truth, labels Returns: - mrr (float): the mrr score for each slate + result (torch.Tensor): + The mrr score for each user. + """ k = min(outputs.size()[1], k) _, indices_for_sort = outputs.sort(descending=True, dim=-1) From 2230a5be97b57888df6d517ea0c8b523e9285e0a Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 12 Sep 2020 18:44:57 +0900 Subject: [PATCH 24/30] alphabetical order of the imports --- catalyst/utils/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/utils/metrics/__init__.py b/catalyst/utils/metrics/__init__.py index 97bbd49daf..7d7fb3625a 100644 --- a/catalyst/utils/metrics/__init__.py +++ b/catalyst/utils/metrics/__init__.py @@ -8,9 +8,9 @@ from catalyst.utils.metrics.cmc_score import cmc_score_count, cmc_score from catalyst.utils.metrics.dice import dice, calculate_dice from catalyst.utils.metrics.f1_score import f1_score -from catalyst.utils.metrics.mrr import mrr from catalyst.utils.metrics.focal import reduced_focal_loss, sigmoid_focal_loss from catalyst.utils.metrics.iou import iou, jaccard +from catalyst.utils.metrics.mrr import mrr from catalyst.utils.metrics.precision import average_precision from catalyst.utils.metrics.functional import ( get_default_topk_args, From f632cc11219dc61e28183f643a036e4eefffae99 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sat, 12 Sep 2020 18:47:23 +0900 Subject: [PATCH 25/30] add new line at the end of py file --- catalyst/dl/callbacks/metrics/tests/test_mrr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/dl/callbacks/metrics/tests/test_mrr.py b/catalyst/dl/callbacks/metrics/tests/test_mrr.py index 06c22ee28d..ee2cfed2b7 100644 --- a/catalyst/dl/callbacks/metrics/tests/test_mrr.py +++ b/catalyst/dl/callbacks/metrics/tests/test_mrr.py @@ -31,4 +31,4 @@ num_epochs=2, verbose=True, callbacks=[MRRCallback, SchedulerCallback(reduced_metric="loss")] -) \ No newline at end of file +) From 470cc991617b188ae66891e9352429cc9bc7ad86 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 22 Sep 2020 23:04:41 +0900 Subject: [PATCH 26/30] fixed small issues --- catalyst/dl/callbacks/metrics/__init__.py | 1 + .../dl/callbacks/metrics/tests/test_mrr.py | 24 +++++++++---------- catalyst/utils/metrics/mrr.py | 13 ++++++++-- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/catalyst/dl/callbacks/metrics/__init__.py b/catalyst/dl/callbacks/metrics/__init__.py index fd61543a97..89ddc35268 100644 --- a/catalyst/dl/callbacks/metrics/__init__.py +++ b/catalyst/dl/callbacks/metrics/__init__.py @@ -18,6 +18,7 @@ IouCallback, JaccardCallback, ) +from catalyst.dl.callbacks.metrics.mrr import MRRCallback from catalyst.dl.callbacks.metrics.ppv_tpr_f1 import ( PrecisionRecallF1ScoreCallback, ) diff --git a/catalyst/dl/callbacks/metrics/tests/test_mrr.py b/catalyst/dl/callbacks/metrics/tests/test_mrr.py index ee2cfed2b7..cf37dfc7cc 100644 --- a/catalyst/dl/callbacks/metrics/tests/test_mrr.py +++ b/catalyst/dl/callbacks/metrics/tests/test_mrr.py @@ -20,15 +20,15 @@ scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) # model training -runner = SupervisedRunner() -runner.train( - model=model, - criterion=criterion, - optimizer=optimizer, - scheduler=scheduler, - loaders=loaders, - logdir="./logdir", - num_epochs=2, - verbose=True, - callbacks=[MRRCallback, SchedulerCallback(reduced_metric="loss")] -) +# runner = SupervisedRunner() +# runner.train( +# model=model, +# criterion=criterion, +# optimizer=optimizer, +# scheduler=scheduler, +# loaders=loaders, +# logdir="./logdir", +# num_epochs=2, +# verbose=True, +# callbacks=[MRRCallback, SchedulerCallback(reduced_metric="loss")] +# ) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 9913858975..589c52b1ae 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -8,8 +8,13 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: """ - Calculate the MRR score given model ouptputs and targets + Calculate the Mean Reciprocal Rank (MRR) score given model ouptputs and targets Users data aggreagtesd in batches. + + The MRR@k is the mean overall user of the reciprocal rank, that is + the rank of the highest ranked relevant item, if any in the top *k*, 0 otherwise. + https://en.wikipedia.org/wiki/Mean_reciprocal_rank + Args: outputs (torch.Tensor): Tensor weith predicted score @@ -21,13 +26,17 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: for the user and 0 not relevant size: [batch_szie, slate_length] ground truth, labels + k (int): + Parameter fro evaluation on top-k items Returns: result (torch.Tensor): The mrr score for each user. + + Examples """ - k = min(outputs.size()[1], k) + k = min(outputs.size(1), k) _, indices_for_sort = outputs.sort(descending=True, dim=-1) true_sorted_by_preds = torch.gather( targets, dim=-1, index=indices_for_sort From 8127eb8f37eec7b04153f05a6287381212eb0094 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Tue, 29 Sep 2020 17:23:30 +0900 Subject: [PATCH 27/30] changed the codestyle --- catalyst/dl/callbacks/metrics/mrr.py | 4 +-- .../tests/{___init__.py => __init__.py} | 0 .../dl/callbacks/metrics/tests/test_mrr.py | 34 ------------------- catalyst/utils/metrics/mrr.py | 20 +++++------ 4 files changed, 11 insertions(+), 47 deletions(-) rename catalyst/dl/callbacks/metrics/tests/{___init__.py => __init__.py} (100%) delete mode 100644 catalyst/dl/callbacks/metrics/tests/test_mrr.py diff --git a/catalyst/dl/callbacks/metrics/mrr.py b/catalyst/dl/callbacks/metrics/mrr.py index 7f72ffc1fd..29ea0c003b 100644 --- a/catalyst/dl/callbacks/metrics/mrr.py +++ b/catalyst/dl/callbacks/metrics/mrr.py @@ -17,9 +17,9 @@ def __init__( ): """ Args: - input_key (str): input key to use for auc calculation + input_key (str): input key to use for mrr calculation specifies our ``y_true`` - output_key (str): output key to use for auc calculation; + output_key (str): output key to use for mrr calculation; specifies our ``y_pred`` prefix (str): name to display for mrr when printing """ diff --git a/catalyst/dl/callbacks/metrics/tests/___init__.py b/catalyst/dl/callbacks/metrics/tests/__init__.py similarity index 100% rename from catalyst/dl/callbacks/metrics/tests/___init__.py rename to catalyst/dl/callbacks/metrics/tests/__init__.py diff --git a/catalyst/dl/callbacks/metrics/tests/test_mrr.py b/catalyst/dl/callbacks/metrics/tests/test_mrr.py deleted file mode 100644 index cf37dfc7cc..0000000000 --- a/catalyst/dl/callbacks/metrics/tests/test_mrr.py +++ /dev/null @@ -1,34 +0,0 @@ -import torch -from torch.utils.data import DataLoader, TensorDataset - -from catalyst.dl import ( - MRRCallback, SupervisedRunner, - SchedulerCallback -) - -num_samples, num_features = 10_000, 10 -n_classes = 10 -X = torch.rand(num_samples, num_features) -y = torch.randint(0, n_classes, [num_samples]) -loader = DataLoader(TensorDataset(X, y), batch_size=32, num_workers=1) -loaders = {"train": loader, "valid": loader} - -# mdeol -model = torch.nn.Linear(num_features, n_classes) -criterion = torch.nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(model.parameters()) -scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) - -# model training -# runner = SupervisedRunner() -# runner.train( -# model=model, -# criterion=criterion, -# optimizer=optimizer, -# scheduler=scheduler, -# loaders=loaders, -# logdir="./logdir", -# num_epochs=2, -# verbose=True, -# callbacks=[MRRCallback, SchedulerCallback(reduced_metric="loss")] -# ) diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/utils/metrics/mrr.py index 589c52b1ae..cce322e27b 100644 --- a/catalyst/utils/metrics/mrr.py +++ b/catalyst/utils/metrics/mrr.py @@ -6,22 +6,23 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: - """ - Calculate the Mean Reciprocal Rank (MRR) score given model ouptputs and targets - Users data aggreagtesd in batches. + Calculate the Mean Reciprocal Rank (MRR) + score given model ouptputs and targets + User's data aggreagtesd in batches. - The MRR@k is the mean overall user of the reciprocal rank, that is - the rank of the highest ranked relevant item, if any in the top *k*, 0 otherwise. + The MRR@k is the mean overall user of the + reciprocal rank, that is the rank of the highest + ranked relevant item, if any in the top *k*, 0 otherwise. https://en.wikipedia.org/wiki/Mean_reciprocal_rank Args: outputs (torch.Tensor): Tensor weith predicted score - size: [batch_size, slate_length] + size: [batch_size, slate_length] model outputs, logits targets (torch.Tensor): - Binary tensor with ground truth. + Binary tensor with ground truth. 1 means the item is relevant for the user and 0 not relevant size: [batch_szie, slate_length] @@ -30,11 +31,8 @@ def mrr(outputs: torch.Tensor, targets: torch.Tensor, k=100) -> torch.Tensor: Parameter fro evaluation on top-k items Returns: - result (torch.Tensor): + result (torch.Tensor): The mrr score for each user. - - Examples - """ k = min(outputs.size(1), k) _, indices_for_sort = outputs.sort(descending=True, dim=-1) From b2fa81a6c718f6890708a0f6d40db5a9c1fb927d Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 8 Oct 2020 13:07:39 +0900 Subject: [PATCH 28/30] moved files to metrics --- CHANGELOG.md | 2 +- catalyst/metrics/__init__.py | 2 ++ catalyst/{utils => }/metrics/mrr.py | 0 catalyst/{utils => }/metrics/tests/test_mrr.py | 2 +- catalyst/utils/metrics/__init__.py | 1 - 5 files changed, 4 insertions(+), 3 deletions(-) rename catalyst/{utils => }/metrics/mrr.py (100%) rename catalyst/{utils => }/metrics/tests/test_mrr.py (98%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98e6b58396..ff40730540 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- MRR metrics calculation ([#886](https://github.com/catalyst-team/catalyst/pull/886)) - docs for MetricCallbacks ([#947](https://github.com/catalyst-team/catalyst/pull/947)) - SoftMax, CosFace, ArcFace layers to contrib ([#939](https://github.com/catalyst-team/catalyst/pull/939)) @@ -90,7 +91,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [20.08] - 2020-08-09 ### Added -- MRR metrics calculation ([#886](https://github.com/catalyst-team/catalyst/pull/886)) - `CMCScoreCallback` ([#880](https://github.com/catalyst-team/catalyst/pull/880)) - kornia augmentations `BatchTransformCallback` ([#862](https://github.com/catalyst-team/catalyst/issues/862)) - `average_precision` and `mean_average_precision` metrics ([#883](https://github.com/catalyst-team/catalyst/pull/883)) diff --git a/catalyst/metrics/__init__.py b/catalyst/metrics/__init__.py index e69de29bb2..41b0b89978 100644 --- a/catalyst/metrics/__init__.py +++ b/catalyst/metrics/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from catalyst.utils.metrics.mrr import mrr diff --git a/catalyst/utils/metrics/mrr.py b/catalyst/metrics/mrr.py similarity index 100% rename from catalyst/utils/metrics/mrr.py rename to catalyst/metrics/mrr.py diff --git a/catalyst/utils/metrics/tests/test_mrr.py b/catalyst/metrics/tests/test_mrr.py similarity index 98% rename from catalyst/utils/metrics/tests/test_mrr.py rename to catalyst/metrics/tests/test_mrr.py index b60e787e22..384d450ca3 100644 --- a/catalyst/utils/metrics/tests/test_mrr.py +++ b/catalyst/metrics/tests/test_mrr.py @@ -1,6 +1,6 @@ import torch -from catalyst.utils import metrics +from catalyst import metrics def test_mrr(): diff --git a/catalyst/utils/metrics/__init__.py b/catalyst/utils/metrics/__init__.py index 7d7fb3625a..49cd570bcb 100644 --- a/catalyst/utils/metrics/__init__.py +++ b/catalyst/utils/metrics/__init__.py @@ -10,7 +10,6 @@ from catalyst.utils.metrics.f1_score import f1_score from catalyst.utils.metrics.focal import reduced_focal_loss, sigmoid_focal_loss from catalyst.utils.metrics.iou import iou, jaccard -from catalyst.utils.metrics.mrr import mrr from catalyst.utils.metrics.precision import average_precision from catalyst.utils.metrics.functional import ( get_default_topk_args, From 5f51a38cdb5c3e1863b7399996cd268cc4b45bef Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Thu, 8 Oct 2020 13:37:32 +0900 Subject: [PATCH 29/30] fixed typos --- catalyst/metrics/__init__.py | 2 +- catalyst/metrics/tests/test_mrr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/catalyst/metrics/__init__.py b/catalyst/metrics/__init__.py index 41b0b89978..24f7b086b3 100644 --- a/catalyst/metrics/__init__.py +++ b/catalyst/metrics/__init__.py @@ -1,2 +1,2 @@ # flake8: noqa -from catalyst.utils.metrics.mrr import mrr +from catalyst.metrics.mrr import mrr diff --git a/catalyst/metrics/tests/test_mrr.py b/catalyst/metrics/tests/test_mrr.py index 384d450ca3..8e8bd800ab 100644 --- a/catalyst/metrics/tests/test_mrr.py +++ b/catalyst/metrics/tests/test_mrr.py @@ -5,7 +5,7 @@ def test_mrr(): """ - Tests for catalyst.utils.metrics.mrr metric. + Tests for catalyst.metrics.mrr metric. """ # # check 0 simple case y_pred = [0.5, 0.2] From 4947d4b615a4e4c1c246b5482e7032eeb4c0e528 Mon Sep 17 00:00:00 2001 From: Daniel Chepenko Date: Sun, 11 Oct 2020 19:49:11 +0900 Subject: [PATCH 30/30] fixed docs --- catalyst/metrics/tests/__init__.py | 0 docs/api/utils.rst | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 catalyst/metrics/tests/__init__.py diff --git a/catalyst/metrics/tests/__init__.py b/catalyst/metrics/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/api/utils.rst b/docs/api/utils.rst index f3f1bd5590..4cfe66902a 100644 --- a/docs/api/utils.rst +++ b/docs/api/utils.rst @@ -293,7 +293,7 @@ Functional MRR ~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.utils.metrics.mrr +.. automodule:: catalyst.metrics.mrr :members: :undoc-members: :show-inheritance: