From b72450718f867a6fe067f274fdaa34ec090aa1b9 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 08:59:50 +0200 Subject: [PATCH 01/27] Deprecate public metric functions --- src/datasets/inspect.py | 15 +++++++++++++++ src/datasets/load.py | 9 +++++++++ tests/test_warnings.py | 0 3 files changed, 24 insertions(+) create mode 100644 tests/test_warnings.py diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py index 2e7e702e766..d3214effb10 100644 --- a/src/datasets/inspect.py +++ b/src/datasets/inspect.py @@ -28,6 +28,7 @@ from .download.streaming_download_manager import StreamingDownloadManager from .info import DatasetInfo from .load import dataset_module_factory, import_main_class, load_dataset_builder, metric_module_factory +from .utils.deprecation_utils import deprecated from .utils.file_utils import relative_to_absolute_path from .utils.logging import get_logger from .utils.version import Version @@ -70,9 +71,16 @@ def list_datasets(with_community_datasets=True, with_details=False): return datasets +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def list_metrics(with_community_metrics=True, with_details=False): """List all the metrics script available on the Hugging Face Hub. + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + Args: with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics. with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name. 
@@ -138,10 +146,17 @@ def inspect_dataset(path: str, local_path: str, download_config: Optional[Downlo ) +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def inspect_metric(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs): r""" Allow inspection/modification of a metric script by copying it on local drive at local_path. + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + Args: path (``str``): path to the dataset processing script with the dataset builder. Can be either: diff --git a/src/datasets/load.py b/src/datasets/load.py index 567587583fd..b2914b61eb1 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1,4 +1,5 @@ # Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. +# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -60,6 +61,7 @@ ) from .splits import Split from .tasks import TaskTemplate +from .utils.deprecation_utils import deprecated from .utils.file_utils import ( OfflineModeIsEnabled, _raise_if_offline_mode_is_enabled, @@ -1337,6 +1339,7 @@ def metric_module_factory( raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}.") +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def load_metric( path: str, config_name: Optional[str] = None, @@ -1352,6 +1355,12 @@ def load_metric( ) -> Metric: """Load a `datasets.Metric`. 
+ + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + Args: path (``str``): diff --git a/tests/test_warnings.py b/tests/test_warnings.py new file mode 100644 index 00000000000..e69de29bb2d From b91db4767bff937ee522f559add69b57ded11183 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:00:41 +0200 Subject: [PATCH 02/27] Test metric deprecation warnings --- tests/test_warnings.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_warnings.py b/tests/test_warnings.py index e69de29bb2d..7dab6d55850 100644 --- a/tests/test_warnings.py +++ b/tests/test_warnings.py @@ -0,0 +1,29 @@ +import pytest + +from datasets import inspect_metric, list_metrics, load_metric + + +# Used by list_metrics +@pytest.fixture +def mock_hfh(monkeypatch): + class MetricMock: + def __init__(self, metric_id): + self.id = metric_id + + class HfhMock: + _metrics = [MetricMock(metric_id) for metric_id in ["accuracy", "mse", "precision", "codeparrot/apps_metric"]] + + def list_metrics(self): + return self._metrics + + monkeypatch.setattr("datasets.inspect.huggingface_hub", HfhMock()) + + +@pytest.mark.parametrize( + "func, args", [(load_metric, ("metrics/mse",)), (list_metrics, ()), (inspect_metric, ("metrics/mse", "tmp_path"))] +) +def test_metric_deprecation_warning(func, args, mock_hfh, tmp_path): + if "tmp_path" in args: + args = tuple(arg if arg != "tmp_path" else tmp_path for arg in args) + with pytest.warns(FutureWarning, match="https://huggingface.co/docs/evaluate"): + func(*args) From a800b3a45638bb9ba7017311ba088c8b090ec225 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:02:44 +0200 Subject: [PATCH 03/27] Deprecate metrics in docs --- docs/source/about_metrics.mdx | 6 ++++++ docs/source/package_reference/loading_methods.mdx | 6 ++++++ 2 files 
changed, 12 insertions(+) diff --git a/docs/source/about_metrics.mdx b/docs/source/about_metrics.mdx index f47843d6303..565c998cdc4 100644 --- a/docs/source/about_metrics.mdx +++ b/docs/source/about_metrics.mdx @@ -1,5 +1,11 @@ # All about metrics + + +Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. + + + 🤗 Datasets provides access to a wide range of NLP metrics. You can load metrics associated with benchmark datasets like GLUE or SQuAD, and complex metrics like BLEURT or BERTScore, with a single command: [`load_metric`]. Once you've loaded a metric, easily compute and evaluate a model's performance. ## ELI5: `load_metric` diff --git a/docs/source/package_reference/loading_methods.mdx b/docs/source/package_reference/loading_methods.mdx index cdae9ed21cb..e71231f4a5a 100644 --- a/docs/source/package_reference/loading_methods.mdx +++ b/docs/source/package_reference/loading_methods.mdx @@ -22,6 +22,12 @@ Methods for listing and loading datasets and metrics: ## Metrics + + +Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. 
+ + + [[autodoc]] datasets.list_metrics [[autodoc]] datasets.load_metric From 23a357c9f0a1e69d927339614f1c8bd7848eb143 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:03:43 +0200 Subject: [PATCH 04/27] Remove mentions to metrics in docs and README --- README.md | 4 +--- docs/source/index.mdx | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ed86b2f2b0a..cbc17f711ef 100644 --- a/README.md +++ b/README.md @@ -85,8 +85,6 @@ For more details on using the library with NumPy, pandas, PyTorch or TensorFlow, - `datasets.list_datasets()` to list the available datasets - `datasets.load_dataset(dataset_name, **kwargs)` to instantiate a dataset -- `datasets.list_metrics()` to list the available metrics -- `datasets.load_metric(metric_name, **kwargs)` to instantiate a metric This library can be used for text/image/audio/etc. datasets. Here is an example to load a text dataset: @@ -150,7 +148,7 @@ If you are familiar with the great TensorFlow Datasets, here are the main differ # Disclaimers -Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. +Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute most of these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. 
If you're a dataset owner and wish to update any part of it (description, citation, etc.), or do not want your dataset to be included in this library, please get in touch through a [GitHub issue](https://github.com/huggingface/datasets/issues/new). Thanks for your contribution to the ML community! diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 3432ca76404..ba1c17c5658 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -2,7 +2,7 @@ -🤗 Datasets is a library for easily accessing and sharing datasets, and evaluation metrics for Natural Language Processing (NLP), computer vision, and audio tasks. +🤗 Datasets is a library for easily accessing and sharing datasets for Natural Language Processing (NLP), computer vision, and audio tasks. Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. There are currently over 2658 datasets, and more than 34 metrics available. 
From 386892217236763cf30933055eb90891871c210b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:24:02 +0200 Subject: [PATCH 05/27] Deprecate internal metric functions/classes --- src/datasets/load.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index b2914b61eb1..070c3d8a467 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -511,8 +511,16 @@ def get_module(self) -> DatasetModule: return DatasetModule(module_path, hash, builder_kwargs) +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class GithubMetricModuleFactory(_MetricModuleFactory): - """Get the module of a metric. The metric script is downloaded from GitHub.""" + """Get the module of a metric. The metric script is downloaded from GitHub. + + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + """ def __init__( self, @@ -578,8 +586,16 @@ def get_module(self) -> MetricModule: return MetricModule(module_path, hash) +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class LocalMetricModuleFactory(_MetricModuleFactory): - """Get the module of a local metric. The metric script is loaded from a local script.""" + """Get the module of a local metric. The metric script is loaded from a local script. + + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + """ def __init__( self, @@ -1015,10 +1031,17 @@ def _get_modification_time(module_hash): return DatasetModule(module_path, hash, builder_kwargs) +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class CachedMetricModuleFactory(_MetricModuleFactory): """ Get the module of a metric that has been loaded once already and cached. 
The script that is loaded from the cache is the most recent one with a matching name. + + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + """ def __init__( @@ -1253,6 +1276,7 @@ def dataset_module_factory( ) +@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def metric_module_factory( path: str, revision: Optional[Union[str, Version]] = None, @@ -1264,7 +1288,13 @@ def metric_module_factory( """ Download/extract/cache a metric module. - Metrics codes are cached inside the the dynamic modules cache to allow easy import (avoid ugly sys.path tweaks). + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + + Metrics codes are cached inside the dynamic modules cache to allow easy import (avoid ugly sys.path tweaks). Args: From d159d4aa19d4e3575502f784c349337e995b76aa Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:25:36 +0200 Subject: [PATCH 06/27] Warn metric deprecation only once --- src/datasets/load.py | 123 +++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index 070c3d8a467..b69757610ac 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -22,6 +22,7 @@ import os import shutil import time +import warnings from collections import Counter from dataclasses import dataclass from pathlib import Path @@ -1324,49 +1325,53 @@ def metric_module_factory( Returns: MetricModule """ - if download_config is None: - download_config = DownloadConfig(**download_kwargs) - download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS) - download_config.extract_compressed_file = True - download_config.force_extract = True - - filename = list(filter(lambda x: x, path.replace(os.sep, "/").split("/")))[-1] - if not filename.endswith(".py"): - filename = filename 
+ ".py" - combined_path = os.path.join(path, filename) - # Try locally - if path.endswith(filename): - if os.path.isfile(path): + with warnings.catch_warnings(): + # Ignore equivalent warnings to the one already issued + warnings.filterwarnings("ignore", message=".*https://huggingface.co/docs/evaluate$", category=FutureWarning) + + if download_config is None: + download_config = DownloadConfig(**download_kwargs) + download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS) + download_config.extract_compressed_file = True + download_config.force_extract = True + + filename = list(filter(lambda x: x, path.replace(os.sep, "/").split("/")))[-1] + if not filename.endswith(".py"): + filename = filename + ".py" + combined_path = os.path.join(path, filename) + # Try locally + if path.endswith(filename): + if os.path.isfile(path): + return LocalMetricModuleFactory( + path, download_mode=download_mode, dynamic_modules_path=dynamic_modules_path + ).get_module() + else: + raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(path)}") + elif os.path.isfile(combined_path): return LocalMetricModuleFactory( - path, download_mode=download_mode, dynamic_modules_path=dynamic_modules_path - ).get_module() - else: - raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(path)}") - elif os.path.isfile(combined_path): - return LocalMetricModuleFactory( - combined_path, download_mode=download_mode, dynamic_modules_path=dynamic_modules_path - ).get_module() - elif is_relative_path(path) and path.count("/") == 0: - try: - return GithubMetricModuleFactory( - path, - revision=revision, - download_config=download_config, - download_mode=download_mode, - dynamic_modules_path=dynamic_modules_path, + combined_path, download_mode=download_mode, dynamic_modules_path=dynamic_modules_path ).get_module() - except Exception as e1: # noqa: all the attempts failed, before raising the error we should check if the module 
is already cached. + elif is_relative_path(path) and path.count("/") == 0: try: - return CachedMetricModuleFactory(path, dynamic_modules_path=dynamic_modules_path).get_module() - except Exception as e2: # noqa: if it's not in the cache, then it doesn't exist. - if not isinstance(e1, FileNotFoundError): - raise e1 from None - raise FileNotFoundError( - f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}. " - f"Metric '{path}' doesn't exist on the Hugging Face Hub either." - ) from None - else: - raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}.") + return GithubMetricModuleFactory( + path, + revision=revision, + download_config=download_config, + download_mode=download_mode, + dynamic_modules_path=dynamic_modules_path, + ).get_module() + except Exception as e1: # noqa: all the attempts failed, before raising the error we should check if the module is already cached. + try: + return CachedMetricModuleFactory(path, dynamic_modules_path=dynamic_modules_path).get_module() + except Exception as e2: # noqa: if it's not in the cache, then it doesn't exist. + if not isinstance(e1, FileNotFoundError): + raise e1 from None + raise FileNotFoundError( + f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}. " + f"Metric '{path}' doesn't exist on the Hugging Face Hub either." 
+ ) from None + else: + raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}.") @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") @@ -1424,25 +1429,29 @@ def load_metric( {'accuracy': 0.5} ``` """ - download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS) - metric_module = metric_module_factory( - path, revision=revision, download_config=download_config, download_mode=download_mode - ).module_path - metric_cls = import_main_class(metric_module, dataset=False) - metric = metric_cls( - config_name=config_name, - process_id=process_id, - num_process=num_process, - cache_dir=cache_dir, - keep_in_memory=keep_in_memory, - experiment_id=experiment_id, - **metric_init_kwargs, - ) + with warnings.catch_warnings(): + # Ignore equivalent warnings to the one already issued + warnings.filterwarnings("ignore", message=".*https://huggingface.co/docs/evaluate$", category=FutureWarning) + + download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS) + metric_module = metric_module_factory( + path, revision=revision, download_config=download_config, download_mode=download_mode + ).module_path + metric_cls = import_main_class(metric_module, dataset=False) + metric = metric_cls( + config_name=config_name, + process_id=process_id, + num_process=num_process, + cache_dir=cache_dir, + keep_in_memory=keep_in_memory, + experiment_id=experiment_id, + **metric_init_kwargs, + ) - # Download and prepare resources for the metric - metric.download_and_prepare(download_config=download_config) + # Download and prepare resources for the metric + metric.download_and_prepare(download_config=download_config) - return metric + return metric def load_dataset_builder( From f698de6cb5767bb9d78f429d8062d5d9ef2a97da Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:29:35 +0200 Subject: 
[PATCH 07/27] Deprecate Metric class --- src/datasets/metric.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/datasets/metric.py b/src/datasets/metric.py index a53ed7e7613..faaa7a47533 100644 --- a/src/datasets/metric.py +++ b/src/datasets/metric.py @@ -31,6 +31,7 @@ from .features import Features from .info import DatasetInfo, MetricInfo from .naming import camelcase_to_snakecase +from .utils.deprecation_utils import deprecated from .utils.filelock import BaseFileLock, FileLock, Timeout from .utils.logging import get_logger from .utils.py_utils import copyfunc, temp_seed @@ -76,6 +77,13 @@ def format_chunk(chunk): class MetricInfoMixin: """This base class exposes some attributes of MetricInfo at the base level of the Metric for easy access. + + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + """ def __init__(self, info: MetricInfo): @@ -138,6 +146,12 @@ def format(self) -> Optional[str]: class Metric(MetricInfoMixin): """A Metric is the base class and common API for all metrics. + + + Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + Args: config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data to be overridden when the metric loading script is modified. @@ -155,6 +169,7 @@ class Metric(MetricInfoMixin): timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization. 
""" + @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, config_name: Optional[str] = None, From 7816acc4e7fe1ea268b51bcb15a101bc836ad5bc Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 09:31:42 +0200 Subject: [PATCH 08/27] Support deprecating __init__ method for subclassed classes --- src/datasets/utils/deprecation_utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/datasets/utils/deprecation_utils.py b/src/datasets/utils/deprecation_utils.py index c8fd1305391..6e4d35b9e61 100644 --- a/src/datasets/utils/deprecation_utils.py +++ b/src/datasets/utils/deprecation_utils.py @@ -20,12 +20,11 @@ def deprecated(help_message: Optional[str] = None): def decorator(deprecated_function: Callable): global _emitted_deprecation_warnings + name = deprecated_function.__name__ + # Support deprecating __init__ class method: class name instead + name = name if name != "__init__" else deprecated_function.__qualname__.split(".")[-2] warning_msg = ( - ( - f"{deprecated_function.__name__} is deprecated and will be removed " - "in the next major version of datasets." - ) - + f" {help_message}" + f"{name} is deprecated and will be removed " "in the next major version of datasets." 
+ f" {help_message}" if help_message else "" ) From c9baf39dcf90c2f1a6437e09f57beb0617503fa5 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 10:51:09 +0200 Subject: [PATCH 09/27] Move deprecated decorator to __init__ class method --- src/datasets/load.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index b69757610ac..235273c3204 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -512,7 +512,6 @@ def get_module(self) -> DatasetModule: return DatasetModule(module_path, hash, builder_kwargs) -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class GithubMetricModuleFactory(_MetricModuleFactory): """Get the module of a metric. The metric script is downloaded from GitHub. @@ -523,6 +522,7 @@ class GithubMetricModuleFactory(_MetricModuleFactory): """ + @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, name: str, @@ -587,7 +587,6 @@ def get_module(self) -> MetricModule: return MetricModule(module_path, hash) -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class LocalMetricModuleFactory(_MetricModuleFactory): """Get the module of a local metric. The metric script is loaded from a local script. @@ -598,6 +597,7 @@ class LocalMetricModuleFactory(_MetricModuleFactory): """ + @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, path: str, @@ -1032,7 +1032,6 @@ def _get_modification_time(module_hash): return DatasetModule(module_path, hash, builder_kwargs) -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") class CachedMetricModuleFactory(_MetricModuleFactory): """ Get the module of a metric that has been loaded once already and cached. 
@@ -1045,6 +1044,7 @@ class CachedMetricModuleFactory(_MetricModuleFactory): """ + @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, name: str, From 0b434bd0575ab2439f6a55d8ddec0ef954018cc2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:04:55 +0200 Subject: [PATCH 10/27] Update deprecation message in docs --- docs/source/about_metrics.mdx | 2 +- docs/source/how_to_metrics.mdx | 2 +- docs/source/loading.mdx | 2 +- docs/source/metrics.mdx | 2 +- docs/source/package_reference/loading_methods.mdx | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/about_metrics.mdx b/docs/source/about_metrics.mdx index 565c998cdc4..2e5b722f988 100644 --- a/docs/source/about_metrics.mdx +++ b/docs/source/about_metrics.mdx @@ -2,7 +2,7 @@ -Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. +Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. diff --git a/docs/source/how_to_metrics.mdx b/docs/source/how_to_metrics.mdx index 2023193c145..db755077220 100644 --- a/docs/source/how_to_metrics.mdx +++ b/docs/source/how_to_metrics.mdx @@ -2,7 +2,7 @@ -Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. +Metrics are deprecated in 🤗 Datasets. 
To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. diff --git a/docs/source/loading.mdx b/docs/source/loading.mdx index 6bebb16ae84..eef4d3290f0 100644 --- a/docs/source/loading.mdx +++ b/docs/source/loading.mdx @@ -340,7 +340,7 @@ Now when you look at your dataset features, you can see it uses the custom label -Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. +Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. diff --git a/docs/source/metrics.mdx b/docs/source/metrics.mdx index 684378db2b1..3342fa847c4 100644 --- a/docs/source/metrics.mdx +++ b/docs/source/metrics.mdx @@ -2,7 +2,7 @@ -Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. +Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. 
diff --git a/docs/source/package_reference/loading_methods.mdx b/docs/source/package_reference/loading_methods.mdx index e71231f4a5a..66508e3dbf1 100644 --- a/docs/source/package_reference/loading_methods.mdx +++ b/docs/source/package_reference/loading_methods.mdx @@ -24,7 +24,7 @@ Methods for listing and loading datasets and metrics: -Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets. +Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. From 1d4d9b132523c1102f9deef4b4c0282ecd143028 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:13:22 +0200 Subject: [PATCH 11/27] Remove mentions to metrics in docstring/README --- README.md | 10 ++-------- docs/source/index.mdx | 2 +- src/datasets/inspect.py | 2 +- src/datasets/utils/logging.py | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index cbc17f711ef..b6b39f794fb 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ -🤗 Datasets also provides access to +40 evaluation metrics and is designed to let the community easily add and share new datasets and evaluation metrics. +🤗 Datasets is designed to let the community easily add and share new datasets. 🤗 Datasets has many additional interesting features: @@ -91,7 +91,7 @@ This library can be used for text/image/audio/etc. datasets. 
Here is an example Here is a quick example: ```python -from datasets import list_datasets, load_dataset, list_metrics, load_metric +from datasets import list_datasets, load_dataset # Print all the available datasets print(list_datasets()) @@ -100,12 +100,6 @@ print(list_datasets()) squad_dataset = load_dataset('squad') print(squad_dataset['train'][0]) -# List all the available metrics -print(list_metrics()) - -# Load a metric -squad_metric = load_metric('squad') - # Process the dataset - add a column with the length of the context texts dataset_with_length = squad_dataset.map(lambda x: {"length": len(x["context"])}) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index ba1c17c5658..26c2d0d8c41 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -4,7 +4,7 @@ 🤗 Datasets is a library for easily accessing and sharing datasets for Natural Language Processing (NLP), computer vision, and audio tasks. -Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. There are currently over 2658 datasets, and more than 34 metrics available. +Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. 
There are currently over 2658 datasets available. Find your dataset today on the [Hugging Face Hub](https://huggingface.co/datasets), and take an in-depth look inside of it with the live viewer. diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py index d3214effb10..b06944a8736 100644 --- a/src/datasets/inspect.py +++ b/src/datasets/inspect.py @@ -13,7 +13,7 @@ # limitations under the License. # Lint as: python3 -""" List and inspect datasets and metrics.""" +""" List and inspect datasets.""" import inspect import os diff --git a/src/datasets/utils/logging.py b/src/datasets/utils/logging.py index 811aebbaff0..ebe9389c818 100644 --- a/src/datasets/utils/logging.py +++ b/src/datasets/utils/logging.py @@ -77,7 +77,7 @@ def _reset_library_root_logger() -> None: def get_logger(name: Optional[str] = None) -> logging.Logger: """Return a logger with the specified name. - This function can be used in dataset and metrics scripts. + This function can be used in dataset scripts. """ if name is None: name = _get_library_name() From 293e828e8d950305e34b6a1c3425d605ae322d95 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:15:00 +0200 Subject: [PATCH 12/27] Remove new_metric_script template --- templates/new_metric_script.py | 104 --------------------------------- 1 file changed, 104 deletions(-) delete mode 100644 templates/new_metric_script.py diff --git a/templates/new_metric_script.py b/templates/new_metric_script.py deleted file mode 100644 index 5168beda388..00000000000 --- a/templates/new_metric_script.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""TODO: Add a description here.""" - -import datasets - - -# TODO: Add BibTeX citation -_CITATION = """\ -@InProceedings{huggingface:metric, -title = {A great new metric}, -authors={huggingface, Inc.}, -year={2020} -} -""" - -# TODO: Add description of the metric here -_DESCRIPTION = """\ -This new metric is designed to solve this great NLP task and is crafted with a lot of care. -""" - - -# TODO: Add description of the arguments of the metric here -_KWARGS_DESCRIPTION = """ -Calculates how good are predictions given some references, using certain scores -Args: - predictions: list of predictions to score. Each predictions - should be a string with tokens separated by spaces. - references: list of reference for each prediction. Each - reference should be a string with tokens separated by spaces. -Returns: - accuracy: description of the first score, - another_score: description of the second score, -Examples: - Examples should be written in doctest format, and should illustrate how - to use the function. 
- - >>> my_new_metric = datasets.load_metric("my_new_metric") - >>> results = my_new_metric.compute(references=[0, 1], predictions=[0, 1]) - >>> print(results) - {'accuracy': 1.0} -""" - -# TODO: Define external resources urls if needed -BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt" - - -@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) -class NewMetric(datasets.Metric): - """TODO: Short description of my metric.""" - - def _info(self): - # TODO: Specifies the datasets.MetricInfo object - return datasets.MetricInfo( - # This is the description that will appear on the metrics page. - description=_DESCRIPTION, - citation=_CITATION, - inputs_description=_KWARGS_DESCRIPTION, - # This defines the format of each prediction and reference - features=datasets.Features({ - 'predictions': datasets.Value('string'), - 'references': datasets.Value('string'), - }), - # Homepage of the metric for documentation - homepage="http://metric.homepage", - # Additional links to the codebase or references - codebase_urls=["http://github.com/path/to/codebase/of/new_metric"], - reference_urls=["http://path.to.reference.url/new_metric"] - ) - - def _download_and_prepare(self, dl_manager): - """Optional: download external resources useful to compute the scores""" - # TODO: Download external resources if needed - bad_words_path = dl_manager.download_and_extract(BAD_WORDS_URL) - self.bad_words = {w.strip() for w in open(bad_words_path, encoding="utf-8")} - - def _compute(self, predictions, references): - """Returns the scores""" - # TODO: Compute the different scores of the metric - accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions) - - if self.config_name == "max": - second_score = max(abs(len(i) - len(j)) for i, j in zip(predictions, references) if i not in self.bad_words) - elif self.config_name == "mean": - second_score = sum(abs(len(i) - len(j)) for i, j in zip(predictions, references) if i not in 
self.bad_words) - second_score /= sum(i not in self.bad_words for i in predictions) - else: - raise ValueError(f"Invalid config name for NewMetric: {self.config_name}. Please use 'max' or 'mean'.") - - return { - "accuracy": accuracy, - "second_score": second_score, - } From c77e04ec5f8b1afd3ef0257a90de07f951872865 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:33:30 +0200 Subject: [PATCH 13/27] Skip metric tests --- tests/test_inspect.py | 1 + tests/test_load.py | 10 +++++++--- tests/test_metric.py | 3 +++ tests/test_metric_common.py | 3 +++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_inspect.py b/tests/test_inspect.py index 5774ea8b889..df3269841a1 100644 --- a/tests/test_inspect.py +++ b/tests/test_inspect.py @@ -20,6 +20,7 @@ def test_inspect_dataset(path, tmp_path): assert "__pycache__" not in os.listdir(tmp_path) +@pytest.mark.skip(reason="metrics are deprecated") @pytest.mark.parametrize("path", ["accuracy"]) def test_inspect_metric(path, tmp_path): inspect_metric(path, tmp_path) diff --git a/tests/test_load.py b/tests/test_load.py index 712f9b55e93..0ad1ce03f9b 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -264,6 +264,7 @@ def test_GithubDatasetModuleFactory(self): assert importlib.import_module(module_factory_result.module_path) is not None assert module_factory_result.builder_kwargs["base_path"].startswith(config.HF_ENDPOINT) + @pytest.mark.skip(reason="metrics are deprecated") def test_GithubMetricModuleFactory_with_internal_import(self): # "squad_v2" requires additional imports (internal) factory = GithubMetricModuleFactory( @@ -272,6 +273,7 @@ def test_GithubMetricModuleFactory_with_internal_import(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None + @pytest.mark.skip(reason="metrics are deprecated") def 
test_GithubMetricModuleFactory_with_external_import(self): # "bleu" requires additional imports (external from github) factory = GithubMetricModuleFactory( @@ -280,6 +282,7 @@ def test_GithubMetricModuleFactory_with_external_import(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None + @pytest.mark.skip(reason="metrics are deprecated") def test_LocalMetricModuleFactory(self): path = os.path.join(self._metric_loading_script_dir, f"{METRIC_LOADING_SCRIPT_NAME}.py") factory = LocalMetricModuleFactory( @@ -454,6 +457,7 @@ def test_CachedDatasetModuleFactory(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None + @pytest.mark.skip(reason="metrics are deprecated") def test_CachedMetricModuleFactory(self): path = os.path.join(self._metric_loading_script_dir, f"{METRIC_LOADING_SCRIPT_NAME}.py") factory = LocalMetricModuleFactory( @@ -474,14 +478,14 @@ def test_CachedMetricModuleFactory(self): "factory_class", [ CachedDatasetModuleFactory, - CachedMetricModuleFactory, + pytest.param(CachedMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), GithubDatasetModuleFactory, - GithubMetricModuleFactory, + pytest.param(GithubMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), HubDatasetModuleFactoryWithoutScript, HubDatasetModuleFactoryWithScript, LocalDatasetModuleFactoryWithoutScript, LocalDatasetModuleFactoryWithScript, - LocalMetricModuleFactory, + pytest.param(LocalMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), PackagedDatasetModuleFactory, ], ) diff --git a/tests/test_metric.py b/tests/test_metric.py index 2899683d1f4..ea743512148 100644 --- a/tests/test_metric.py +++ b/tests/test_metric.py @@ -13,6 +13,9 @@ from .utils import require_tf, require_torch +pytestmark = pytest.mark.skip(reason="metrics are deprecated") + + class 
DummyMetric(Metric): def _info(self): return MetricInfo( diff --git a/tests/test_metric_common.py b/tests/test_metric_common.py index 240db64dae6..3ca05d4064f 100644 --- a/tests/test_metric_common.py +++ b/tests/test_metric_common.py @@ -32,6 +32,9 @@ from .utils import for_all_test_methods, local, slow +pytestmark = pytest.mark.skip(reason="metrics are deprecated") + + REQUIRE_FAIRSEQ = {"comet"} _has_fairseq = importlib.util.find_spec("fairseq") is not None From b97df664c7d1e9b8c5725883069c59836360990a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:56:10 +0200 Subject: [PATCH 14/27] Remove metrics from code quality check --- .github/workflows/ci.yml | 6 +++--- Makefile | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4f14085ee4..e1a29ea9a7e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,9 +28,9 @@ jobs: pip install .[quality] - name: Check quality run: | - black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics - isort --check-only tests src benchmarks datasets metrics - flake8 tests src benchmarks datasets metrics + black --check --line-length 119 --target-version py36 tests src benchmarks datasets + isort --check-only tests src benchmarks datasets + flake8 tests src benchmarks datasets test: needs: check_code_quality diff --git a/Makefile b/Makefile index e3615d44ed0..ad0ede52d32 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,15 @@ # Check that source code meets quality standards quality: - black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics - isort --check-only tests src benchmarks datasets/**/*.py metrics - flake8 tests src benchmarks datasets/**/*.py metrics + black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py + isort --check-only 
tests src benchmarks datasets/**/*.py + flake8 tests src benchmarks datasets/**/*.py # Format source code automatically style: - black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics - isort tests src benchmarks datasets/**/*.py metrics + black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py + isort tests src benchmarks datasets/**/*.py # Run tests for the library From 31d7d6f9775353d80f8eb8ceda414032234ea7a8 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:57:10 +0200 Subject: [PATCH 15/27] Remove metric test requirements --- setup.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/setup.py b/setup.py index a055a239994..b266642e5fa 100644 --- a/setup.py +++ b/setup.py @@ -147,17 +147,6 @@ "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz", "sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece "sacremoses", - # metrics dependencies - "bert_score>=0.3.6", - "jiwer", - "mauve-text", - "rouge_score<0.0.7", - "sacrebleu", - "sacremoses", - "scikit-learn", - "scipy", - "sentencepiece", # for bleurt - "seqeval", # to speed up pip backtracking "toml>=0.10.1", "requests_file>=1.5.1", From 929c228dd5edd610e71b44f63cc511202fdd310e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 16:22:52 +0200 Subject: [PATCH 16/27] Add rouge_score test requirement needed by bigbench --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b266642e5fa..6b7b07af6b9 100644 --- a/setup.py +++ b/setup.py @@ -146,6 +146,7 @@ "zstandard", "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz", "sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece + 
"rouge_score<0.0.7", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score "sacremoses", # to speed up pip backtracking "toml>=0.10.1", From e255548f3428371a8d9d69e09a3a37159eaf15d4 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 17:08:20 +0200 Subject: [PATCH 17/27] Remove metrics additional tests requirements --- .github/workflows/ci.yml | 1 - additional-tests-requirements.txt | 4 ---- 2 files changed, 5 deletions(-) delete mode 100644 additional-tests-requirements.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1a29ea9a7e..193575152bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,7 +67,6 @@ jobs: - name: Install dependencies run: | pip install .[tests] - pip install -r additional-tests-requirements.txt --no-deps - name: Install latest PyArrow if: ${{ matrix.pyarrow_version == 'latest' }} run: pip install pyarrow --upgrade diff --git a/additional-tests-requirements.txt b/additional-tests-requirements.txt deleted file mode 100644 index a827c308c9f..00000000000 --- a/additional-tests-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -unbabel-comet>=1.0.0;python_version>'3.6' -git+https://github.com/google-research/bleurt.git -git+https://github.com/ns-moosavi/coval.git -git+https://github.com/hendrycks/math.git From 00e8aac88cd094db8f6a5654c0f1b9ba29b1929e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 25 Jul 2022 17:24:02 +0200 Subject: [PATCH 18/27] Remove test requirements only used by metrics --- .github/workflows/ci.yml | 3 --- setup.py | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 193575152bc..f5025562153 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,9 +61,6 @@ jobs: python-version: 3.7 - name: Upgrade 
pip run: python -m pip install --upgrade pip - - name: Pin setuptools-scm - if: ${{ matrix.os == 'ubuntu-latest' }} - run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2" - name: Install dependencies run: | pip install .[tests] diff --git a/setup.py b/setup.py index 6b7b07af6b9..3c8b796522c 100644 --- a/setup.py +++ b/setup.py @@ -123,6 +123,7 @@ "botocore>=1.22.8", # to be compatible with aiobotocore and boto3 "faiss-cpu>=1.6.4", "fsspec[s3]", + "lz4", "moto[s3,server]==2.0.4", "rarfile>=4.0", "s3fs>=2021.11.1", # aligned with fsspec[http]>=2021.11.1 @@ -135,19 +136,14 @@ "bs4", "conllu", "h5py", - "langdetect", "lxml", - "lz4", "mwparserfromhell", - "nltk", "openpyxl", "py7zr", - "tldextract", "zstandard", "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz", "sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece "rouge_score<0.0.7", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score - "sacremoses", # to speed up pip backtracking "toml>=0.10.1", "requests_file>=1.5.1", From 755010da1934b725b2ff4e4c8fe1809e69545a39 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 26 Jul 2022 15:55:51 +0200 Subject: [PATCH 19/27] Address requested changes --- docs/source/how_to_metrics.mdx | 2 +- docs/source/index.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how_to_metrics.mdx b/docs/source/how_to_metrics.mdx index db755077220..157214e5559 100644 --- a/docs/source/how_to_metrics.mdx +++ b/docs/source/how_to_metrics.mdx @@ -2,7 +2,7 @@ -Metrics are deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. 
+Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets. diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 26c2d0d8c41..993ffe1a4e2 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -4,7 +4,7 @@ 🤗 Datasets is a library for easily accessing and sharing datasets for Natural Language Processing (NLP), computer vision, and audio tasks. -Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. There are currently over 2658 datasets available. +Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. Find your dataset today on the [Hugging Face Hub](https://huggingface.co/datasets), and take an in-depth look inside of it with the live viewer. 
From fe20f3133831baaf1deb3339247e7ccd27c0d539 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 26 Jul 2022 16:00:27 +0200 Subject: [PATCH 20/27] Update deprecation version after latest release --- src/datasets/inspect.py | 8 ++++---- src/datasets/load.py | 20 ++++++++++---------- src/datasets/metric.py | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py index b06944a8736..44b8d573cc3 100644 --- a/src/datasets/inspect.py +++ b/src/datasets/inspect.py @@ -75,9 +75,9 @@ def list_datasets(with_community_datasets=True, with_details=False): def list_metrics(with_community_metrics=True, with_details=False): """List all the metrics script available on the Hugging Face Hub. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate @@ -151,9 +151,9 @@ def inspect_metric(path: str, local_path: str, download_config: Optional[Downloa r""" Allow inspection/modification of a metric script by copying it on local drive at local_path. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate diff --git a/src/datasets/load.py b/src/datasets/load.py index 235273c3204..fe6394a3d28 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -515,9 +515,9 @@ def get_module(self) -> DatasetModule: class GithubMetricModuleFactory(_MetricModuleFactory): """Get the module of a metric. The metric script is downloaded from GitHub. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate """ @@ -590,9 +590,9 @@ def get_module(self) -> MetricModule: class LocalMetricModuleFactory(_MetricModuleFactory): """Get the module of a local metric. 
The metric script is loaded from a local script. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate """ @@ -1037,9 +1037,9 @@ class CachedMetricModuleFactory(_MetricModuleFactory): Get the module of a metric that has been loaded once already and cached. The script that is loaded from the cache is the most recent one with a matching name. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate """ @@ -1289,9 +1289,9 @@ def metric_module_factory( """ Download/extract/cache a metric module. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate @@ -1390,9 +1390,9 @@ def load_metric( ) -> Metric: """Load a `datasets.Metric`. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate diff --git a/src/datasets/metric.py b/src/datasets/metric.py index faaa7a47533..33de1bc2593 100644 --- a/src/datasets/metric.py +++ b/src/datasets/metric.py @@ -78,9 +78,9 @@ class MetricInfoMixin: """This base class exposes some attributes of MetricInfo at the base level of the Metric for easy access. - + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate @@ -146,9 +146,9 @@ def format(self) -> Optional[str]: class Metric(MetricInfoMixin): """A Metric is the base class and common API for all metrics. 
- + - Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate From 8ce323eacfbd776f217ca4ca8bfadb56663abfe5 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 26 Jul 2022 22:57:05 +0200 Subject: [PATCH 21/27] Remove repeated comment --- src/datasets/load.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index fe6394a3d28..67cb737b752 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1,5 +1,4 @@ # Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. -# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 642d48193d1af501d48e39a1815e81d1a62c3a50 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 26 Jul 2022 22:58:35 +0200 Subject: [PATCH 22/27] Give hint to switch to evaluate --- src/datasets/inspect.py | 8 ++++---- src/datasets/load.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py index 44b8d573cc3..bda32a49f06 100644 --- a/src/datasets/inspect.py +++ b/src/datasets/inspect.py @@ -71,13 +71,13 @@ def list_datasets(with_community_datasets=True, with_details=False): return datasets -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") +@deprecated("Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate") def list_metrics(with_community_metrics=True, with_details=False): """List all the metrics script available on the Hugging Face Hub. 
- Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate @@ -146,14 +146,14 @@ def inspect_dataset(path: str, local_path: str, download_config: Optional[Downlo ) -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") +@deprecated("Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate") def inspect_metric(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs): r""" Allow inspection/modification of a metric script by copying it on local drive at local_path. - Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate diff --git a/src/datasets/load.py b/src/datasets/load.py index 67cb737b752..41664327eae 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1373,7 +1373,7 @@ def metric_module_factory( raise FileNotFoundError(f"Couldn't find a metric script at {relative_to_absolute_path(combined_path)}.") -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") +@deprecated("Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate") def load_metric( path: str, config_name: Optional[str] = None, @@ -1391,7 +1391,7 @@ def load_metric( - Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + Use `evaluate.load` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate From 84b47d04a7a4c214ad17626e3aa4b1a9e52f3bf6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 26 Jul 2022 22:58:56 +0200 Subject: [PATCH 23/27] Fix minor details ---
src/datasets/load.py | 8 ++++---- src/datasets/metric.py | 2 +- src/datasets/utils/deprecation_utils.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index 41664327eae..a7cb698b727 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -521,7 +521,7 @@ class GithubMetricModuleFactory(_MetricModuleFactory): """ - @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") + @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, name: str, @@ -596,7 +596,7 @@ class LocalMetricModuleFactory(_MetricModuleFactory): """ - @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") + @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, path: str, @@ -1043,7 +1043,7 @@ class CachedMetricModuleFactory(_MetricModuleFactory): """ - @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") + @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, name: str, @@ -1276,7 +1276,7 @@ def dataset_module_factory( ) -@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") +@deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def metric_module_factory( path: str, revision: Optional[Union[str, Version]] = None, diff --git a/src/datasets/metric.py b/src/datasets/metric.py index 33de1bc2593..5aa8a025735 100644 --- a/src/datasets/metric.py +++ b/src/datasets/metric.py @@ -169,7 +169,7 @@ class Metric(MetricInfoMixin): timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization. 
""" - @deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") + @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") def __init__( self, config_name: Optional[str] = None, diff --git a/src/datasets/utils/deprecation_utils.py b/src/datasets/utils/deprecation_utils.py index 6e4d35b9e61..6d70f42ae7b 100644 --- a/src/datasets/utils/deprecation_utils.py +++ b/src/datasets/utils/deprecation_utils.py @@ -24,7 +24,7 @@ def decorator(deprecated_function: Callable): # Support deprecating __init__ class method: class name instead name = name if name != "__init__" else deprecated_function.__qualname__.split(".")[-2] warning_msg = ( - f"{name} is deprecated and will be removed " "in the next major version of datasets." + f" {help_message}" + f"{name} is deprecated and will be removed in the next major version of datasets." + f" {help_message}" if help_message else "" ) From e8694ffb8813d76f153b5148cd82c57d10b74b81 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 28 Jul 2022 11:25:04 +0200 Subject: [PATCH 24/27] Revert removal of metrics CI tests --- .github/workflows/ci.yml | 10 +++++++--- Makefile | 10 +++++----- additional-tests-requirements.txt | 4 ++++ setup.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 additional-tests-requirements.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5025562153..b4f14085ee4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,9 +28,9 @@ jobs: pip install .[quality] - name: Check quality run: | - black --check --line-length 119 --target-version py36 tests src benchmarks datasets - isort --check-only tests src benchmarks datasets - flake8 tests src benchmarks datasets + black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics + isort --check-only tests src benchmarks 
datasets metrics + flake8 tests src benchmarks datasets metrics test: needs: check_code_quality @@ -61,9 +61,13 @@ jobs: python-version: 3.7 - name: Upgrade pip run: python -m pip install --upgrade pip + - name: Pin setuptools-scm + if: ${{ matrix.os == 'ubuntu-latest' }} + run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2" - name: Install dependencies run: | pip install .[tests] + pip install -r additional-tests-requirements.txt --no-deps - name: Install latest PyArrow if: ${{ matrix.pyarrow_version == 'latest' }} run: pip install pyarrow --upgrade diff --git a/Makefile b/Makefile index ad0ede52d32..e3615d44ed0 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,15 @@ # Check that source code meets quality standards quality: - black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py - isort --check-only tests src benchmarks datasets/**/*.py - flake8 tests src benchmarks datasets/**/*.py + black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics + isort --check-only tests src benchmarks datasets/**/*.py metrics + flake8 tests src benchmarks datasets/**/*.py metrics # Format source code automatically style: - black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py - isort tests src benchmarks datasets/**/*.py + black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics + isort tests src benchmarks datasets/**/*.py metrics # Run tests for the library diff --git a/additional-tests-requirements.txt b/additional-tests-requirements.txt new file mode 100644 index 00000000000..a827c308c9f --- /dev/null +++ b/additional-tests-requirements.txt @@ -0,0 +1,4 @@ +unbabel-comet>=1.0.0;python_version>'3.6' +git+https://github.com/google-research/bleurt.git +git+https://github.com/ns-moosavi/coval.git +git+https://github.com/hendrycks/math.git diff --git a/setup.py 
b/setup.py index 3c8b796522c..8bcb55882a3 100644 --- a/setup.py +++ b/setup.py @@ -144,6 +144,21 @@ "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz", "sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece "rouge_score<0.0.7", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score + "sacremoses", + # metrics dependencies + "bert_score>=0.3.6", + "jiwer", + "langdetect", + "mauve-text", + "nltk", + # "rouge_score<0.0.7", # also required by bigbench + "sacrebleu", + "sacremoses", + "scikit-learn", + "scipy", + "sentencepiece", # for bleurt + "seqeval", + "tldextract", # to speed up pip backtracking "toml>=0.10.1", "requests_file>=1.5.1", From b704f26aafdbe05d2194f7f0e89fb7e0448bfde2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 28 Jul 2022 11:29:56 +0200 Subject: [PATCH 25/27] Revert removal of metrics CI tests --- tests/test_inspect.py | 1 - tests/test_load.py | 10 +++------- tests/test_metric.py | 3 --- tests/test_metric_common.py | 3 --- 4 files changed, 3 insertions(+), 14 deletions(-) diff --git a/tests/test_inspect.py b/tests/test_inspect.py index df3269841a1..5774ea8b889 100644 --- a/tests/test_inspect.py +++ b/tests/test_inspect.py @@ -20,7 +20,6 @@ def test_inspect_dataset(path, tmp_path): assert "__pycache__" not in os.listdir(tmp_path) -@pytest.mark.skip(reason="metrics are deprecated") @pytest.mark.parametrize("path", ["accuracy"]) def test_inspect_metric(path, tmp_path): inspect_metric(path, tmp_path) diff --git a/tests/test_load.py b/tests/test_load.py index 0ad1ce03f9b..712f9b55e93 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -264,7 +264,6 @@ def test_GithubDatasetModuleFactory(self): assert importlib.import_module(module_factory_result.module_path) is not None assert module_factory_result.builder_kwargs["base_path"].startswith(config.HF_ENDPOINT) - 
@pytest.mark.skip(reason="metrics are deprecated") def test_GithubMetricModuleFactory_with_internal_import(self): # "squad_v2" requires additional imports (internal) factory = GithubMetricModuleFactory( @@ -273,7 +272,6 @@ def test_GithubMetricModuleFactory_with_internal_import(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None - @pytest.mark.skip(reason="metrics are deprecated") def test_GithubMetricModuleFactory_with_external_import(self): # "bleu" requires additional imports (external from github) factory = GithubMetricModuleFactory( @@ -282,7 +280,6 @@ def test_GithubMetricModuleFactory_with_external_import(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None - @pytest.mark.skip(reason="metrics are deprecated") def test_LocalMetricModuleFactory(self): path = os.path.join(self._metric_loading_script_dir, f"{METRIC_LOADING_SCRIPT_NAME}.py") factory = LocalMetricModuleFactory( @@ -457,7 +454,6 @@ def test_CachedDatasetModuleFactory(self): module_factory_result = factory.get_module() assert importlib.import_module(module_factory_result.module_path) is not None - @pytest.mark.skip(reason="metrics are deprecated") def test_CachedMetricModuleFactory(self): path = os.path.join(self._metric_loading_script_dir, f"{METRIC_LOADING_SCRIPT_NAME}.py") factory = LocalMetricModuleFactory( @@ -478,14 +474,14 @@ def test_CachedMetricModuleFactory(self): "factory_class", [ CachedDatasetModuleFactory, - pytest.param(CachedMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), + CachedMetricModuleFactory, GithubDatasetModuleFactory, - pytest.param(GithubMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), + GithubMetricModuleFactory, HubDatasetModuleFactoryWithoutScript, HubDatasetModuleFactoryWithScript, LocalDatasetModuleFactoryWithoutScript, LocalDatasetModuleFactoryWithScript, 
- pytest.param(LocalMetricModuleFactory, marks=pytest.mark.skip(reason="metrics are deprecated")), + LocalMetricModuleFactory, PackagedDatasetModuleFactory, ], ) diff --git a/tests/test_metric.py b/tests/test_metric.py index ea743512148..2899683d1f4 100644 --- a/tests/test_metric.py +++ b/tests/test_metric.py @@ -13,9 +13,6 @@ from .utils import require_tf, require_torch -pytestmark = pytest.mark.skip(reason="metrics are deprecated") - - class DummyMetric(Metric): def _info(self): return MetricInfo( diff --git a/tests/test_metric_common.py b/tests/test_metric_common.py index 3ca05d4064f..240db64dae6 100644 --- a/tests/test_metric_common.py +++ b/tests/test_metric_common.py @@ -32,9 +32,6 @@ from .utils import for_all_test_methods, local, slow -pytestmark = pytest.mark.skip(reason="metrics are deprecated") - - REQUIRE_FAIRSEQ = {"comet"} _has_fairseq = importlib.util.find_spec("fairseq") is not None From dd019b44398bc9a31c2d6501f6a13af99abc54f6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 28 Jul 2022 11:30:08 +0200 Subject: [PATCH 26/27] Fix style --- src/datasets/inspect.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/datasets/inspect.py b/src/datasets/inspect.py index bda32a49f06..9f62076af95 100644 --- a/src/datasets/inspect.py +++ b/src/datasets/inspect.py @@ -71,7 +71,9 @@ def list_datasets(with_community_datasets=True, with_details=False): return datasets -@deprecated("Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate") +@deprecated( + "Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate" +) def list_metrics(with_community_metrics=True, with_details=False): """List all the metrics script available on the Hugging Face Hub. 
@@ -146,7 +148,9 @@ def inspect_dataset(path: str, local_path: str, download_config: Optional[Downlo ) -@deprecated("Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate") +@deprecated( + "Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate" +) def inspect_metric(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs): r""" Allow inspection/modification of a metric script by copying it on local drive at local_path. From c494804f14e433220d17518f977dde18889f9b41 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 28 Jul 2022 12:58:28 +0200 Subject: [PATCH 27/27] Mock emitted_deprecation_warnings to test warnings --- tests/test_warnings.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_warnings.py b/tests/test_warnings.py index 7dab6d55850..eedcbb82ae4 100644 --- a/tests/test_warnings.py +++ b/tests/test_warnings.py @@ -3,6 +3,11 @@ from datasets import inspect_metric, list_metrics, load_metric +@pytest.fixture +def mock_emitted_deprecation_warnings(monkeypatch): + monkeypatch.setattr("datasets.utils.deprecation_utils._emitted_deprecation_warnings", set()) + + # Used by list_metrics @pytest.fixture def mock_hfh(monkeypatch): @@ -22,7 +27,7 @@ def list_metrics(self): @pytest.mark.parametrize( "func, args", [(load_metric, ("metrics/mse",)), (list_metrics, ()), (inspect_metric, ("metrics/mse", "tmp_path"))] ) -def test_metric_deprecation_warning(func, args, mock_hfh, tmp_path): +def test_metric_deprecation_warning(func, args, mock_emitted_deprecation_warnings, mock_hfh, tmp_path): if "tmp_path" in args: args = tuple(arg if arg != "tmp_path" else tmp_path for arg in args) with pytest.warns(FutureWarning, match="https://huggingface.co/docs/evaluate"):