Deprecate metrics #4739

Merged: 27 commits, Jul 28, 2022

Commits (the diff below shows changes from 18 commits)
b724507  Deprecate public metric functions (albertvillanova, Jul 25, 2022)
b91db47  Test metric deprecation warnings (albertvillanova, Jul 25, 2022)
a800b3a  Deprecate metrics in docs (albertvillanova, Jul 25, 2022)
23a357c  Remove mentions to metrics in docs and README (albertvillanova, Jul 25, 2022)
3868922  Deprecate internal metric functions/classes (albertvillanova, Jul 25, 2022)
d159d4a  Warn metric deprecation only once (albertvillanova, Jul 25, 2022)
f698de6  Deprecate Metric class (albertvillanova, Jul 25, 2022)
7816acc  Support deprecating __init__ method for subclassed classes (albertvillanova, Jul 25, 2022)
c9baf39  Move deprecated decorator to __init__ class method (albertvillanova, Jul 25, 2022)
0b434bd  Update deprecation message in docs (albertvillanova, Jul 25, 2022)
1d4d9b1  Remove mentions to metrics in docstring/README (albertvillanova, Jul 25, 2022)
293e828  Remove new_metric_script template (albertvillanova, Jul 25, 2022)
c77e04e  Skip metric tests (albertvillanova, Jul 25, 2022)
b97df66  Remove metrics from code quality check (albertvillanova, Jul 25, 2022)
31d7d6f  Remove metric test requirements (albertvillanova, Jul 25, 2022)
929c228  Add rouge_score test requirement needed by bigbench (albertvillanova, Jul 25, 2022)
e255548  Remove metrics additional tests requirements (albertvillanova, Jul 25, 2022)
00e8aac  Remove test requirements only used by metrics (albertvillanova, Jul 25, 2022)
755010d  Address requested changes (albertvillanova, Jul 26, 2022)
fe20f31  Update deprecation version after latest release (albertvillanova, Jul 26, 2022)
8ce323e  Remove repeated comment (albertvillanova, Jul 26, 2022)
642d481  Give hint to switch to evaluate (albertvillanova, Jul 26, 2022)
84b47d0  Fix minor details (albertvillanova, Jul 26, 2022)
e8694ff  Revert removal of metrics CI tests (albertvillanova, Jul 28, 2022)
b704f26  Revert removal of metrics CI tests (albertvillanova, Jul 28, 2022)
dd019b4  Fix style (albertvillanova, Jul 28, 2022)
c494804  Mock emitted_deprecation_warnings to test warnings (albertvillanova, Jul 28, 2022)
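
Two of the commits above concern testing the new warnings: b91db47 ("Test metric deprecation warnings") and c494804 ("Mock emitted_deprecation_warnings to test warnings"). A minimal sketch of what a warn-once test in that spirit could look like, assuming the registry lives at `datasets.utils.deprecation_utils._emitted_deprecation_warnings` and the decorator emits `FutureWarning` (both inferred from the commit titles, not taken from the PR's actual test):

```python
import warnings
from unittest.mock import patch

import pytest

from datasets import list_metrics


def test_list_metrics_warns_once():
    # Reset the (assumed) registry so the first call emits its warning again.
    with patch("datasets.utils.deprecation_utils._emitted_deprecation_warnings", set()):
        with pytest.warns(FutureWarning, match="Evaluate"):
            list_metrics()
        # A second call should stay silent: the warning was already recorded,
        # so escalating FutureWarning to an error must not raise here.
        with warnings.catch_warnings():
            warnings.simplefilter("error", FutureWarning)
            list_metrics()
```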
10 changes: 3 additions & 7 deletions .github/workflows/ci.yml
@@ -28,9 +28,9 @@ jobs:
         pip install .[quality]
     - name: Check quality
       run: |
-        black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics
-        isort --check-only tests src benchmarks datasets metrics
-        flake8 tests src benchmarks datasets metrics
+        black --check --line-length 119 --target-version py36 tests src benchmarks datasets
+        isort --check-only tests src benchmarks datasets
+        flake8 tests src benchmarks datasets

   test:
     needs: check_code_quality
@@ -61,13 +61,9 @@ jobs:
         python-version: 3.7
     - name: Upgrade pip
       run: python -m pip install --upgrade pip
-    - name: Pin setuptools-scm
-      if: ${{ matrix.os == 'ubuntu-latest' }}
-      run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.6" && pip install "setuptools-scm==6.4.2"
     - name: Install dependencies
       run: |
         pip install .[tests]
-        pip install -r additional-tests-requirements.txt --no-deps
     - name: Install latest PyArrow
       if: ${{ matrix.pyarrow_version == 'latest' }}
       run: pip install pyarrow --upgrade
10 changes: 5 additions & 5 deletions Makefile
@@ -3,15 +3,15 @@
 # Check that source code meets quality standards

 quality:
-	black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics
-	isort --check-only tests src benchmarks datasets/**/*.py metrics
-	flake8 tests src benchmarks datasets/**/*.py metrics
+	black --check --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py
+	isort --check-only tests src benchmarks datasets/**/*.py
+	flake8 tests src benchmarks datasets/**/*.py

 # Format source code automatically

 style:
-	black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py metrics
-	isort tests src benchmarks datasets/**/*.py metrics
+	black --line-length 119 --target-version py36 tests src benchmarks datasets/**/*.py
+	isort tests src benchmarks datasets/**/*.py

 # Run tests for the library
14 changes: 3 additions & 11 deletions README.md
@@ -40,7 +40,7 @@
 <a href="https://hf.co/course"><img src="https://raw.githubusercontent.com/huggingface/datasets/main/docs/source/imgs/course_banner.png"></a>
 </h3>

-🤗 Datasets also provides access to +40 evaluation metrics and is designed to let the community easily add and share new datasets and evaluation metrics.
+🤗 Datasets is designed to let the community easily add and share new datasets.

 🤗 Datasets has many additional interesting features:
@@ -85,15 +85,13 @@ For more details on using the library with NumPy, pandas, PyTorch or TensorFlow,

 - `datasets.list_datasets()` to list the available datasets
 - `datasets.load_dataset(dataset_name, **kwargs)` to instantiate a dataset
-- `datasets.list_metrics()` to list the available metrics
-- `datasets.load_metric(metric_name, **kwargs)` to instantiate a metric

 This library can be used for text/image/audio/etc. datasets. Here is an example to load a text dataset:

 Here is a quick example:

 ```python
-from datasets import list_datasets, load_dataset, list_metrics, load_metric
+from datasets import list_datasets, load_dataset

 # Print all the available datasets
 print(list_datasets())
@@ -102,12 +100,6 @@ print(list_datasets())
 squad_dataset = load_dataset('squad')
 print(squad_dataset['train'][0])

-# List all the available metrics
-print(list_metrics())
-
-# Load a metric
-squad_metric = load_metric('squad')
-
 # Process the dataset - add a column with the length of the context texts
 dataset_with_length = squad_dataset.map(lambda x: {"length": len(x["context"])})
@@ -150,7 +142,7 @@ If you are familiar with the great TensorFlow Datasets, here are the main differ

 # Disclaimers

-Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license.
+Similar to TensorFlow Datasets, 🤗 Datasets is a utility library that downloads and prepares public datasets. We do not host or distribute most of these datasets, vouch for their quality or fairness, or claim that you have license to use them. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license.

 If you're a dataset owner and wish to update any part of it (description, citation, etc.), or do not want your dataset to be included in this library, please get in touch through a [GitHub issue](https://github.com/huggingface/datasets/issues/new). Thanks for your contribution to the ML community!
4 changes: 0 additions & 4 deletions additional-tests-requirements.txt

This file was deleted.

6 changes: 6 additions & 0 deletions docs/source/about_metrics.mdx
@@ -1,5 +1,11 @@
 # All about metrics

+<Tip warning={true}>
+
+Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets.
+
+</Tip>
+
 🤗 Datasets provides access to a wide range of NLP metrics. You can load metrics associated with benchmark datasets like GLUE or SQuAD, and complex metrics like BLEURT or BERTScore, with a single command: [`load_metric`]. Once you've loaded a metric, easily compute and evaluate a model's performance.

 ## ELI5: `load_metric`
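
The tip added above redirects users to 🤗 Evaluate. For anyone migrating, a minimal sketch of the before/after, assuming the `evaluate` package is installed (`evaluate.load` is Evaluate's counterpart to `load_metric`; exact inputs vary per metric):

```python
# Before: deprecated in datasets as of this PR
from datasets import load_metric
metric = load_metric("accuracy")

# After: the equivalent call with the evaluate library
import evaluate
metric = evaluate.load("accuracy")

print(metric.compute(predictions=[0, 1, 1], references=[0, 1, 0]))
# {'accuracy': 0.6666666666666666}
```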
2 changes: 1 addition & 1 deletion docs/source/how_to_metrics.mdx
@@ -2,7 +2,7 @@

 <Tip warning={true}>

-Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets.
+Metrics are deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets.

 </Tip>
4 changes: 2 additions & 2 deletions docs/source/index.mdx
@@ -2,9 +2,9 @@

 <img class="float-left !m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[150px]" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/datasets_logo.png"/>

-🤗 Datasets is a library for easily accessing and sharing datasets, and evaluation metrics for Natural Language Processing (NLP), computer vision, and audio tasks.
+🤗 Datasets is a library for easily accessing and sharing datasets for Natural Language Processing (NLP), computer vision, and audio tasks.

-Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. There are currently over 2658 datasets, and more than 34 metrics available.
+Load a dataset in a single line of code, and use our powerful data processing methods to quickly get your dataset ready for training in a deep learning model. Backed by the Apache Arrow format, process large datasets with zero-copy reads without any memory constraints for optimal speed and efficiency. We also feature a deep integration with the [Hugging Face Hub](https://huggingface.co/datasets), allowing you to easily load and share a dataset with the wider NLP community. There are currently over 2658 datasets available.

 Find your dataset today on the [Hugging Face Hub](https://huggingface.co/datasets), and take an in-depth look inside of it with the live viewer.
2 changes: 1 addition & 1 deletion docs/source/loading.mdx
@@ -340,7 +340,7 @@ Now when you look at your dataset features, you can see it uses the custom label

 <Tip warning={true}>

-Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets.
+Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets.

 </Tip>
2 changes: 1 addition & 1 deletion docs/source/metrics.mdx
@@ -2,7 +2,7 @@

 <Tip warning={true}>

-Metrics will soon be deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at our newest library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, we've also added more tools for evaluating models and datasets.
+Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets.

 </Tip>
6 changes: 6 additions & 0 deletions docs/source/package_reference/loading_methods.mdx
@@ -22,6 +22,12 @@ Methods for listing and loading datasets and metrics:

 ## Metrics

+<Tip warning={true}>
+
+Metrics is deprecated in 🤗 Datasets. To learn more about how to use metrics, take a look at the library 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index)! In addition to metrics, you can find more tools for evaluating models and datasets.
+
+</Tip>
+
 [[autodoc]] datasets.list_metrics

 [[autodoc]] datasets.load_metric
18 changes: 2 additions & 16 deletions setup.py
@@ -123,6 +123,7 @@
     "botocore>=1.22.8",  # to be compatible with aiobotocore and boto3
     "faiss-cpu>=1.6.4",
     "fsspec[s3]",
+    "lz4",
     "moto[s3,server]==2.0.4",
     "rarfile>=4.0",
     "s3fs>=2021.11.1",  # aligned with fsspec[http]>=2021.11.1
@@ -135,29 +136,14 @@
     "bs4",
     "conllu",
     "h5py",
-    "langdetect",
     "lxml",
-    "lz4",
     "mwparserfromhell",
     "nltk",
     "openpyxl",
     "py7zr",
     "tldextract",
     "zstandard",
     "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz",
     "sentencepiece",  # bigbench requires t5 which requires seqio which requires sentencepiece
     "sacremoses",
-    # metrics dependencies
-    "bert_score>=0.3.6",
-    "jiwer",
-    "mauve-text",
-    "rouge_score<0.0.7",
-    "sacrebleu",
-    "sacremoses",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",  # for bleurt
-    "seqeval",
+    "rouge_score<0.0.7",  # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score
     # to speed up pip backtracking
     "toml>=0.10.1",
     "requests_file>=1.5.1",
17 changes: 16 additions & 1 deletion src/datasets/inspect.py
@@ -13,7 +13,7 @@
 # limitations under the License.

 # Lint as: python3
-""" List and inspect datasets and metrics."""
+""" List and inspect datasets."""

 import inspect
 import os
@@ -28,6 +28,7 @@
 from .download.streaming_download_manager import StreamingDownloadManager
 from .info import DatasetInfo
 from .load import dataset_module_factory, import_main_class, load_dataset_builder, metric_module_factory
+from .utils.deprecation_utils import deprecated
 from .utils.file_utils import relative_to_absolute_path
 from .utils.logging import get_logger
 from .utils.version import Version
@@ -70,9 +71,16 @@ def list_datasets(with_community_datasets=True, with_details=False):
     return datasets


+@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate")
 def list_metrics(with_community_metrics=True, with_details=False):
     """List all the metrics script available on the Hugging Face Hub.

+    <Deprecated version="2.4.0">
+
+    Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate
+
+    </Deprecated>
+
     Args:
         with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
         with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.
@@ -138,10 +146,17 @@ def inspect_dataset(path: str, local_path: str, download_config: Optional[Downlo
     )


+@deprecated("Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate")
 def inspect_metric(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs):
     r"""
     Allow inspection/modification of a metric script by copying it on local drive at local_path.

+    <Deprecated version="2.4.0">
+
+    Use our new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate
+
+    </Deprecated>
+
     Args:
         path (``str``): path to the dataset processing script with the dataset builder. Can be either:
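
For context on the `@deprecated(...)` decorator applied above: a minimal sketch of a warn-once deprecation decorator in this spirit. The `_emitted_deprecation_warnings` registry name, the `FutureWarning` category, and the message format are assumptions for illustration; the actual helper lives in `datasets/utils/deprecation_utils.py` and may differ:

```python
import functools
import warnings

# Assumed module-level registry of functions that have already warned.
_emitted_deprecation_warnings = set()


def deprecated(help_message=None):
    """Mark a function as deprecated, emitting a FutureWarning at most once."""

    def decorator(func):
        message = f"{func.__name__} is deprecated" + (f". {help_message}" if help_message else ".")

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            key = func.__qualname__
            if key not in _emitted_deprecation_warnings:
                _emitted_deprecation_warnings.add(key)
                warnings.warn(message, category=FutureWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator
```

A decorator like this applied to `Metric.__init__` would warn at most once across all subclasses, since the key is the undecorated function's qualified name; commit 7816acc ("Support deprecating __init__ method for subclassed classes") suggests the PR needed extra handling for exactly that case.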