Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test-datasets CI job #4952

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 39 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,8 @@ jobs:
python-version: 3.7
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Pin setuptools-scm
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
- name: Install dependencies
run: |
pip install .[tests]
pip install -r additional-tests-requirements.txt --no-deps
run: pip install .[tests]
- name: Install latest PyArrow
if: ${{ matrix.pyarrow_version == 'latest' }}
run: pip install pyarrow --upgrade
Expand All @@ -72,3 +67,41 @@ jobs:
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/

  # CI job exercising the dataset/metric catalog test suites.
  # Runs the two suites (datasets_catalog, metrics_catalog) on both Ubuntu
  # and Windows, each selecting tests by the matching pytest marker.
  test-catalog:
    needs: check_code_quality  # gate on lint/format job before spending runner time
    strategy:
      matrix:
        test: [datasets_catalog, metrics_catalog]
        os: [ubuntu-latest, windows-latest]
    continue-on-error: false
    runs-on: ${{ matrix.os }}
    steps:
      - name: Install OS dependencies
        # Audio libraries are only needed (and only installable via apt) on Linux.
        if: ${{ matrix.os == 'ubuntu-latest' }}
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libsndfile1 sox
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0  # full history; presumably needed for setuptools-scm versioning — TODO confirm
      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          python-version: 3.7
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Pin setuptools-scm
        # seqeval (a metrics dependency) fails to install on Python 3.7 with
        # newer setuptools-scm, so pin it — only where seqeval is installed.
        if: ${{ matrix.os == 'ubuntu-latest' && matrix.test == 'metrics_catalog' }}
        run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
      - name: Install dependencies
        if: ${{ matrix.test == 'datasets_catalog' }}
        run: pip install .[tests,test_datasets_catalog]
      - name: Install dependencies
        if: ${{ matrix.test == 'metrics_catalog' }}
        run: |
          pip install .[tests,test_metrics_catalog]
          pip install -r additional-tests-requirements-for-metrics.txt --no-deps
      - name: Test with pytest
        # Select only the suite for this matrix leg via its pytest marker.
        run: |
          python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
18 changes: 10 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@
"torchaudio<0.12.0",
"soundfile",
"transformers",
"zstandard",
]

DATASET_SCRIPTS_REQUIRE = [
# datasets dependencies
"bs4",
"conllu",
Expand All @@ -142,27 +146,23 @@
"sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece
"rouge_score", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score
"sacremoses",
]

METRIC_SCRIPTS_REQUIRE = [
# metrics dependencies
"bert_score>=0.3.6",
"jiwer",
"langdetect",
"mauve-text",
"nltk",
# "rouge_score", # also required by bigbench
"rouge_score",
"sacrebleu",
"sacremoses",
"scikit-learn",
"scipy",
"sentencepiece", # for bleurt
"seqeval",
"tldextract",
# to speed up pip backtracking
"toml>=0.10.1",
"requests_file>=1.5.1",
"tldextract>=3.1.0",
"texttable>=1.6.3",
"Werkzeug>=1.0.1",
"six~=1.15.0",
]

TESTS_REQUIRE.extend(VISION_REQURE)
Expand All @@ -187,6 +187,8 @@
"streaming": [], # for backward compatibility
"dev": TESTS_REQUIRE + QUALITY_REQUIRE,
"tests": TESTS_REQUIRE,
"test_datasets_catalog": DATASET_SCRIPTS_REQUIRE,
"test_metrics_catalog": METRIC_SCRIPTS_REQUIRE,
"quality": QUALITY_REQUIRE,
"benchmarks": BENCHMARKS_REQUIRE,
"docs": [
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def pytest_collection_modifyitems(config, items):
    """Pytest hook: mark collected tests as "unit" by default.

    Any test that does not already carry one of the explicit suite markers
    ("integration", "unit", "datasets_catalog", "metrics_catalog") is given
    the "unit" marker, so marker-based selection (``-m unit``) still picks
    up unmarked tests.
    """
    # NOTE: the scraped diff showed both the old and the new condition line;
    # keep the updated marker list that also recognizes the catalog suites.
    explicit_markers = ("integration", "unit", "datasets_catalog", "metrics_catalog")
    for item in items:
        if any(marker in item.keywords for marker in explicit_markers):
            continue
        item.add_marker(pytest.mark.unit)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from datasets.utils.file_utils import cached_path, is_remote_url
from datasets.utils.logging import get_logger

from .utils import OfflineSimulationMode, for_all_test_methods, local, offline, packaged, slow
from .utils import OfflineSimulationMode, for_all_test_methods, offline, packaged, slow


logger = get_logger(__name__)
Expand Down Expand Up @@ -226,8 +226,8 @@ def get_local_dataset_names():

@parameterized.named_parameters(get_local_dataset_names())
@for_all_test_methods(skip_if_dataset_requires_faiss, skip_if_not_compatible_with_windows)
@local
class LocalDatasetTest(parameterized.TestCase):
@pytest.mark.datasets_catalog
class DatasetTest(parameterized.TestCase):
dataset_name = None

def setUp(self):
Expand Down
14 changes: 7 additions & 7 deletions tests/test_metric_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import datasets
from datasets import load_metric

from .utils import for_all_test_methods, local, slow
from .utils import for_all_test_methods, slow


REQUIRE_FAIRSEQ = {"comet"}
Expand Down Expand Up @@ -68,9 +68,8 @@ def get_local_metric_names():

@parameterized.named_parameters(get_local_metric_names())
@for_all_test_methods(skip_if_metric_requires_fairseq, skip_on_windows_if_not_windows_compatible)
@local
@pytest.mark.integration
class LocalMetricTest(parameterized.TestCase):
@pytest.mark.metrics_catalog
class MetricTest(parameterized.TestCase):
INTENSIVE_CALLS_PATCHER = {}
metric_name = None

Expand Down Expand Up @@ -136,7 +135,7 @@ def wrapper(patcher):
# --------------------------------


@LocalMetricTest.register_intensive_calls_patcher("bleurt")
@MetricTest.register_intensive_calls_patcher("bleurt")
def patch_bleurt(module_name):
import tensorflow.compat.v1 as tf
from bleurt.score import Predictor
Expand All @@ -154,7 +153,7 @@ def predict(self, input_dict):
yield


@LocalMetricTest.register_intensive_calls_patcher("bertscore")
@MetricTest.register_intensive_calls_patcher("bertscore")
def patch_bertscore(module_name):
import torch

Expand All @@ -170,7 +169,7 @@ def bert_cos_score_idf(model, refs, *args, **kwargs):
yield


@LocalMetricTest.register_intensive_calls_patcher("comet")
@MetricTest.register_intensive_calls_patcher("comet")
def patch_comet(module_name):
def load_from_checkpoint(model_path):
class Model:
Expand All @@ -190,6 +189,7 @@ def predict(self, data, *args, **kwargs):
yield


@pytest.mark.metrics_catalog
def test_seqeval_raises_when_incorrect_scheme():
metric = load_metric(os.path.join("metrics", "seqeval"))
wrong_scheme = "ERROR"
Expand Down
12 changes: 0 additions & 12 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,18 +213,6 @@ def slow(test_case):
return test_case


def local(test_case):
    """
    Decorator marking a test as local.

    Local tests are run by default. Set the RUN_LOCAL environment variable
    to a falsy value to not run them.
    """
    # The original condition was `not _run_local_tests or _run_local_tests == 0`;
    # the second disjunct is unreachable — 0 is already falsy, so
    # `not _run_local_tests` covers it.
    if not _run_local_tests:
        test_case = unittest.skip("test is local")(test_case)
    return test_case


def packaged(test_case):
"""
Decorator marking a test as packaged
Expand Down