Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test-datasets CI job #4952

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 39 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,8 @@ jobs:
python-version: 3.7
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Pin setuptools-scm
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
- name: Install dependencies
run: |
pip install .[tests]
pip install -r additional-tests-requirements.txt --no-deps
run: pip install .[tests]
- name: Install latest PyArrow
if: ${{ matrix.pyarrow_version == 'latest' }}
run: pip install pyarrow --upgrade
Expand All @@ -72,3 +67,41 @@ jobs:
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/

  # CI job exercising the dataset/metric catalog test suites.
  # Runs the two suites (datasets_catalog, metrics_catalog) on both Ubuntu
  # and Windows, each selecting tests by the matching pytest marker.
  test-catalog:
    needs: check_code_quality  # gate on lint/format job before spending runner time
    strategy:
      matrix:
        test: [datasets_catalog, metrics_catalog]
        os: [ubuntu-latest, windows-latest]
    continue-on-error: false
    runs-on: ${{ matrix.os }}
    steps:
      - name: Install OS dependencies
        # Audio libraries are only needed (and only installable via apt) on Linux.
        if: ${{ matrix.os == 'ubuntu-latest' }}
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libsndfile1 sox
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0  # full history; presumably needed for setuptools-scm versioning — TODO confirm
      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          python-version: 3.7
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Pin setuptools-scm
        # seqeval (a metrics dependency) fails to install on Python 3.7 with
        # newer setuptools-scm, so pin it — only where seqeval is installed.
        if: ${{ matrix.os == 'ubuntu-latest' && matrix.test == 'metrics_catalog' }}
        run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
      - name: Install dependencies
        if: ${{ matrix.test == 'datasets_catalog' }}
        run: pip install .[tests,test_datasets_catalog]
      - name: Install dependencies
        if: ${{ matrix.test == 'metrics_catalog' }}
        run: |
          pip install .[tests,test_metrics_catalog]
          pip install -r additional-tests-requirements-for-metrics.txt --no-deps
      - name: Test with pytest
        # Select only the suite for this matrix leg via its pytest marker.
        run: |
          python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
18 changes: 10 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@
"torchaudio<0.12.0",
"soundfile",
"transformers",
"zstandard",
]

DATASET_SCRIPTS_REQUIRE = [
# datasets dependencies
"bs4",
"conllu",
Expand All @@ -142,27 +146,23 @@
"sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece
"rouge_score", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score
"sacremoses",
]

METRIC_SCRIPTS_REQUIRE = [
# metrics dependencies
"bert_score>=0.3.6",
"jiwer",
"langdetect",
"mauve-text",
"nltk",
# "rouge_score", # also required by bigbench
"rouge_score",
"sacrebleu",
"sacremoses",
"scikit-learn",
"scipy",
"sentencepiece", # for bleurt
"seqeval",
"tldextract",
# to speed up pip backtracking
"toml>=0.10.1",
"requests_file>=1.5.1",
"tldextract>=3.1.0",
"texttable>=1.6.3",
"Werkzeug>=1.0.1",
"six~=1.15.0",
]

TESTS_REQUIRE.extend(VISION_REQURE)
Expand All @@ -187,6 +187,8 @@
"streaming": [], # for backward compatibility
"dev": TESTS_REQUIRE + QUALITY_REQUIRE,
"tests": TESTS_REQUIRE,
"test_datasets_catalog": DATASET_SCRIPTS_REQUIRE,
"test_metrics_catalog": METRIC_SCRIPTS_REQUIRE,
"quality": QUALITY_REQUIRE,
"benchmarks": BENCHMARKS_REQUIRE,
"docs": [
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def pytest_collection_modifyitems(config, items):
    """Pytest hook: mark collected tests as "unit" by default.

    Any test that does not already carry one of the explicit suite markers
    ("integration", "unit", "datasets_catalog", "metrics_catalog") is given
    the "unit" marker, so marker-based selection (``-m unit``) still picks
    up unmarked tests.
    """
    # NOTE: the scraped diff showed both the old and the new condition line;
    # keep the updated marker list that also recognizes the catalog suites.
    explicit_markers = ("integration", "unit", "datasets_catalog", "metrics_catalog")
    for item in items:
        if any(marker in item.keywords for marker in explicit_markers):
            continue
        item.add_marker(pytest.mark.unit)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from datasets.utils.file_utils import cached_path, is_remote_url
from datasets.utils.logging import get_logger

from .utils import OfflineSimulationMode, for_all_test_methods, local, offline, packaged, slow
from .utils import OfflineSimulationMode, for_all_test_methods, offline, packaged, slow


logger = get_logger(__name__)
Expand Down Expand Up @@ -226,8 +226,8 @@ def get_local_dataset_names():

@parameterized.named_parameters(get_local_dataset_names())
@for_all_test_methods(skip_if_dataset_requires_faiss, skip_if_not_compatible_with_windows)
@local
class LocalDatasetTest(parameterized.TestCase):
@pytest.mark.datasets_catalog
class DatasetTest(parameterized.TestCase):
dataset_name = None

def setUp(self):
Expand Down
14 changes: 7 additions & 7 deletions tests/test_metric_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import datasets
from datasets import load_metric

from .utils import for_all_test_methods, local, slow
from .utils import for_all_test_methods, slow


REQUIRE_FAIRSEQ = {"comet"}
Expand Down Expand Up @@ -68,9 +68,8 @@ def get_local_metric_names():

@parameterized.named_parameters(get_local_metric_names())
@for_all_test_methods(skip_if_metric_requires_fairseq, skip_on_windows_if_not_windows_compatible)
@local
@pytest.mark.integration
class LocalMetricTest(parameterized.TestCase):
@pytest.mark.metrics_catalog
class MetricTest(parameterized.TestCase):
INTENSIVE_CALLS_PATCHER = {}
metric_name = None

Expand Down Expand Up @@ -136,7 +135,7 @@ def wrapper(patcher):
# --------------------------------


@LocalMetricTest.register_intensive_calls_patcher("bleurt")
@MetricTest.register_intensive_calls_patcher("bleurt")
def patch_bleurt(module_name):
import tensorflow.compat.v1 as tf
from bleurt.score import Predictor
Expand All @@ -154,7 +153,7 @@ def predict(self, input_dict):
yield


@LocalMetricTest.register_intensive_calls_patcher("bertscore")
@MetricTest.register_intensive_calls_patcher("bertscore")
def patch_bertscore(module_name):
import torch

Expand All @@ -170,7 +169,7 @@ def bert_cos_score_idf(model, refs, *args, **kwargs):
yield


@LocalMetricTest.register_intensive_calls_patcher("comet")
@MetricTest.register_intensive_calls_patcher("comet")
def patch_comet(module_name):
def load_from_checkpoint(model_path):
class Model:
Expand All @@ -190,6 +189,7 @@ def predict(self, data, *args, **kwargs):
yield


@pytest.mark.metrics_catalog
def test_seqeval_raises_when_incorrect_scheme():
metric = load_metric(os.path.join("metrics", "seqeval"))
wrong_scheme = "ERROR"
Expand Down
12 changes: 0 additions & 12 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,18 +213,6 @@ def slow(test_case):
return test_case


def local(test_case):
    """
    Decorator marking a test as local.

    Local tests are run by default. Set the RUN_LOCAL environment variable
    to a falsy value to not run them.
    """
    # The original condition was `not _run_local_tests or _run_local_tests == 0`;
    # the second disjunct is unreachable — 0 is already falsy, so
    # `not _run_local_tests` covers it.
    if not _run_local_tests:
        test_case = unittest.skip("test is local")(test_case)
    return test_case


def packaged(test_case):
"""
Decorator marking a test as packaged
Expand Down