use pytest markers instead
lhoestq committed Sep 8, 2022
1 parent c44e4f4 commit a3f26c1
Showing 7 changed files with 58 additions and 36 deletions.
45 changes: 39 additions & 6 deletions .github/workflows/ci.yml
@@ -56,13 +56,8 @@ jobs:
python-version: 3.7
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Pin setuptools-scm
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
- name: Install dependencies
run: |
pip install .[tests]
pip install -r additional-tests-requirements.txt --no-deps
run: pip install .[tests]
- name: Install latest PyArrow
if: ${{ matrix.pyarrow_version == 'latest' }}
run: pip install pyarrow --upgrade
@@ -72,3 +67,41 @@ jobs:
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
test-catalog:
needs: check_code_quality
strategy:
matrix:
test: [datasets_catalog, metrics_catalog]
os: [ubuntu-latest, windows-latest]
continue-on-error: false
runs-on: ${{ matrix.os }}
steps:
- name: Install OS dependencies
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
sudo apt-get -y update
sudo apt-get -y install libsndfile1 sox
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python 3.7
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Pin setuptools-scm
if: ${{ matrix.os == 'ubuntu-latest' && matrix.test == 'metrics_catalog' }}
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
- name: Install dependencies
if: ${{ matrix.test == 'datasets_catalog' }}
run: pip install .[tests,test_datasets_catalog]
- name: Install dependencies
if: ${{ matrix.test == 'metrics_catalog' }}
run: |
pip install .[tests,test_metrics_catalog]
pip install -r additional-tests-requirements-for-metrics.txt --no-deps
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
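The new test-catalog job selects tests purely through pytest's `-m` marker expression, with the matrix value (datasets_catalog or metrics_catalog) doubling as both the marker name and the extras name. As a rough local equivalent of the datasets_catalog leg (a sketch, not part of this commit), one could install the matching extra with `pip install .[tests,test_datasets_catalog]` and drive the same selection from Python:

```python
# Sketch of a local run mirroring the datasets_catalog leg of the CI matrix.
# Assumes the repository is installed with `pip install .[tests,test_datasets_catalog]`.
import sys

import pytest

# Mirrors: python -m pytest -rfExX -m datasets_catalog -sv ./tests/
# (CI additionally passes -n 2 --dist loadfile, which requires the pytest-xdist plugin.)
sys.exit(pytest.main(["-rfExX", "-m", "datasets_catalog", "-sv", "tests/"]))
```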
additional-tests-requirements.txt renamed to additional-tests-requirements-for-metrics.txt; file renamed without changes.
16 changes: 9 additions & 7 deletions setup.py
@@ -129,6 +129,10 @@
"torchaudio<0.12.0",
"soundfile",
"transformers",
"zstandard",
]

DATASET_SCRIPTS_REQUIRE = [
# datasets dependencies
"bs4",
"conllu",
@@ -142,6 +146,9 @@
"sentencepiece", # bigbench requires t5 which requires seqio which requires sentencepiece
"rouge_score", # required by bigbench: bigbench.api.util.bb_utils > t5.evaluation.metrics > rouge_score
"sacremoses",
]

METRIC_SCRIPTS_REQUIRE = [
# metrics dependencies
"bert_score>=0.3.6",
"jiwer",
@@ -156,13 +163,6 @@
"sentencepiece", # for bleurt
"seqeval",
"tldextract",
# to speed up pip backtracking
"toml>=0.10.1",
"requests_file>=1.5.1",
"tldextract>=3.1.0",
"texttable>=1.6.3",
"Werkzeug>=1.0.1",
"six~=1.15.0",
]

TESTS_REQUIRE.extend(VISION_REQURE)
@@ -187,6 +187,8 @@
"streaming": [], # for backward compatibility
"dev": TESTS_REQUIRE + QUALITY_REQUIRE,
"tests": TESTS_REQUIRE,
"test_datasets_catalog": DATASET_SCRIPTS_REQUIRE,
"test_metrics_catalog": METRIC_SCRIPTS_REQUIRE,
"quality": QUALITY_REQUIRE,
"benchmarks": BENCHMARKS_REQUIRE,
"docs": [
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -10,7 +10,7 @@
def pytest_collection_modifyitems(config, items):
# Mark tests as "unit" by default if not marked as "integration" (or already marked as "unit")
for item in items:
if any(marker in item.keywords for marker in ["integration", "unit"]):
if any(marker in item.keywords for marker in ["integration", "unit", "datasets_catalog", "metrics_catalog"]):
continue
item.add_marker(pytest.mark.unit)

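Custom marks such as datasets_catalog and metrics_catalog are normally also declared to pytest so collection does not emit "unknown mark" warnings. A minimal sketch of such a declaration (assumed here, not shown in this commit) that could sit alongside the hook above:

```python
# Hypothetical companion hook (not part of this commit): declare the custom marks
# used by the suite so pytest does not warn about unknown markers at collection time.
def pytest_configure(config):
    for name in ("unit", "integration", "datasets_catalog", "metrics_catalog"):
        config.addinivalue_line("markers", f"{name}: tests selected in CI with -m {name}")
```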
6 changes: 3 additions & 3 deletions tests/test_dataset_common.py
@@ -35,7 +35,7 @@
from datasets.utils.file_utils import cached_path, is_remote_url
from datasets.utils.logging import get_logger

from .utils import OfflineSimulationMode, for_all_test_methods, local, offline, packaged, slow
from .utils import OfflineSimulationMode, for_all_test_methods, offline, packaged, slow


logger = get_logger(__name__)
@@ -226,8 +226,8 @@ def get_local_dataset_names():

@parameterized.named_parameters(get_local_dataset_names())
@for_all_test_methods(skip_if_dataset_requires_faiss, skip_if_not_compatible_with_windows)
@local
class LocalDatasetTest(parameterized.TestCase):
@pytest.mark.datasets_catalog
class DatasetTest(parameterized.TestCase):
dataset_name = None

def setUp(self):
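The class-level `@pytest.mark.datasets_catalog` decorator applies the mark to every test collected from the class, which is what lets the CI matrix pick up or skip the whole dataset catalog with a single `-m` expression. A self-contained illustration of that behaviour (hypothetical module, using plain `pytest.mark.parametrize` instead of the absl `parameterized` helpers the real suite relies on):

```python
import pytest


@pytest.mark.datasets_catalog
class TestDummyCatalog:
    # Both parametrized cases inherit the class-level mark, so
    # `pytest -m datasets_catalog` collects them and
    # `pytest -m "not datasets_catalog"` deselects the whole class.
    @pytest.mark.parametrize("dataset_name", ["dummy_dataset_a", "dummy_dataset_b"])
    def test_dataset_name_is_nonempty(self, dataset_name):
        assert len(dataset_name) > 0
```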
13 changes: 6 additions & 7 deletions tests/test_metric_common.py
@@ -29,7 +29,7 @@
import datasets
from datasets import load_metric

from .utils import for_all_test_methods, local, slow
from .utils import for_all_test_methods, slow


REQUIRE_FAIRSEQ = {"comet"}
@@ -68,9 +68,8 @@ def get_local_metric_names():

@parameterized.named_parameters(get_local_metric_names())
@for_all_test_methods(skip_if_metric_requires_fairseq, skip_on_windows_if_not_windows_compatible)
@local
@pytest.mark.integration
class LocalMetricTest(parameterized.TestCase):
@pytest.mark.metrics_catalog
class MetricTest(parameterized.TestCase):
INTENSIVE_CALLS_PATCHER = {}
metric_name = None

@@ -136,7 +135,7 @@ def wrapper(patcher):
# --------------------------------


@LocalMetricTest.register_intensive_calls_patcher("bleurt")
@MetricTest.register_intensive_calls_patcher("bleurt")
def patch_bleurt(module_name):
import tensorflow.compat.v1 as tf
from bleurt.score import Predictor
@@ -154,7 +153,7 @@ def predict(self, input_dict):
yield


@LocalMetricTest.register_intensive_calls_patcher("bertscore")
@MetricTest.register_intensive_calls_patcher("bertscore")
def patch_bertscore(module_name):
import torch

@@ -170,7 +169,7 @@ def bert_cos_score_idf(model, refs, *args, **kwargs):
yield


@LocalMetricTest.register_intensive_calls_patcher("comet")
@MetricTest.register_intensive_calls_patcher("comet")
def patch_comet(module_name):
def load_from_checkpoint(model_path):
class Model:
12 changes: 0 additions & 12 deletions tests/utils.py
@@ -213,18 +213,6 @@ def slow(test_case):
return test_case


def local(test_case):
"""
Decorator marking a test as local
Local tests are run by default. Set the RUN_LOCAL environment variable
to a falsy value to not run them.
"""
if not _run_local_tests or _run_local_tests == 0:
test_case = unittest.skip("test is local")(test_case)
return test_case


def packaged(test_case):
"""
Decorator marking a test as packaged

1 comment on commit a3f26c1

@github-actions



PyArrow==6.0.0


Benchmark: benchmark_array_xd.json

metric: new / old (diff)
read_batch_formatted_as_numpy after write_array2d: 0.007911 / 0.011353 (-0.003442)
read_batch_formatted_as_numpy after write_flattened_sequence: 0.003846 / 0.011008 (-0.007163)
read_batch_formatted_as_numpy after write_nested_sequence: 0.029653 / 0.038508 (-0.008855)
read_batch_unformated after write_array2d: 0.034552 / 0.023109 (0.011443)
read_batch_unformated after write_flattened_sequence: 0.295039 / 0.275898 (0.019141)
read_batch_unformated after write_nested_sequence: 0.357775 / 0.323480 (0.034295)
read_col_formatted_as_numpy after write_array2d: 0.006030 / 0.007986 (-0.001956)
read_col_formatted_as_numpy after write_flattened_sequence: 0.004734 / 0.004328 (0.000406)
read_col_formatted_as_numpy after write_nested_sequence: 0.006865 / 0.004250 (0.002615)
read_col_unformated after write_array2d: 0.050806 / 0.037052 (0.013754)
read_col_unformated after write_flattened_sequence: 0.309241 / 0.258489 (0.050752)
read_col_unformated after write_nested_sequence: 0.346976 / 0.293841 (0.053135)
read_formatted_as_numpy after write_array2d: 0.031498 / 0.128546 (-0.097048)
read_formatted_as_numpy after write_flattened_sequence: 0.009330 / 0.075646 (-0.066316)
read_formatted_as_numpy after write_nested_sequence: 0.257331 / 0.419271 (-0.161941)
read_unformated after write_array2d: 0.051919 / 0.043533 (0.008387)
read_unformated after write_flattened_sequence: 0.299153 / 0.255139 (0.044014)
read_unformated after write_nested_sequence: 0.320459 / 0.283200 (0.037259)
write_array2d: 0.100311 / 0.141683 (-0.041372)
write_flattened_sequence: 1.413344 / 1.452155 (-0.038810)
write_nested_sequence: 1.451009 / 1.492716 (-0.041708)

Benchmark: benchmark_getitem_100B.json

metric: new / old (diff)
get_batch_of_1024_random_rows: 0.206925 / 0.018006 (0.188918)
get_batch_of_1024_rows: 0.451922 / 0.000490 (0.451433)
get_first_row: 0.003875 / 0.000200 (0.003675)
get_last_row: 0.000096 / 0.000054 (0.000041)

Benchmark: benchmark_indices_mapping.json

metric: new / old (diff)
select: 0.024034 / 0.037411 (-0.013378)
shard: 0.109325 / 0.014526 (0.094800)
shuffle: 0.124931 / 0.176557 (-0.051625)
sort: 0.181973 / 0.737135 (-0.555163)
train_test_split: 0.127275 / 0.296338 (-0.169063)

Benchmark: benchmark_iterating.json

metric: new / old (diff)
read 5000: 0.393800 / 0.215209 (0.178591)
read 50000: 3.924247 / 2.077655 (1.846592)
read_batch 50000 10: 1.756671 / 1.504120 (0.252551)
read_batch 50000 100: 1.565475 / 1.541195 (0.024280)
read_batch 50000 1000: 1.630939 / 1.468490 (0.162449)
read_formatted numpy 5000: 0.422642 / 4.584777 (-4.162135)
read_formatted pandas 5000: 3.785821 / 3.745712 (0.040109)
read_formatted tensorflow 5000: 1.994542 / 5.269862 (-3.275320)
read_formatted torch 5000: 1.214336 / 4.565676 (-3.351341)
read_formatted_batch numpy 5000 10: 0.052466 / 0.424275 (-0.371809)
read_formatted_batch numpy 5000 1000: 0.011202 / 0.007607 (0.003595)
shuffled read 5000: 0.497915 / 0.226044 (0.271871)
shuffled read 50000: 4.993577 / 2.268929 (2.724648)
shuffled read_batch 50000 10: 2.210464 / 55.444624 (-53.234161)
shuffled read_batch 50000 100: 1.881232 / 6.876477 (-4.995245)
shuffled read_batch 50000 1000: 2.044873 / 2.142072 (-0.097199)
shuffled read_formatted numpy 5000: 0.543705 / 4.805227 (-4.261522)
shuffled read_formatted_batch numpy 5000 10: 0.119328 / 6.500664 (-6.381337)
shuffled read_formatted_batch numpy 5000 1000: 0.060801 / 0.075469 (-0.014668)

Benchmark: benchmark_map_filter.json

metric: new / old (diff)
filter: 1.474452 / 1.841788 (-0.367335)
map fast-tokenizer batched: 14.234853 / 8.074308 (6.160545)
map identity: 24.922475 / 10.191392 (14.731083)
map identity batched: 0.896744 / 0.680424 (0.216320)
map no-op batched: 0.574356 / 0.534201 (0.040155)
map no-op batched numpy: 0.387901 / 0.579283 (-0.191382)
map no-op batched pandas: 0.436050 / 0.434364 (0.001686)
map no-op batched pytorch: 0.276696 / 0.540337 (-0.263641)
map no-op batched tensorflow: 0.282260 / 1.386936 (-1.104676)
PyArrow==latest

Benchmark: benchmark_array_xd.json

metric: new / old (diff)
read_batch_formatted_as_numpy after write_array2d: 0.006069 / 0.011353 (-0.005284)
read_batch_formatted_as_numpy after write_flattened_sequence: 0.003887 / 0.011008 (-0.007121)
read_batch_formatted_as_numpy after write_nested_sequence: 0.027831 / 0.038508 (-0.010677)
read_batch_unformated after write_array2d: 0.033848 / 0.023109 (0.010738)
read_batch_unformated after write_flattened_sequence: 0.335303 / 0.275898 (0.059405)
read_batch_unformated after write_nested_sequence: 0.406224 / 0.323480 (0.082744)
read_col_formatted_as_numpy after write_array2d: 0.003821 / 0.007986 (-0.004164)
read_col_formatted_as_numpy after write_flattened_sequence: 0.004807 / 0.004328 (0.000478)
read_col_formatted_as_numpy after write_nested_sequence: 0.004918 / 0.004250 (0.000667)
read_col_unformated after write_array2d: 0.043140 / 0.037052 (0.006088)
read_col_unformated after write_flattened_sequence: 0.344479 / 0.258489 (0.085990)
read_col_unformated after write_nested_sequence: 0.393820 / 0.293841 (0.099979)
read_formatted_as_numpy after write_array2d: 0.030177 / 0.128546 (-0.098369)
read_formatted_as_numpy after write_flattened_sequence: 0.009608 / 0.075646 (-0.066038)
read_formatted_as_numpy after write_nested_sequence: 0.257018 / 0.419271 (-0.162253)
read_unformated after write_array2d: 0.054298 / 0.043533 (0.010765)
read_unformated after write_flattened_sequence: 0.333612 / 0.255139 (0.078473)
read_unformated after write_nested_sequence: 0.359911 / 0.283200 (0.076712)
write_array2d: 0.102672 / 0.141683 (-0.039011)
write_flattened_sequence: 1.475105 / 1.452155 (0.022950)
write_nested_sequence: 1.479993 / 1.492716 (-0.012724)

Benchmark: benchmark_getitem_100B.json

metric: new / old (diff)
get_batch_of_1024_random_rows: 0.215608 / 0.018006 (0.197602)
get_batch_of_1024_rows: 0.436987 / 0.000490 (0.436497)
get_first_row: 0.003077 / 0.000200 (0.002877)
get_last_row: 0.000098 / 0.000054 (0.000043)

Benchmark: benchmark_indices_mapping.json

metric: new / old (diff)
select: 0.023487 / 0.037411 (-0.013925)
shard: 0.101350 / 0.014526 (0.086825)
shuffle: 0.114299 / 0.176557 (-0.062258)
sort: 0.158444 / 0.737135 (-0.578692)
train_test_split: 0.116599 / 0.296338 (-0.179740)

Benchmark: benchmark_iterating.json

metric: new / old (diff)
read 5000: 0.423344 / 0.215209 (0.208135)
read 50000: 4.220588 / 2.077655 (2.142934)
read_batch 50000 10: 2.093146 / 1.504120 (0.589026)
read_batch 50000 100: 1.919146 / 1.541195 (0.377951)
read_batch 50000 1000: 2.047201 / 1.468490 (0.578711)
read_formatted numpy 5000: 0.424443 / 4.584777 (-4.160334)
read_formatted pandas 5000: 3.768500 / 3.745712 (0.022788)
read_formatted tensorflow 5000: 1.981895 / 5.269862 (-3.287966)
read_formatted torch 5000: 1.220360 / 4.565676 (-3.345316)
read_formatted_batch numpy 5000 10: 0.051530 / 0.424275 (-0.372745)
read_formatted_batch numpy 5000 1000: 0.011130 / 0.007607 (0.003523)
shuffled read 5000: 0.524846 / 0.226044 (0.298802)
shuffled read 50000: 5.236592 / 2.268929 (2.967663)
shuffled read_batch 50000 10: 2.533484 / 55.444624 (-52.911140)
shuffled read_batch 50000 100: 2.197008 / 6.876477 (-4.679469)
shuffled read_batch 50000 1000: 2.322592 / 2.142072 (0.180519)
shuffled read_formatted numpy 5000: 0.533749 / 4.805227 (-4.271479)
shuffled read_formatted_batch numpy 5000 10: 0.117646 / 6.500664 (-6.383018)
shuffled read_formatted_batch numpy 5000 1000: 0.060058 / 0.075469 (-0.015411)

Benchmark: benchmark_map_filter.json

metric: new / old (diff)
filter: 1.547055 / 1.841788 (-0.294733)
map fast-tokenizer batched: 13.848934 / 8.074308 (5.774626)
map identity: 25.217585 / 10.191392 (15.026193)
map identity batched: 0.933153 / 0.680424 (0.252729)
map no-op batched: 0.605269 / 0.534201 (0.071068)
map no-op batched numpy: 0.388389 / 0.579283 (-0.190894)
map no-op batched pandas: 0.472355 / 0.434364 (0.037991)
map no-op batched pytorch: 0.282346 / 0.540337 (-0.257992)
map no-op batched tensorflow: 0.286038 / 1.386936 (-1.100898)
