Skip to content

Commit

Permalink
Remove 3.6 pickling test
Browse files Browse the repository at this point in the history
  • Loading branch information
mariosasko committed Jul 20, 2022
1 parent 1cdcaa5 commit 30de7fa
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 17 deletions.
2 changes: 1 addition & 1 deletion tests/test_arrow_dataset.py
Expand Up @@ -3119,7 +3119,7 @@ def test_pickle_dataset_after_transforming_the_table(in_memory, method_and_param


@pytest.mark.skipif(
os.name in ["nt", "posix"] and (os.getenv("CIRCLECI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"),
os.name == "nt" and (os.getenv("CIRCLECI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"),
reason='On Windows CircleCI or GitHub Actions, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"',
) # TODO: find what's wrong with CircleCI / GitHub Actions
@require_s3
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dataset_dict.py
Expand Up @@ -665,7 +665,7 @@ def test_datasetdict_from_text_split(split, text_path, tmp_path):


@pytest.mark.skipif(
os.name in ["nt", "posix"] and (os.getenv("CIRCLECI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"),
os.name == "nt" and (os.getenv("CIRCLECI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"),
reason='On Windows CircleCI or GitHub Actions, it raises botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL: "http://127.0.0.1:5555/test"',
) # TODO: find what's wrong with CircleCI / GitHub Actions
@require_s3
Expand Down
15 changes: 0 additions & 15 deletions tests/test_fingerprint.py
Expand Up @@ -226,21 +226,6 @@ def globalvars_mock2_side_effect(func, *args, **kwargs):
self.assertEqual(hash1, hash2)


class TypeHintDumpTest(TestCase):
    """Checks that typing constructs are serialised consistently by the project's ``dumps``."""

    def test_dump_type_hint(self):
        """Hash ``Union`` type hints via ``datasets.utils.py_utils.dumps`` and compare digests.

        Two separately built ``Union[str, None]`` objects must yield the same
        digest, and that digest must differ from the one for ``Union[str]``.
        """
        from typing import Union

        def digest_of(type_hint):
            # Serialise with the project's pickler, then fingerprint the bytes.
            return md5(datasets.utils.py_utils.dumps(type_hint)).hexdigest()

        # Per the original note: this type is not picklable in Python 3.6 with
        # the stock pickler, so this checks the project's pickler handles it.
        optional_str_digest = digest_of(Union[str, None])
        # Per the original note: this type is picklable in Python 3.6.
        plain_str_digest = digest_of(Union[str])
        # A second, independently constructed Union[str, None].
        optional_str_digest_again = digest_of(Union[str, None])

        self.assertEqual(optional_str_digest, optional_str_digest_again)
        self.assertNotEqual(optional_str_digest, plain_str_digest)


class HashingTest(TestCase):
def test_hash_simple(self):
hash1 = Hasher.hash("hello")
Expand Down

1 comment on commit 30de7fa

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==6.0.0

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.007934 / 0.011353 (-0.003419) 0.003809 / 0.011008 (-0.007199) 0.029593 / 0.038508 (-0.008915) 0.034466 / 0.023109 (0.011357) 0.293366 / 0.275898 (0.017468) 0.350676 / 0.323480 (0.027196) 0.005825 / 0.007986 (-0.002160) 0.004584 / 0.004328 (0.000256) 0.006917 / 0.004250 (0.002667) 0.049401 / 0.037052 (0.012349) 0.312584 / 0.258489 (0.054095) 0.347507 / 0.293841 (0.053666) 0.030552 / 0.128546 (-0.097994) 0.009350 / 0.075646 (-0.066297) 0.258551 / 0.419271 (-0.160720) 0.050112 / 0.043533 (0.006579) 0.297624 / 0.255139 (0.042485) 0.315088 / 0.283200 (0.031889) 0.099776 / 0.141683 (-0.041907) 1.438281 / 1.452155 (-0.013874) 1.495152 / 1.492716 (0.002435)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.224163 / 0.018006 (0.206157) 0.457148 / 0.000490 (0.456658) 0.004868 / 0.000200 (0.004668) 0.000087 / 0.000054 (0.000033)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.025221 / 0.037411 (-0.012191) 0.104391 / 0.014526 (0.089865) 0.119341 / 0.176557 (-0.057215) 0.172717 / 0.737135 (-0.564418) 0.122277 / 0.296338 (-0.174062)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.394440 / 0.215209 (0.179231) 3.917615 / 2.077655 (1.839960) 1.781216 / 1.504120 (0.277096) 1.592900 / 1.541195 (0.051705) 1.641644 / 1.468490 (0.173154) 0.420590 / 4.584777 (-4.164187) 3.802199 / 3.745712 (0.056487) 3.686170 / 5.269862 (-1.583692) 1.775204 / 4.565676 (-2.790473) 0.051894 / 0.424275 (-0.372381) 0.011679 / 0.007607 (0.004072) 0.501799 / 0.226044 (0.275754) 5.024170 / 2.268929 (2.755242) 2.237230 / 55.444624 (-53.207394) 1.877013 / 6.876477 (-4.999464) 2.001046 / 2.142072 (-0.141026) 0.552063 / 4.805227 (-4.253164) 0.118060 / 6.500664 (-6.382604) 0.060127 / 0.075469 (-0.015342)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.503291 / 1.841788 (-0.338497) 13.472666 / 8.074308 (5.398358) 25.092047 / 10.191392 (14.900655) 0.899841 / 0.680424 (0.219418) 0.565431 / 0.534201 (0.031230) 0.384163 / 0.579283 (-0.195120) 0.428677 / 0.434364 (-0.005686) 0.272435 / 0.540337 (-0.267902) 0.270868 / 1.386936 (-1.116069)
PyArrow==latest
Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.006280 / 0.011353 (-0.005073) 0.003879 / 0.011008 (-0.007129) 0.027926 / 0.038508 (-0.010582) 0.033641 / 0.023109 (0.010532) 0.296791 / 0.275898 (0.020893) 0.363856 / 0.323480 (0.040376) 0.003865 / 0.007986 (-0.004121) 0.003465 / 0.004328 (-0.000863) 0.004916 / 0.004250 (0.000666) 0.047378 / 0.037052 (0.010326) 0.301907 / 0.258489 (0.043418) 0.353680 / 0.293841 (0.059839) 0.030159 / 0.128546 (-0.098387) 0.009847 / 0.075646 (-0.065799) 0.257457 / 0.419271 (-0.161815) 0.054606 / 0.043533 (0.011073) 0.297654 / 0.255139 (0.042515) 0.331306 / 0.283200 (0.048106) 0.104637 / 0.141683 (-0.037046) 1.489677 / 1.452155 (0.037522) 1.523096 / 1.492716 (0.030379)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.290880 / 0.018006 (0.272873) 0.474861 / 0.000490 (0.474371) 0.058728 / 0.000200 (0.058528) 0.000341 / 0.000054 (0.000287)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.026404 / 0.037411 (-0.011007) 0.105677 / 0.014526 (0.091151) 0.118019 / 0.176557 (-0.058537) 0.158054 / 0.737135 (-0.579082) 0.123962 / 0.296338 (-0.172376)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.418142 / 0.215209 (0.202933) 4.148880 / 2.077655 (2.071225) 2.028595 / 1.504120 (0.524475) 1.851696 / 1.541195 (0.310502) 1.950960 / 1.468490 (0.482470) 0.435340 / 4.584777 (-4.149437) 3.918111 / 3.745712 (0.172399) 2.116364 / 5.269862 (-3.153497) 1.252478 / 4.565676 (-3.313198) 0.051951 / 0.424275 (-0.372324) 0.010945 / 0.007607 (0.003338) 0.522569 / 0.226044 (0.296525) 5.217879 / 2.268929 (2.948951) 2.447647 / 55.444624 (-52.996977) 2.128439 / 6.876477 (-4.748038) 2.270096 / 2.142072 (0.128024) 0.538137 / 4.805227 (-4.267091) 0.122665 / 6.500664 (-6.377999) 0.062288 / 0.075469 (-0.013181)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.473965 / 1.841788 (-0.367822) 14.442889 / 8.074308 (6.368581) 25.318770 / 10.191392 (15.127378) 0.880065 / 0.680424 (0.199641) 0.532319 / 0.534201 (-0.001882) 0.388829 / 0.579283 (-0.190454) 0.453835 / 0.434364 (0.019471) 0.279115 / 0.540337 (-0.261223) 0.295859 / 1.386936 (-1.091077)

CML watermark

Please sign in to comment.