Skip to content

Commit

Permalink
Avoid some FutureWarnings and DeprecationWarnings
Browse files Browse the repository at this point in the history
  • Loading branch information
mariosasko committed Jun 14, 2022
1 parent 3dbe753 commit 6eab0e4
Show file tree
Hide file tree
Showing 9 changed files with 12 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/datasets/arrow_dataset.py
Expand Up @@ -280,7 +280,7 @@ def _get_output_signature(
else:
np_arrays.append(np.array(array))

if np.issubdtype(np_arrays[0].dtype, np.integer) or np_arrays[0].dtype == np.bool:
if np.issubdtype(np_arrays[0].dtype, np.integer) or np_arrays[0].dtype == bool:
tf_dtype = tf.int64
np_dtype = np.int64
elif np.issubdtype(np_arrays[0].dtype, np.number):
Expand Down Expand Up @@ -3663,7 +3663,7 @@ def _feature(values: Union[float, int, str, np.ndarray]) -> "tf.train.Feature":
return _float_feature([values.item()])
elif np.issubdtype(values.dtype, np.integer):
return _int64_feature([values.item()])
elif np.issubdtype(values.dtype, np.str):
elif np.issubdtype(values.dtype, str):
return _bytes_feature([values.item().encode()])
else:
raise ValueError(f"values={values} has dtype {values.dtype}, which cannot be serialized")
Expand Down
2 changes: 1 addition & 1 deletion src/datasets/features/features.py
Expand Up @@ -810,7 +810,7 @@ def __getitem__(self, item: Union[int, slice, np.ndarray]) -> Union[np.ndarray,
def take(
self, indices: Sequence_[int], allow_fill: bool = False, fill_value: bool = None
) -> "PandasArrayExtensionArray":
indices: np.ndarray = np.asarray(indices, dtype=np.int)
indices: np.ndarray = np.asarray(indices, dtype=int)
if allow_fill:
fill_value = (
self.dtype.na_value if fill_value is None else np.asarray(fill_value, dtype=self.dtype.value_type)
Expand Down
4 changes: 2 additions & 2 deletions src/datasets/formatting/formatting.py
Expand Up @@ -194,11 +194,11 @@ def _arrow_array_to_numpy(self, pa_array: pa.Array) -> np.ndarray:
array: List = pa_array.to_numpy(zero_copy_only=zero_copy_only).tolist()
if len(array) > 0:
if any(
(isinstance(x, np.ndarray) and (x.dtype == np.object or x.shape != array[0].shape))
(isinstance(x, np.ndarray) and (x.dtype == object or x.shape != array[0].shape))
or (isinstance(x, float) and np.isnan(x))
for x in array
):
return np.array(array, copy=False, **{**self.np_array_kwargs, "dtype": np.object})
return np.array(array, copy=False, **{**self.np_array_kwargs, "dtype": object})
return np.array(array, copy=False, **self.np_array_kwargs)


Expand Down
2 changes: 1 addition & 1 deletion src/datasets/formatting/jax_formatter.py
Expand Up @@ -54,7 +54,7 @@ def _recursive_tensorize(self, data_struct: dict):
# support for nested types like struct of list of struct
if isinstance(data_struct, (list, np.ndarray)):
data_struct = np.array(data_struct, copy=False)
if data_struct.dtype == np.object: # jax arrays cannot be instantiated from an array of objects
if data_struct.dtype == object: # jax arrays cannot be instantiated from an array of objects
return [self.recursive_tensorize(substruct) for substruct in data_struct]
return self._tensorize(data_struct)

Expand Down
4 changes: 1 addition & 3 deletions src/datasets/formatting/tf_formatter.py
Expand Up @@ -65,9 +65,7 @@ def _tensorize(self, value):
def _recursive_tensorize(self, data_struct: dict):
# support for nested types like struct of list of struct
if isinstance(data_struct, (list, np.ndarray)):
if (
data_struct.dtype == np.object
): # tensorflow tensors can sometimes be instantiated from an array of objects
if data_struct.dtype == object: # tensorflow tensors can sometimes be instantiated from an array of objects
try:
return self._tensorize(data_struct)
except ValueError:
Expand Down
2 changes: 1 addition & 1 deletion src/datasets/formatting/torch_formatter.py
Expand Up @@ -46,7 +46,7 @@ def _recursive_tensorize(self, data_struct: dict):
# support for nested types like struct of list of struct
if isinstance(data_struct, (list, np.ndarray)):
data_struct = np.array(data_struct, copy=False)
if data_struct.dtype == np.object: # pytorch tensors cannot be instantiated from an array of objects
if data_struct.dtype == object: # pytorch tensors cannot be instantiated from an array of objects
return [self.recursive_tensorize(substruct) for substruct in data_struct]
return self._tensorize(data_struct)

Expand Down
2 changes: 1 addition & 1 deletion src/datasets/utils/stratify.py
Expand Up @@ -48,7 +48,7 @@ def approximate_mode(class_counts, n_draws, rng):
need_to_add -= add_now
if need_to_add == 0:
break
return floored.astype(np.int)
return floored.astype(int)


def stratified_shuffle_split_generate_indices(y, n_train, n_test, rng, n_splits=10):
Expand Down
2 changes: 1 addition & 1 deletion tests/features/test_array_xd.py
Expand Up @@ -335,7 +335,7 @@ def test_array_xd_with_none():
dummy_array = np.array([[1, 2], [3, 4]], dtype="int32")
dataset = datasets.Dataset.from_dict({"foo": [dummy_array, None, dummy_array]}, features=features)
arr = NumpyArrowExtractor().extract_column(dataset._data)
assert isinstance(arr, np.ndarray) and arr.dtype == np.object and arr.shape == (3,)
assert isinstance(arr, np.ndarray) and arr.dtype == object and arr.shape == (3,)
np.testing.assert_equal(arr[0], dummy_array)
np.testing.assert_equal(arr[2], dummy_array)
assert np.isnan(arr[1]) # a single np.nan value - np.all not needed
Expand Down
4 changes: 2 additions & 2 deletions tests/test_builder.py
Expand Up @@ -837,8 +837,8 @@ def _generate_examples(self):
"builder_class, kwargs",
[
(DummyBuilderWithVersion, {}),
(DummyBuilderWithBuilderConfigs, {"name": "custom"}),
(DummyBuilderWithCustomBuilderConfigs, {"name": "20220501.en"}),
(DummyBuilderWithBuilderConfigs, {"config_name": "custom"}),
(DummyBuilderWithCustomBuilderConfigs, {"config_name": "20220501.en"}),
(DummyBuilderWithCustomBuilderConfigs, {"date": "20220501", "language": "ca"}),
],
)
Expand Down

1 comment on commit 6eab0e4

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==6.0.0

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.011067 / 0.011353 (-0.000286) 0.005121 / 0.011008 (-0.005887) 0.038395 / 0.038508 (-0.000113) 0.041541 / 0.023109 (0.018432) 0.422908 / 0.275898 (0.147009) 0.449837 / 0.323480 (0.126357) 0.007767 / 0.007986 (-0.000218) 0.004889 / 0.004328 (0.000561) 0.008950 / 0.004250 (0.004700) 0.051456 / 0.037052 (0.014403) 0.407789 / 0.258489 (0.149300) 0.449566 / 0.293841 (0.155725) 0.059034 / 0.128546 (-0.069512) 0.015197 / 0.075646 (-0.060449) 0.339607 / 0.419271 (-0.079665) 0.064222 / 0.043533 (0.020689) 0.415720 / 0.255139 (0.160581) 0.466604 / 0.283200 (0.183404) 0.115174 / 0.141683 (-0.026509) 1.787245 / 1.452155 (0.335090) 1.875813 / 1.492716 (0.383096)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.231440 / 0.018006 (0.213434) 0.536432 / 0.000490 (0.535942) 0.024018 / 0.000200 (0.023818) 0.000706 / 0.000054 (0.000652)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.028822 / 0.037411 (-0.008589) 0.135426 / 0.014526 (0.120900) 0.140441 / 0.176557 (-0.036116) 0.184593 / 0.737135 (-0.552542) 0.141568 / 0.296338 (-0.154770)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.605603 / 0.215209 (0.390393) 6.001822 / 2.077655 (3.924167) 2.465246 / 1.504120 (0.961126) 2.213126 / 1.541195 (0.671931) 2.197845 / 1.468490 (0.729355) 0.735488 / 4.584777 (-3.849289) 5.529679 / 3.745712 (1.783967) 1.502740 / 5.269862 (-3.767121) 1.486219 / 4.565676 (-3.079458) 0.082504 / 0.424275 (-0.341771) 0.013203 / 0.007607 (0.005596) 0.770835 / 0.226044 (0.544790) 7.767178 / 2.268929 (5.498250) 3.191306 / 55.444624 (-52.253318) 2.630203 / 6.876477 (-4.246273) 2.760795 / 2.142072 (0.618723) 0.905538 / 4.805227 (-3.899689) 0.182215 / 6.500664 (-6.318449) 0.080209 / 0.075469 (0.004740)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.966272 / 1.841788 (0.124485) 17.302119 / 8.074308 (9.227811) 41.054461 / 10.191392 (30.863069) 1.174997 / 0.680424 (0.494573) 0.742324 / 0.534201 (0.208123) 0.492501 / 0.579283 (-0.086782) 0.592623 / 0.434364 (0.158259) 0.358653 / 0.540337 (-0.181685) 0.393893 / 1.386936 (-0.993043)
PyArrow==latest
Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.010515 / 0.011353 (-0.000838) 0.005076 / 0.011008 (-0.005933) 0.036992 / 0.038508 (-0.001516) 0.034318 / 0.023109 (0.011209) 0.409642 / 0.275898 (0.133744) 0.425180 / 0.323480 (0.101700) 0.004862 / 0.007986 (-0.003123) 0.004255 / 0.004328 (-0.000074) 0.006938 / 0.004250 (0.002688) 0.039447 / 0.037052 (0.002395) 0.396288 / 0.258489 (0.137799) 0.445090 / 0.293841 (0.151249) 0.046307 / 0.128546 (-0.082239) 0.015163 / 0.075646 (-0.060484) 0.315812 / 0.419271 (-0.103459) 0.064216 / 0.043533 (0.020683) 0.393150 / 0.255139 (0.138011) 0.427625 / 0.283200 (0.144426) 0.119725 / 0.141683 (-0.021958) 1.741552 / 1.452155 (0.289397) 1.773424 / 1.492716 (0.280707)

Benchmark: benchmark_getitem_100B.json

metric get_batch_of_1024_random_rows get_batch_of_1024_rows get_first_row get_last_row
new / old (diff) 0.287102 / 0.018006 (0.269095) 0.590078 / 0.000490 (0.589588) 0.066884 / 0.000200 (0.066684) 0.000598 / 0.000054 (0.000544)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.029836 / 0.037411 (-0.007575) 0.138606 / 0.014526 (0.124080) 0.138653 / 0.176557 (-0.037904) 0.203122 / 0.737135 (-0.534014) 0.139080 / 0.296338 (-0.157258)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.611462 / 0.215209 (0.396253) 6.032696 / 2.077655 (3.955041) 2.562656 / 1.504120 (1.058536) 2.301881 / 1.541195 (0.760686) 2.321476 / 1.468490 (0.852986) 0.707244 / 4.584777 (-3.877533) 5.495904 / 3.745712 (1.750192) 1.560831 / 5.269862 (-3.709030) 1.526627 / 4.565676 (-3.039049) 0.082338 / 0.424275 (-0.341937) 0.013601 / 0.007607 (0.005994) 0.738546 / 0.226044 (0.512502) 7.372311 / 2.268929 (5.103382) 3.244123 / 55.444624 (-52.200501) 2.723955 / 6.876477 (-4.152521) 2.829642 / 2.142072 (0.687569) 0.883451 / 4.805227 (-3.921776) 0.185630 / 6.500664 (-6.315034) 0.081919 / 0.075469 (0.006450)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 1.864562 / 1.841788 (0.022774) 16.374539 / 8.074308 (8.300231) 40.588652 / 10.191392 (30.397260) 1.094018 / 0.680424 (0.413594) 0.681631 / 0.534201 (0.147430) 0.471105 / 0.579283 (-0.108178) 0.608554 / 0.434364 (0.174190) 0.343113 / 0.540337 (-0.197224) 0.360185 / 1.386936 (-1.026751)

CML watermark

Please sign in to comment.