Skip to content

Commit

Permalink
BUG: NumericIndex should not support float16 dtype (#49536)
Browse files Browse the repository at this point in the history
* BUG: NumericIndex should not support float16 dtype

* make NumericIndex fail with float16 dtype

* make NumericIndex fail with float16 dtype, II

* fix failures

* NotImplementedError

* NotImplementedError II

* fail on float16, but allow np.exp(int8_arrays)

* fail on float16, but allow np.exp(int8_arrays) II

* fix NumericIndex._ensure_dtype

Co-authored-by: Terji Petersen <terjipetersen@Terjis-Air.fritz.box>
Co-authored-by: Terji Petersen <terjipetersen@Terjis-MacBook-Air.local>
  • Loading branch information
3 people committed Dec 27, 2022
1 parent 35a7f80 commit 38b4e96
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 16 deletions.
2 changes: 2 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,8 @@ def value_counts(
else:
values = _ensure_arraylike(values)
keys, counts = value_counts_arraylike(values, dropna)
if keys.dtype == np.float16:
keys = keys.astype(np.float32)

# For backwards compatibility, we let Index do its normal type
# inference, _except_ for if it infers from object to bool.
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,8 @@ def factorize(
codes, uniques = algorithms.factorize(
self._values, sort=sort, use_na_sentinel=use_na_sentinel
)
if uniques.dtype == np.float16:
uniques = uniques.astype(np.float32)

if isinstance(self, ABCIndex):
# preserve e.g. NumericIndex, preserve MultiIndex
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
# i.e. np.divmod, np.modf, np.frexp
return tuple(self.__array_wrap__(x) for x in result)

if result.dtype == np.float16:
result = result.astype(np.float32)

return self.__array_wrap__(result)

def __array_wrap__(self, result, context=None):
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ class NumericIndex(Index):
Notes
-----
A NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or
float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric
dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
float64/32 dtype. In particular, ``NumericIndex`` *can not* hold numpy float16
dtype or Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
"""

_typ = "numericindex"
Expand Down Expand Up @@ -133,6 +133,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
Ensure we have a valid array to pass to _simple_new.
"""
cls._validate_dtype(dtype)
if dtype == np.float16:

# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if not isinstance(data, (np.ndarray, Index)):
# Coerce to ndarray if not already ndarray or Index
Expand Down Expand Up @@ -176,6 +180,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
raise ValueError("Index data must be 1-dimensional")

subarr = np.asarray(subarr)
if subarr.dtype == "float16":
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not implemented")

return subarr

@classmethod
Expand All @@ -202,6 +210,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
dtype = pandas_dtype(dtype)
if not isinstance(dtype, np.dtype):
raise TypeError(f"{dtype} not a numpy type")
elif dtype == np.float16:
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if cls._is_backward_compat_public_numeric_index:
# dtype for NumericIndex
Expand Down
14 changes: 4 additions & 10 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from datetime import timedelta
from decimal import Decimal
import operator
from typing import Any

import numpy as np
import pytest
Expand Down Expand Up @@ -72,15 +71,10 @@ def compare_op(series, other, op):
# TODO: remove this kludge once mypy stops giving false positives here
# List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex]
# See GH#29725
ser_or_index: list[Any] = [Series, Index]
lefts: list[Any] = [RangeIndex(10, 40, 10)]
lefts.extend(
[
cls([10, 20, 30], dtype=dtype)
for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"]
for cls in ser_or_index
]
)
_ldtypes = ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"]
lefts: list[Index | Series] = [RangeIndex(10, 40, 10)]
lefts.extend([Series([10, 20, 30], dtype=dtype) for dtype in _ldtypes])
lefts.extend([Index([10, 20, 30], dtype=dtype) for dtype in _ldtypes if dtype != "f2"])

# ------------------------------------------------------------------
# Comparisons
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def test_iterable(self, index_or_series, method, dtype, rdtype):
# gh-13258
# coerce iteration to underlying python / pandas types
typ = index_or_series
if dtype == "float16" and issubclass(typ, pd.Index):
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
typ([1], dtype=dtype)
return
s = typ([1], dtype=dtype)
result = method(s)[0]
assert isinstance(result, rdtype)
Expand Down Expand Up @@ -115,6 +119,10 @@ def test_iterable_map(self, index_or_series, dtype, rdtype):
# gh-13236
# coerce iteration to underlying python / pandas types
typ = index_or_series
if dtype == "float16" and issubclass(typ, pd.Index):
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
typ([1], dtype=dtype)
return
s = typ([1], dtype=dtype)
result = s.map(type)[0]
if not isinstance(rdtype, tuple):
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@ def test_value_counts(index_or_series_obj):

counter = collections.Counter(obj)
expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name)
expected.index = expected.index.astype(obj.dtype)

if obj.dtype != np.float16:
expected.index = expected.index.astype(obj.dtype)
else:
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
expected.index.astype(obj.dtype)
return

if not isinstance(result.dtype, np.dtype):
# i.e IntegerDtype
Expand Down Expand Up @@ -73,7 +79,13 @@ def test_value_counts_null(null_obj, index_or_series_obj):
# np.nan would be duplicated, whereas None wouldn't
counter = collections.Counter(obj.dropna())
expected = Series(dict(counter.most_common()), dtype=np.int64)
expected.index = expected.index.astype(obj.dtype)

if obj.dtype != np.float16:
expected.index = expected.index.astype(obj.dtype)
else:
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
expected.index.astype(obj.dtype)
return

result = obj.value_counts()
if obj.duplicated().any():
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/indexes/numeric/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,44 @@ def test_coerce_list(self):
assert type(arr) is Index


class TestFloat16Index:
    """Check that the index class refuses to be built with float16 dtype.

    float16 indexes are not supported (GH 49535).
    """

    _index_cls = NumericIndex

    def test_constructor(self):
        """Every float16 construction attempt raises ``NotImplementedError``."""
        index_cls = self._index_cls
        dtype = np.float16

        msg = "float16 indexes are not supported"

        # Each distinct input is listed once; list and ndarray variants are
        # both covered.
        candidates = [
            # explicit construction from integer data
            [1, 2, 3, 4, 5],
            np.array([1, 2, 3, 4, 5]),
            # explicit construction from float data
            [1.0, 2, 3, 4, 5],
            np.array([1.0, 2, 3, 4, 5]),
            # nan handling
            [np.nan, np.nan],
            np.array([np.nan]),
        ]

        for data in candidates:
            with pytest.raises(NotImplementedError, match=msg):
                index_cls(data, dtype=dtype)


class TestUIntNumericIndex(NumericInt):

_index_cls = NumericIndex
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def test_numpy_ufuncs_basic(index, func):
# coerces to float (e.g. np.sin)
with np.errstate(all="ignore"):
result = func(index)
exp = Index(func(index.values), name=index.name)
arr_result = func(index.values)
if arr_result.dtype == np.float16:
arr_result = arr_result.astype(np.float32)
exp = Index(arr_result, name=index.name)

tm.assert_index_equal(result, exp)
if type(index) is not Index or index.dtype == bool:
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ def test_factorize(self, index_or_series_obj, sort):
constructor = Index
if isinstance(obj, MultiIndex):
constructor = MultiIndex.from_tuples
expected_uniques = constructor(obj.unique())
expected_arr = obj.unique()
if expected_arr.dtype == np.float16:
expected_arr = expected_arr.astype(np.float32)
expected_uniques = constructor(expected_arr)
if (
isinstance(obj, Index)
and expected_uniques.dtype == bool
Expand Down

0 comments on commit 38b4e96

Please sign in to comment.