Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: NumericIndex should not support float16 dtype #49536

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,8 @@ def value_counts(
else:
values = _ensure_arraylike(values)
keys, counts = value_counts_arraylike(values, dropna)
if keys.dtype == np.float16:
keys = keys.astype(np.float32)

# For backwards compatibility, we let Index do its normal type
# inference, _except_ for if it infers from object to bool.
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,8 @@ def factorize(
codes, uniques = algorithms.factorize(
self._values, sort=sort, use_na_sentinel=use_na_sentinel
)
if uniques.dtype == np.float16:
uniques = uniques.astype(np.float32)

if isinstance(self, ABCIndex):
# preserve e.g. NumericIndex, preserve MultiIndex
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
# i.e. np.divmod, np.modf, np.frexp
return tuple(self.__array_wrap__(x) for x in result)

if result.dtype == np.float16:
result = result.astype(np.float32)

return self.__array_wrap__(result)

def __array_wrap__(self, result, context=None):
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ class NumericIndex(Index):
Notes
-----
A NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or
float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric
dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
float64/32 dtype. In particular, ``NumericIndex`` *can not* hold numpy float16
dtype or Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
"""

_typ = "numericindex"
Expand Down Expand Up @@ -133,6 +133,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
Ensure we have a valid array to pass to _simple_new.
"""
cls._validate_dtype(dtype)
if dtype == np.float16:

# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if not isinstance(data, (np.ndarray, Index)):
# Coerce to ndarray if not already ndarray or Index
Expand Down Expand Up @@ -176,6 +180,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
raise ValueError("Index data must be 1-dimensional")

subarr = np.asarray(subarr)
if subarr.dtype == "float16":
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not implemented")

return subarr

@classmethod
Expand All @@ -202,6 +210,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
dtype = pandas_dtype(dtype)
if not isinstance(dtype, np.dtype):
raise TypeError(f"{dtype} not a numpy type")
elif dtype == np.float16:
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if cls._is_backward_compat_public_numeric_index:
# dtype for NumericIndex
Expand Down
14 changes: 4 additions & 10 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from datetime import timedelta
from decimal import Decimal
import operator
from typing import Any

import numpy as np
import pytest
Expand Down Expand Up @@ -72,15 +71,10 @@ def compare_op(series, other, op):
# TODO: remove this kludge once mypy stops giving false positives here
# List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex]
# See GH#29725
ser_or_index: list[Any] = [Series, Index]
lefts: list[Any] = [RangeIndex(10, 40, 10)]
lefts.extend(
[
cls([10, 20, 30], dtype=dtype)
for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"]
for cls in ser_or_index
]
)
_ldtypes = ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"]
lefts: list[Index | Series] = [RangeIndex(10, 40, 10)]
lefts.extend([Series([10, 20, 30], dtype=dtype) for dtype in _ldtypes])
lefts.extend([Index([10, 20, 30], dtype=dtype) for dtype in _ldtypes if dtype != "f2"])

# ------------------------------------------------------------------
# Comparisons
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def test_iterable(self, index_or_series, method, dtype, rdtype):
# gh-13258
# coerce iteration to underlying python / pandas types
typ = index_or_series
if dtype == "float16" and issubclass(typ, pd.Index):
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
typ([1], dtype=dtype)
return
s = typ([1], dtype=dtype)
result = method(s)[0]
assert isinstance(result, rdtype)
Expand Down Expand Up @@ -115,6 +119,10 @@ def test_iterable_map(self, index_or_series, dtype, rdtype):
# gh-13236
# coerce iteration to underlying python / pandas types
typ = index_or_series
if dtype == "float16" and issubclass(typ, pd.Index):
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
typ([1], dtype=dtype)
return
s = typ([1], dtype=dtype)
result = s.map(type)[0]
if not isinstance(rdtype, tuple):
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@ def test_value_counts(index_or_series_obj):

counter = collections.Counter(obj)
expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name)
expected.index = expected.index.astype(obj.dtype)

if obj.dtype != np.float16:
expected.index = expected.index.astype(obj.dtype)
else:
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
expected.index.astype(obj.dtype)
return

if not isinstance(result.dtype, np.dtype):
# i.e IntegerDtype
Expand Down Expand Up @@ -73,7 +79,13 @@ def test_value_counts_null(null_obj, index_or_series_obj):
# np.nan would be duplicated, whereas None wouldn't
counter = collections.Counter(obj.dropna())
expected = Series(dict(counter.most_common()), dtype=np.int64)
expected.index = expected.index.astype(obj.dtype)

if obj.dtype != np.float16:
expected.index = expected.index.astype(obj.dtype)
else:
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
expected.index.astype(obj.dtype)
return

result = obj.value_counts()
if obj.duplicated().any():
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/indexes/numeric/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,44 @@ def test_coerce_list(self):
assert type(arr) is Index


class TestFloat16Index:
    """Check that a float16 index cannot be constructed (GH 49535)."""

    # float16 indexes are not supported
    _index_cls = NumericIndex

    def test_constructor(self):
        """Each float16 construction input raises ``NotImplementedError``."""
        index_cls = self._index_cls
        dtype = np.float16

        msg = "float16 indexes are not supported"

        int_values = [1, 2, 3, 4, 5]
        float_values = [1.0, 2, 3, 4, 5]
        # Each distinct input is listed once; the original repeated the
        # float list/ndarray cases verbatim.
        cases = [
            # explicit construction
            int_values,
            np.array(int_values),
            float_values,
            np.array(float_values),
            # nan handling
            [np.nan, np.nan],
            np.array([np.nan]),
        ]

        for data in cases:
            with pytest.raises(NotImplementedError, match=msg):
                index_cls(data, dtype=dtype)


class TestUIntNumericIndex(NumericInt):

_index_cls = NumericIndex
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def test_numpy_ufuncs_basic(index, func):
# coerces to float (e.g. np.sin)
with np.errstate(all="ignore"):
result = func(index)
exp = Index(func(index.values), name=index.name)
arr_result = func(index.values)
if arr_result.dtype == np.float16:
arr_result = arr_result.astype(np.float32)
exp = Index(arr_result, name=index.name)

tm.assert_index_equal(result, exp)
if type(index) is not Index or index.dtype == bool:
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ def test_factorize(self, index_or_series_obj, sort):
constructor = Index
if isinstance(obj, MultiIndex):
constructor = MultiIndex.from_tuples
expected_uniques = constructor(obj.unique())
expected_arr = obj.unique()
if expected_arr.dtype == np.float16:
expected_arr = expected_arr.astype(np.float32)
expected_uniques = constructor(expected_arr)
if (
isinstance(obj, Index)
and expected_uniques.dtype == bool
Expand Down