Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TYP Series and DataFrame currently type-check as hashable #41283

Merged
merged 13 commits into from
Jun 29, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Expand Up @@ -703,6 +703,7 @@ Other API changes
- Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc <https://turbodbc.readthedocs.io/en/latest/>`_ (:issue:`36893`)
- Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
- :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`)
- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed`` (:issue:`40013`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MarcoGorelli do we need to add a note about the change in the result of ``isinstance(obj, collections.abc.Hashable)``?


.. _whatsnew_130.api_breaking.build:

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/base.py
Expand Up @@ -1296,8 +1296,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
"""
raise TypeError(f"cannot perform {name} with type {self.dtype}")

def __hash__(self) -> int:
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

# ------------------------------------------------------------------------
# Non-Optimized Default Methods
Expand Down
15 changes: 8 additions & 7 deletions pandas/core/frame.py
Expand Up @@ -6181,27 +6181,28 @@ def f(vals) -> tuple[np.ndarray, int]:
labels, shape = algorithms.factorize(vals, size_hint=len(self))
return labels.astype("i8", copy=False), len(shape)

subset_iterable: Iterable
if subset is None:
subset = self.columns
subset_iterable = self.columns
elif (
not np.iterable(subset)
or isinstance(subset, str)
or isinstance(subset, tuple)
and subset in self.columns
):
subset = (subset,)

# needed for mypy since can't narrow types using np.iterable
subset = cast(Iterable, subset)
subset_iterable = (subset,)
else:
# needed for mypy since can't narrow types using np.iterable
subset_iterable = cast(Iterable, subset)

# Verify all columns in subset exist in the queried dataframe
# Otherwise, raise a KeyError, same as if you try to __getitem__ with a
# key that doesn't exist.
diff = Index(subset).difference(self.columns)
diff = Index(subset_iterable).difference(self.columns)
if not diff.empty:
raise KeyError(diff)

vals = (col.values for name, col in self.items() if name in subset)
vals = (col.values for name, col in self.items() if name in subset_iterable)
labels, shape = map(list, zip(*map(f, vals)))

ids = get_group_index(
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/generic.py
Expand Up @@ -1873,11 +1873,10 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
# ----------------------------------------------------------------------
# Iteration

def __hash__(self) -> int:
raise TypeError(
f"{repr(type(self).__name__)} objects are mutable, "
f"thus they cannot be hashed"
)
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

def __iter__(self):
"""
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Expand Up @@ -4532,9 +4532,10 @@ def __contains__(self, key: Any) -> bool:
except (OverflowError, TypeError, ValueError):
return False

@final
def __hash__(self):
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

@final
def __setitem__(self, key, value):
Expand Down
21 changes: 10 additions & 11 deletions pandas/core/reshape/pivot.py
Expand Up @@ -482,7 +482,7 @@ def pivot(
if columns is None:
raise TypeError("pivot() missing 1 required argument: 'columns'")

columns = com.convert_to_list_like(columns)
columns_listlike = com.convert_to_list_like(columns)

if values is None:
if index is not None:
Expand All @@ -494,28 +494,27 @@ def pivot(
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
# error: Unsupported left operand type for + ("ExtensionArray")
indexed = data.set_index(
cols + columns, append=append # type: ignore[operator]
cols + columns_listlike, append=append # type: ignore[operator]
)
else:
if index is None:
index = [Series(data.index, name=data.index.name)]
index_list = [Series(data.index, name=data.index.name)]
else:
index = com.convert_to_list_like(index)
index = [data[idx] for idx in index]
index_list = [data[idx] for idx in com.convert_to_list_like(index)]

data_columns = [data[col] for col in columns]
index.extend(data_columns)
index = MultiIndex.from_arrays(index)
data_columns = [data[col] for col in columns_listlike]
index_list.extend(data_columns)
multiindex = MultiIndex.from_arrays(index_list)

if is_list_like(values) and not isinstance(values, tuple):
# Exclude tuple because it is seen as a single column name
values = cast(Sequence[Hashable], values)
indexed = data._constructor(
data[values]._values, index=index, columns=values
data[values]._values, index=multiindex, columns=values
)
else:
indexed = data._constructor_sliced(data[values]._values, index=index)
return indexed.unstack(columns)
indexed = data._constructor_sliced(data[values]._values, index=multiindex)
return indexed.unstack(columns_listlike)


def crosstab(
Expand Down
1 change: 0 additions & 1 deletion pandas/core/series.py
Expand Up @@ -305,7 +305,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
hasnans = property( # type: ignore[assignment]
base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
)
__hash__ = generic.NDFrame.__hash__
_mgr: SingleManager
div: Callable[[Series, Any], Series]
rdiv: Callable[[Series, Any], Series]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_api.py
Expand Up @@ -91,7 +91,7 @@ def test_not_hashable(self):
empty_frame = DataFrame()

df = DataFrame([1])
msg = "'DataFrame' objects are mutable, thus they cannot be hashed"
msg = "unhashable type: 'DataFrame'"
with pytest.raises(TypeError, match=msg):
hash(df)
with pytest.raises(TypeError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_api.py
Expand Up @@ -101,7 +101,7 @@ def test_index_tab_completion(self, index):
def test_not_hashable(self):
s_empty = Series(dtype=object)
s = Series([1])
msg = "'Series' objects are mutable, thus they cannot be hashed"
msg = "unhashable type: 'Series'"
with pytest.raises(TypeError, match=msg):
hash(s_empty)
with pytest.raises(TypeError, match=msg):
Expand Down