Skip to content

Commit

Permalink
Backport PR #41283: TYP Series and DataFrame currently type-check as …
Browse files Browse the repository at this point in the history
…hashable (#42299)

Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
  • Loading branch information
meeseeksmachine and MarcoGorelli committed Jun 29, 2021
1 parent aff6a8c commit 282b76e
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 25 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Expand Up @@ -707,6 +707,7 @@ Other API changes
- Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc <https://turbodbc.readthedocs.io/en/latest/>`_ (:issue:`36893`)
- Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
- :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`)
- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. Furthermore, ``isinstance(<Series>, collections.abc.Hashable)`` will now return ``False`` (:issue:`40013`)
- :meth:`.Styler.from_custom_template` now has two new arguments for template names, and removed the old ``name``, due to template inheritance having been introduced for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed.

.. _whatsnew_130.api_breaking.build:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/base.py
Expand Up @@ -1296,8 +1296,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
"""
raise TypeError(f"cannot perform {name} with type {self.dtype}")

def __hash__(self) -> int:
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

# ------------------------------------------------------------------------
# Non-Optimized Default Methods
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/frame.py
Expand Up @@ -6186,7 +6186,10 @@ def f(vals) -> tuple[np.ndarray, int]:
return labels.astype("i8", copy=False), len(shape)

if subset is None:
subset = self.columns
# Incompatible types in assignment
# (expression has type "Index", variable has type "Sequence[Any]")
# (pending on https://github.com/pandas-dev/pandas/issues/28770)
subset = self.columns # type: ignore[assignment]
elif (
not np.iterable(subset)
or isinstance(subset, str)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/generic.py
Expand Up @@ -1873,11 +1873,10 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
# ----------------------------------------------------------------------
# Iteration

def __hash__(self) -> int:
raise TypeError(
f"{repr(type(self).__name__)} objects are mutable, "
f"thus they cannot be hashed"
)
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

def __iter__(self):
"""
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Expand Up @@ -4540,9 +4540,10 @@ def __contains__(self, key: Any) -> bool:
except (OverflowError, TypeError, ValueError):
return False

@final
def __hash__(self):
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: None # type: ignore[assignment]

@final
def __setitem__(self, key, value):
Expand Down
21 changes: 10 additions & 11 deletions pandas/core/reshape/pivot.py
Expand Up @@ -482,7 +482,7 @@ def pivot(
if columns is None:
raise TypeError("pivot() missing 1 required argument: 'columns'")

columns = com.convert_to_list_like(columns)
columns_listlike = com.convert_to_list_like(columns)

if values is None:
if index is not None:
Expand All @@ -494,28 +494,27 @@ def pivot(
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
# error: Unsupported left operand type for + ("ExtensionArray")
indexed = data.set_index(
cols + columns, append=append # type: ignore[operator]
cols + columns_listlike, append=append # type: ignore[operator]
)
else:
if index is None:
index = [Series(data.index, name=data.index.name)]
index_list = [Series(data.index, name=data.index.name)]
else:
index = com.convert_to_list_like(index)
index = [data[idx] for idx in index]
index_list = [data[idx] for idx in com.convert_to_list_like(index)]

data_columns = [data[col] for col in columns]
index.extend(data_columns)
index = MultiIndex.from_arrays(index)
data_columns = [data[col] for col in columns_listlike]
index_list.extend(data_columns)
multiindex = MultiIndex.from_arrays(index_list)

if is_list_like(values) and not isinstance(values, tuple):
# Exclude tuple because it is seen as a single column name
values = cast(Sequence[Hashable], values)
indexed = data._constructor(
data[values]._values, index=index, columns=values
data[values]._values, index=multiindex, columns=values
)
else:
indexed = data._constructor_sliced(data[values]._values, index=index)
return indexed.unstack(columns)
indexed = data._constructor_sliced(data[values]._values, index=multiindex)
return indexed.unstack(columns_listlike)


def crosstab(
Expand Down
1 change: 0 additions & 1 deletion pandas/core/series.py
Expand Up @@ -305,7 +305,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
hasnans = property( # type: ignore[assignment]
base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
)
__hash__ = generic.NDFrame.__hash__
_mgr: SingleManager
div: Callable[[Series, Any], Series]
rdiv: Callable[[Series, Any], Series]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_api.py
Expand Up @@ -91,7 +91,7 @@ def test_not_hashable(self):
empty_frame = DataFrame()

df = DataFrame([1])
msg = "'DataFrame' objects are mutable, thus they cannot be hashed"
msg = "unhashable type: 'DataFrame'"
with pytest.raises(TypeError, match=msg):
hash(df)
with pytest.raises(TypeError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_api.py
Expand Up @@ -101,7 +101,7 @@ def test_index_tab_completion(self, index):
def test_not_hashable(self):
s_empty = Series(dtype=object)
s = Series([1])
msg = "'Series' objects are mutable, thus they cannot be hashed"
msg = "unhashable type: 'Series'"
with pytest.raises(TypeError, match=msg):
hash(s_empty)
with pytest.raises(TypeError, match=msg):
Expand Down

0 comments on commit 282b76e

Please sign in to comment.