Backport PR #41283: TYP Series and DataFrame currently type-check as …

…hashable (#42299) Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
pandas-dev · Jun 29, 2021 · 282b76e · 282b76e
1 parent aff6a8c
commit 282b76e
Show file tree

Hide file tree

Showing 9 changed files with 29 additions and 25 deletions.
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -707,6 +707,7 @@ Other API changes
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc <https://turbodbc.readthedocs.io/en/latest/>`_ (:issue:`36893`)
 - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
 - :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`)
+- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. Furthermore, ``isinstance(<Series>, collections.abc.Hashable)`` will now return ``False`` (:issue:`40013`)
 - :meth:`.Styler.from_custom_template` now has two new arguments for template names, and removed the old ``name``, due to template inheritance having been introduced for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed.
 
 .. _whatsnew_130.api_breaking.build:

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -1296,8 +1296,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         """
         raise TypeError(f"cannot perform {name} with type {self.dtype}")
 
-    def __hash__(self) -> int:
-        raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
+    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
+    # Incompatible types in assignment (expression has type "None", base class
+    # "object" defined the type as "Callable[[object], int]")
+    __hash__: None  # type: ignore[assignment]
 
     # ------------------------------------------------------------------------
     # Non-Optimized Default Methods

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6186,7 +6186,10 @@ def f(vals) -> tuple[np.ndarray, int]:
             return labels.astype("i8", copy=False), len(shape)
 
         if subset is None:
-            subset = self.columns
+            # Incompatible types in assignment
+            # (expression has type "Index", variable has type "Sequence[Any]")
+            # (pending on https://github.com/pandas-dev/pandas/issues/28770)
+            subset = self.columns  # type: ignore[assignment]
         elif (
             not np.iterable(subset)
             or isinstance(subset, str)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1873,11 +1873,10 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
     # ----------------------------------------------------------------------
     # Iteration
 
-    def __hash__(self) -> int:
-        raise TypeError(
-            f"{repr(type(self).__name__)} objects are mutable, "
-            f"thus they cannot be hashed"
-        )
+    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
+    # Incompatible types in assignment (expression has type "None", base class
+    # "object" defined the type as "Callable[[object], int]")
+    __hash__: None  # type: ignore[assignment]
 
     def __iter__(self):
         """

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4540,9 +4540,10 @@ def __contains__(self, key: Any) -> bool:
         except (OverflowError, TypeError, ValueError):
             return False
 
-    @final
-    def __hash__(self):
-        raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
+    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
+    # Incompatible types in assignment (expression has type "None", base class
+    # "object" defined the type as "Callable[[object], int]")
+    __hash__: None  # type: ignore[assignment]
 
     @final
     def __setitem__(self, key, value):

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -482,7 +482,7 @@ def pivot(
     if columns is None:
         raise TypeError("pivot() missing 1 required argument: 'columns'")
 
-    columns = com.convert_to_list_like(columns)
+    columns_listlike = com.convert_to_list_like(columns)
 
     if values is None:
         if index is not None:
@@ -494,28 +494,27 @@ def pivot(
         # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
         # error: Unsupported left operand type for + ("ExtensionArray")
         indexed = data.set_index(
-            cols + columns, append=append  # type: ignore[operator]
+            cols + columns_listlike, append=append  # type: ignore[operator]
         )
     else:
         if index is None:
-            index = [Series(data.index, name=data.index.name)]
+            index_list = [Series(data.index, name=data.index.name)]
         else:
-            index = com.convert_to_list_like(index)
-            index = [data[idx] for idx in index]
+            index_list = [data[idx] for idx in com.convert_to_list_like(index)]
 
-        data_columns = [data[col] for col in columns]
-        index.extend(data_columns)
-        index = MultiIndex.from_arrays(index)
+        data_columns = [data[col] for col in columns_listlike]
+        index_list.extend(data_columns)
+        multiindex = MultiIndex.from_arrays(index_list)
 
         if is_list_like(values) and not isinstance(values, tuple):
             # Exclude tuple because it is seen as a single column name
             values = cast(Sequence[Hashable], values)
             indexed = data._constructor(
-                data[values]._values, index=index, columns=values
+                data[values]._values, index=multiindex, columns=values
             )
         else:
-            indexed = data._constructor_sliced(data[values]._values, index=index)
-    return indexed.unstack(columns)
+            indexed = data._constructor_sliced(data[values]._values, index=multiindex)
+    return indexed.unstack(columns_listlike)
 
 
 def crosstab(

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -305,7 +305,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
     hasnans = property(  # type: ignore[assignment]
         base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
     )
-    __hash__ = generic.NDFrame.__hash__
     _mgr: SingleManager
     div: Callable[[Series, Any], Series]
     rdiv: Callable[[Series, Any], Series]

diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -91,7 +91,7 @@ def test_not_hashable(self):
         empty_frame = DataFrame()
 
         df = DataFrame([1])
-        msg = "'DataFrame' objects are mutable, thus they cannot be hashed"
+        msg = "unhashable type: 'DataFrame'"
         with pytest.raises(TypeError, match=msg):
             hash(df)
         with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -101,7 +101,7 @@ def test_index_tab_completion(self, index):
     def test_not_hashable(self):
         s_empty = Series(dtype=object)
         s = Series([1])
-        msg = "'Series' objects are mutable, thus they cannot be hashed"
+        msg = "unhashable type: 'Series'"
         with pytest.raises(TypeError, match=msg):
             hash(s_empty)
         with pytest.raises(TypeError, match=msg):