
Commit

Backport PR pandas-dev#50565 on branch 1.5.x (BUG: Fix more interchange Column.size method) (pandas-dev#50610)

Backport PR pandas-dev#50565: BUG: Fix more interchange Column.size method

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
meeseeksmachine and mroeschke committed Jan 7, 2023
1 parent af97958 commit 43073cc
Showing 2 changed files with 11 additions and 12 deletions.
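
For context on the code path this commit touches, here is a minimal sketch of the public entry point, assuming a pandas build that includes this backport (1.5.x with this commit); the example data is purely illustrative:

    import pandas as pd
    from pandas.api.interchange import from_dataframe

    df = pd.DataFrame({"ints": [1, 2, 3], "strs": ["x", "y", "z"]})

    # from_dataframe() goes through pandas/core/interchange/from_dataframe.py,
    # which after this fix calls Column.size() as a method when sizing buffers.
    roundtripped = from_dataframe(df.__dataframe__())
    print(roundtripped)

    # The interchange column exposes size as a method, per the protocol spec.
    col = df.__dataframe__().get_column_by_name("ints")
    print(col.size())  # 3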
1 change: 0 additions & 1 deletion pandas/core/interchange/dataframe_protocol.py
@@ -213,7 +213,6 @@ class Column(ABC):
doesn't need its own version or ``__column__`` protocol.
"""

- @property
@abstractmethod
def size(self) -> int:
"""
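The dataframe_protocol.py hunk above removes the stray @property, so size is declared purely as an abstract method. A stripped-down, hypothetical sketch of what a conforming implementation looks like (the real ABC defines many more abstract members than shown here):

    from abc import ABC, abstractmethod

    class Column(ABC):
        # Mirrors the change above: ``size`` is an abstract method, not a property.
        @abstractmethod
        def size(self) -> int:
            """Number of elements in the column."""

    class ListColumn(Column):
        # Hypothetical, minimal implementation for illustration only.
        def __init__(self, values):
            self._values = list(values)

        def size(self) -> int:
            return len(self._values)

    col = ListColumn([1, 2, 3])
    print(col.size())  # 3 -- consumers such as from_dataframe.py call size() now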
22 changes: 11 additions & 11 deletions pandas/core/interchange/from_dataframe.py
@@ -155,7 +155,7 @@ def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
buffers = col.get_buffers()

data_buff, data_dtype = buffers["data"]
- data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size)
+ data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size())

data = set_nulls(data, col, buffers["validity"])
return data, buffers
@@ -187,7 +187,7 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
buffers = col.get_buffers()

codes_buff, codes_dtype = buffers["data"]
- codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size)
+ codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size())

# Doing module in order to not get ``IndexError`` for
# out-of-bounds sentinel values in `codes`
@@ -244,29 +244,29 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
Endianness.NATIVE,
)
# Specify zero offset as we don't want to chunk the string data
- data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size)
+ data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size())

# Retrieve the offsets buffer containing the index offsets demarcating
# the beginning and the ending of each string
offset_buff, offset_dtype = buffers["offsets"]
# Offsets buffer contains start-stop positions of strings in the data buffer,
- # meaning that it has more elements than in the data buffer, do `col.size + 1` here
- # to pass a proper offsets buffer size
+ # meaning that it has more elements than in the data buffer, do `col.size() + 1`
+ # here to pass a proper offsets buffer size
offsets = buffer_to_ndarray(
-     offset_buff, offset_dtype, col.offset, length=col.size + 1
+     offset_buff, offset_dtype, col.offset, length=col.size() + 1
)

null_pos = None
if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert buffers["validity"], "Validity buffers cannot be empty for masks"
valid_buff, valid_dtype = buffers["validity"]
- null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
+ null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
if sentinel_val == 0:
null_pos = ~null_pos

# Assemble the strings from the code units
- str_list: list[None | float | str] = [None] * col.size
- for i in range(col.size):
+ str_list: list[None | float | str] = [None] * col.size()
+ for i in range(col.size()):
# Check for missing values
if null_pos is not None and null_pos[i]:
str_list[i] = np.nan
@@ -349,7 +349,7 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
Endianness.NATIVE,
),
col.offset,
-     col.size,
+     col.size(),
)

data = parse_datetime_format_str(format_str, data)
@@ -501,7 +501,7 @@ def set_nulls(
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert validity, "Expected to have a validity buffer for the mask"
valid_buff, valid_dtype = validity
- null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
+ null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
if sentinel_val == 0:
null_pos = ~null_pos
elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN):
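As an aside on the `col.size() + 1` comment in string_column_to_ndarray above: the offsets buffer for N strings carries N + 1 positions, so string i spans data[offsets[i]:offsets[i + 1]]. A self-contained sketch of that layout (illustrative values, not pandas internals):

    import numpy as np

    data = np.frombuffer(b"foobarbaz", dtype=np.uint8)  # UTF-8 code units of 3 strings
    offsets = np.array([0, 3, 6, 9], dtype=np.int64)    # size == 3, so 3 + 1 offsets

    # Each string i is delimited by offsets[i] and offsets[i + 1] in the data buffer.
    strings = [
        data[offsets[i] : offsets[i + 1]].tobytes().decode("utf-8")
        for i in range(len(offsets) - 1)
    ]
    print(strings)  # ['foo', 'bar', 'baz']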
