
Commit

Backport PR pandas-dev#50565 on branch 1.5.x (BUG: Fix more interchange Column.size method) (pandas-dev#50610)

Backport PR pandas-dev#50565: BUG: Fix more interchange Column.size method

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
meeseeksmachine and mroeschke committed Jan 7, 2023
1 parent af97958 commit 43073cc
Showing 2 changed files with 11 additions and 12 deletions.
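
For context on the code path this commit touches, here is a minimal sketch of the public entry point, assuming a pandas build that includes this backport (1.5.x with this commit); the example data is purely illustrative:

    import pandas as pd
    from pandas.api.interchange import from_dataframe

    df = pd.DataFrame({"ints": [1, 2, 3], "strs": ["x", "y", "z"]})

    # from_dataframe() goes through pandas/core/interchange/from_dataframe.py,
    # which after this fix calls Column.size() as a method when sizing buffers.
    roundtripped = from_dataframe(df.__dataframe__())
    print(roundtripped)

    # The interchange column exposes size as a method, per the protocol spec.
    col = df.__dataframe__().get_column_by_name("ints")
    print(col.size())  # 3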
1 change: 0 additions & 1 deletion pandas/core/interchange/dataframe_protocol.py
@@ -213,7 +213,6 @@ class Column(ABC):
doesn't need its own version or ``__column__`` protocol.
"""

- @property
@abstractmethod
def size(self) -> int:
"""
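The dataframe_protocol.py hunk above removes the stray @property, so size is declared purely as an abstract method. A stripped-down, hypothetical sketch of what a conforming implementation looks like (the real ABC defines many more abstract members than shown here):

    from abc import ABC, abstractmethod

    class Column(ABC):
        # Mirrors the change above: ``size`` is an abstract method, not a property.
        @abstractmethod
        def size(self) -> int:
            """Number of elements in the column."""

    class ListColumn(Column):
        # Hypothetical, minimal implementation for illustration only.
        def __init__(self, values):
            self._values = list(values)

        def size(self) -> int:
            return len(self._values)

    col = ListColumn([1, 2, 3])
    print(col.size())  # 3 -- consumers such as from_dataframe.py call size() now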
22 changes: 11 additions & 11 deletions pandas/core/interchange/from_dataframe.py
@@ -155,7 +155,7 @@ def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
buffers = col.get_buffers()

data_buff, data_dtype = buffers["data"]
- data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size)
+ data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size())

data = set_nulls(data, col, buffers["validity"])
return data, buffers
@@ -187,7 +187,7 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
buffers = col.get_buffers()

codes_buff, codes_dtype = buffers["data"]
- codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size)
+ codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size())

# Doing module in order to not get ``IndexError`` for
# out-of-bounds sentinel values in `codes`
@@ -244,29 +244,29 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
Endianness.NATIVE,
)
# Specify zero offset as we don't want to chunk the string data
- data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size)
+ data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size())

# Retrieve the offsets buffer containing the index offsets demarcating
# the beginning and the ending of each string
offset_buff, offset_dtype = buffers["offsets"]
# Offsets buffer contains start-stop positions of strings in the data buffer,
- # meaning that it has more elements than in the data buffer, do `col.size + 1` here
- # to pass a proper offsets buffer size
+ # meaning that it has more elements than in the data buffer, do `col.size() + 1`
+ # here to pass a proper offsets buffer size
offsets = buffer_to_ndarray(
-     offset_buff, offset_dtype, col.offset, length=col.size + 1
+     offset_buff, offset_dtype, col.offset, length=col.size() + 1
)

null_pos = None
if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert buffers["validity"], "Validity buffers cannot be empty for masks"
valid_buff, valid_dtype = buffers["validity"]
- null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
+ null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
if sentinel_val == 0:
null_pos = ~null_pos

# Assemble the strings from the code units
- str_list: list[None | float | str] = [None] * col.size
- for i in range(col.size):
+ str_list: list[None | float | str] = [None] * col.size()
+ for i in range(col.size()):
# Check for missing values
if null_pos is not None and null_pos[i]:
str_list[i] = np.nan
@@ -349,7 +349,7 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
Endianness.NATIVE,
),
col.offset,
-     col.size,
+     col.size(),
)

data = parse_datetime_format_str(format_str, data)
@@ -501,7 +501,7 @@ def set_nulls(
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert validity, "Expected to have a validity buffer for the mask"
valid_buff, valid_dtype = validity
- null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
+ null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
if sentinel_val == 0:
null_pos = ~null_pos
elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN):
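As an aside on the `col.size() + 1` comment in string_column_to_ndarray above: the offsets buffer for N strings carries N + 1 positions, so string i spans data[offsets[i]:offsets[i + 1]]. A self-contained sketch of that layout (illustrative values, not pandas internals):

    import numpy as np

    data = np.frombuffer(b"foobarbaz", dtype=np.uint8)  # UTF-8 code units of 3 strings
    offsets = np.array([0, 3, 6, 9], dtype=np.int64)    # size == 3, so 3 + 1 offsets

    # Each string i is delimited by offsets[i] and offsets[i + 1] in the data buffer.
    strings = [
        data[offsets[i] : offsets[i + 1]].tobytes().decode("utf-8")
        for i in range(len(offsets) - 1)
    ]
    print(strings)  # ['foo', 'bar', 'baz']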
