Skip to content

Commit

Permalink
Backport PR pandas-dev#56587: ENH: support the Arrow PyCapsule Interf…
Browse files Browse the repository at this point in the history
…ace on pandas.DataFrame (export)
  • Loading branch information
jorisvandenbossche authored and meeseeksmachine committed Jan 18, 2024
1 parent 74fa740 commit 2254508
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,8 @@ def import_optional_dependency(
The imported module, when found and the version is correct.
None is returned when the package is not found and `errors`
is False, or when the package's version is too old and `errors`
is ``'warn'``.
is ``'warn'`` or ``'ignore'``.
"""

assert errors in {"warn", "raise", "ignore"}

package_name = INSTALL_MAPPING.get(name)
Expand Down Expand Up @@ -163,5 +162,7 @@ def import_optional_dependency(
return None
elif errors == "raise":
raise ImportError(msg)
else:
return None

return module
27 changes: 27 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,33 @@ def __dataframe_consortium_standard__(
)
return convert_to_standard_compliant_dataframe(self, api_version=api_version)

def __arrow_c_stream__(self, requested_schema=None):
    """
    Export the pandas DataFrame as an Arrow C stream PyCapsule.

    The conversion to the Arrow format is delegated to pyarrow and follows
    the default behaviour of ``pyarrow.Table.from_pandas`` in its handling
    of the index, i.e. the index is stored as a column except for
    RangeIndex.

    This conversion is not necessarily zero-copy.

    Parameters
    ----------
    requested_schema : PyCapsule, default None
        The schema to which the dataframe should be cast, passed as a
        PyCapsule containing a C ArrowSchema representation of the
        requested schema.

    Returns
    -------
    PyCapsule
    """
    pa = import_optional_dependency("pyarrow", min_version="14.0.0")
    # Only unwrap the capsule when the caller actually supplied one;
    # ``schema=None`` lets pyarrow infer the schema from the frame.
    schema = (
        None
        if requested_schema is None
        else pa.Schema._import_from_c_capsule(requested_schema)
    )
    return pa.Table.from_pandas(self, schema=schema).__arrow_c_stream__()

# ----------------------------------------------------------------------

@property
Expand Down
45 changes: 45 additions & 0 deletions pandas/tests/frame/test_arrow_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import ctypes

import pytest

import pandas.util._test_decorators as td

import pandas as pd

pa = pytest.importorskip("pyarrow")


@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_arrow_interface():
    """
    Check that ``DataFrame.__arrow_c_stream__`` returns a valid
    ``arrow_array_stream`` capsule and that ``pa.table`` converts the
    frame as expected, both with and without an explicit schema.
    """
    data = {"a": [1, 2, 3], "b": ["a", "b", "c"]}
    df = pd.DataFrame(data)

    # The exported object must be a PyCapsule carrying the stream name.
    capsule = df.__arrow_c_stream__()
    is_valid = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(capsule), b"arrow_array_stream"
    )
    assert is_valid == 1

    # Conversion without a requested schema.
    result = pa.table(df)
    expected = pa.table(data)
    assert result.equals(expected)

    # Conversion with a requested schema that narrows the integer column.
    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    result = pa.table(df, schema=schema)
    assert result.equals(expected.cast(schema))


@td.skip_if_no("pyarrow", min_version="15.0")
def test_dataframe_to_arrow():
    """
    Check that a DataFrame can be consumed through
    ``pa.RecordBatchReader.from_stream``, both with and without an
    explicit schema.
    """
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    # ``from_stream`` yields a RecordBatchReader, which has no ``equals``;
    # materialise it into a Table with ``read_all`` before comparing.
    table = pa.RecordBatchReader.from_stream(df).read_all()
    expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    assert table.equals(expected)

    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
    table = pa.RecordBatchReader.from_stream(df, schema=schema).read_all()
    expected = expected.cast(schema)
    assert table.equals(expected)
14 changes: 14 additions & 0 deletions pandas/tests/test_optional_dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,20 @@ def test_bad_version(monkeypatch):
result = import_optional_dependency("fakemodule")
assert result is module

with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
import_optional_dependency("fakemodule", min_version="1.1.0")

with tm.assert_produces_warning(UserWarning):
result = import_optional_dependency(
"fakemodule", errors="warn", min_version="1.1.0"
)
assert result is None

result = import_optional_dependency(
"fakemodule", errors="ignore", min_version="1.1.0"
)
assert result is None


def test_submodule(monkeypatch):
# Create a fake module with a submodule
Expand Down

0 comments on commit 2254508

Please sign in to comment.