Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Enforce alignment with numpy ufuncs #50455

Merged
merged 3 commits into from
Dec 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
- Enforced deprecation changing behavior when applying a numpy ufunc to multiple non-aligned (on the index or columns) :class:`DataFrame` objects; the inputs are now aligned before the ufunc is applied (:issue:`39239`)
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
Expand Down
82 changes: 0 additions & 82 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@

import operator
from typing import Any
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import ABCNDFrame

Expand Down Expand Up @@ -166,81 +164,6 @@ def __rpow__(self, other):
# Helpers to implement __array_ufunc__


def _is_aligned(frame, other):
"""
Helper to check if a DataFrame is aligned with another DataFrame or Series.
"""
from pandas import DataFrame

if isinstance(other, DataFrame):
return frame._indexed_same(other)
else:
# Series -> match index
return frame.columns.equals(other.index)


def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Apply ``ufunc`` without alignment when DataFrame inputs are not aligned.

    In the future, DataFrame inputs to ufuncs will be aligned before applying
    the ufunc, but for now the index is ignored and a warning is raised if the
    behaviour would change in the future.
    This helper detects the case where a warning is needed and then falls back
    to applying the ufunc on arrays to avoid alignment.

    See https://github.com/pandas-dev/pandas/pull/39239

    Parameters
    ----------
    ufunc : np.ufunc
        The ufunc being applied.
    method : str
        Name of the attribute of ``ufunc`` that is looked up and called.
    *inputs : Any
        Positional inputs of the ufunc call.
    **kwargs : Any
        Keyword arguments, forwarded unchanged to the ufunc call.

    Returns
    -------
    object
        The result of calling the ufunc on the converted inputs when the
        fallback is taken, otherwise ``NotImplemented`` to signal that the
        ufunc has not been executed yet.
    """
    from pandas import DataFrame
    from pandas.core.generic import NDFrame

    alignable = [x for x in inputs if isinstance(x, NDFrame)]
    # the first frame is what determines the output index/columns in pandas < 1.2
    first_frame = next((x for x in inputs if isinstance(x, DataFrame)), None)

    # The fallback only applies with at least 2 alignable inputs (Series or
    # DataFrame), of which at least 1 is a DataFrame -> only then there would
    # have been no alignment before.
    if len(alignable) < 2 or first_frame is None:
        return NotImplemented

    # everything already aligned -> nothing changes in the future, no fallback
    if all(_is_aligned(first_frame, x) for x in alignable):
        return NotImplemented

    # at least one input is not aligned -> warn and fallback to array behaviour
    warnings.warn(
        "Calling a ufunc on non-aligned DataFrames (or DataFrame/Series "
        "combination). Currently, the indices are ignored and the result "
        "takes the index/columns of the first DataFrame. In the future, "
        "the DataFrames/Series will be aligned before applying the ufunc.\n"
        "Convert one of the arguments to a NumPy array "
        "(eg 'ufunc(df1, np.asarray(df2))') to keep the current behaviour, "
        "or align manually (eg 'df1, df2 = df1.align(df2)') before passing to "
        "the ufunc to obtain the future behaviour and silence this warning.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )

    # keep the first dataframe of the inputs, other DataFrame/Series is
    # converted to array for fallback behaviour
    new_inputs = [
        x if x is first_frame or not isinstance(x, NDFrame) else np.asarray(x)
        for x in inputs
    ]

    # call the ufunc on those transformed inputs
    return getattr(ufunc, method)(*new_inputs, **kwargs)


def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
"""
Compatibility with numpy ufuncs.
Expand All @@ -260,11 +183,6 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)

kwargs = _standardize_out_kwarg(**kwargs)

# for backwards compatibility check and potentially fallback for non-aligned frames
result = _maybe_fallback(ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result

# for binary ops, use our custom dunder methods
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
Expand Down
97 changes: 47 additions & 50 deletions pandas/tests/frame/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,21 +118,18 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):

if isinstance(dtype_a, dict) and isinstance(dtype_b, dict):
dtype_b["C"] = dtype_b.pop("B")

df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b)
with tm.assert_produces_warning(FutureWarning):
result = np.heaviside(df1, df2)
# Expected future behaviour:
# expected = np.heaviside(
# np.array([[1, 3, np.nan], [2, 4, np.nan]]),
# np.array([[1, np.nan, 3], [2, np.nan, 4]]),
# )
# expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
# As of 2.0, align first before applying the ufunc
result = np.heaviside(df1, df2)
expected = np.heaviside(
np.array([[1, 3, np.nan], [2, 4, np.nan]]),
np.array([[1, np.nan, 3], [2, np.nan, 4]]),
)
expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)

# ensure the expected is the same when applying with numpy array
result = np.heaviside(df1, df2.values)
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
tm.assert_frame_equal(result, expected)


Expand All @@ -146,35 +143,29 @@ def test_binary_input_aligns_index(request, dtype):
)
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype)
df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype)
with tm.assert_produces_warning(FutureWarning):
result = np.heaviside(df1, df2)
# Expected future behaviour:
# expected = np.heaviside(
# np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
# np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
# )
# # TODO(FloatArray): this will be Float64Dtype.
# expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
expected = pd.DataFrame(
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
result = np.heaviside(df1, df2)
expected = np.heaviside(
np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
)
# TODO(FloatArray): this will be Float64Dtype.
expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
tm.assert_frame_equal(result, expected)

# ensure the expected is the same when applying with numpy array
result = np.heaviside(df1, df2.values)
expected = pd.DataFrame(
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
)
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning")
def test_binary_frame_series_raises():
# We don't currently implement
df = pd.DataFrame({"A": [1, 2]})
# with pytest.raises(NotImplementedError, match="logaddexp"):
with pytest.raises(ValueError, match=""):
with pytest.raises(NotImplementedError, match="logaddexp"):
np.logaddexp(df, df["A"])

# with pytest.raises(NotImplementedError, match="logaddexp"):
with pytest.raises(ValueError, match=""):
with pytest.raises(NotImplementedError, match="logaddexp"):
np.logaddexp(df["A"], df)


Expand Down Expand Up @@ -206,7 +197,8 @@ def test_frame_outer_disallowed():
np.subtract.outer(df, df)


def test_alignment_deprecation():
def test_alignment_deprecation_enforced():
# Enforced in 2.0
# https://github.com/pandas-dev/pandas/issues/39184
df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
Expand All @@ -221,12 +213,11 @@ def test_alignment_deprecation():
result = np.add(df1, df1)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
# non-aligned -> warns
result = np.add(df1, df2)
result = np.add(df1, df2.values)
tm.assert_frame_equal(result, expected)

result = np.add(df1, df2.values)
result = np.add(df1, df2)
expected = pd.DataFrame({"a": [np.nan] * 3, "b": [5, 7, 9], "c": [np.nan] * 3})
tm.assert_frame_equal(result, expected)

result = np.add(df1.values, df2)
Expand All @@ -241,20 +232,23 @@ def test_alignment_deprecation():
result = np.add(df1, s1)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
result = np.add(df1, s2)
result = np.add(df1, s2.values)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
result = np.add(s2, df1)
expected = pd.DataFrame(
{"a": [np.nan] * 3, "b": [5.0, 6.0, 7.0], "c": [np.nan] * 3}
)
result = np.add(df1, s2)
tm.assert_frame_equal(result, expected)

result = np.add(df1, s2.values)
tm.assert_frame_equal(result, expected)
msg = "Cannot apply ufunc <ufunc 'add'> to mixed DataFrame and Series inputs."
with pytest.raises(NotImplementedError, match=msg):
np.add(s2, df1)


@td.skip_if_no("numba")
def test_alignment_deprecation_many_inputs(request):
def test_alignment_deprecation_many_inputs_enforced():
# Enforced in 2.0
# https://github.com/pandas-dev/pandas/issues/39184
# test that the deprecation also works with > 2 inputs -> using a numba
# written ufunc for this because numpy itself doesn't have such ufuncs
Expand All @@ -271,31 +265,34 @@ def my_ufunc(x, y, z):
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]})

with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1, df2, df3)
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
result = my_ufunc(df1, df2, df3)
expected = pd.DataFrame(np.full((3, 3), np.nan), columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)

# all aligned -> no warning
with tm.assert_produces_warning(None):
result = my_ufunc(df1, df1, df1)
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)

# mixed frame / arrays
with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1, df2, df3.values)
tm.assert_frame_equal(result, expected)
msg = (
r"operands could not be broadcast together with shapes \(3,3\) \(3,3\) \(3,2\)"
)
with pytest.raises(ValueError, match=msg):
my_ufunc(df1, df2, df3.values)

# single frame -> no warning
with tm.assert_produces_warning(None):
result = my_ufunc(df1, df2.values, df3.values)
tm.assert_frame_equal(result, expected)

# takes indices of first frame
with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1.values, df2, df3)
expected = expected.set_axis(["b", "c"], axis=1)
tm.assert_frame_equal(result, expected)
msg = (
r"operands could not be broadcast together with shapes \(3,2\) \(3,3\) \(3,3\)"
)
with pytest.raises(ValueError, match=msg):
my_ufunc(df1.values, df2, df3)


def test_array_ufuncs_for_many_arguments():
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,10 @@ def test_np_matmul():
# GH26650
df1 = pd.DataFrame(data=[[-1, 1, 10]])
df2 = pd.DataFrame(data=[-1, 1, 10])
expected_result = pd.DataFrame(data=[102])
expected = pd.DataFrame(data=[102])

with tm.assert_produces_warning(FutureWarning, match="on non-aligned"):
result = np.matmul(df1, df2)
tm.assert_frame_equal(
expected_result,
result,
)
result = np.matmul(df1, df2)
tm.assert_frame_equal(expected, result)


def test_array_ufuncs_for_many_arguments():
Expand Down