Skip to content

Commit

Permalink
DEPR: Enforce alignment with numpy ufuncs (#50455)
Browse files Browse the repository at this point in the history
* DEPR: Enforce alignment with numpy ufuncs

* fix other test
  • Loading branch information
mroeschke committed Dec 28, 2022
1 parent ddde1dd commit 23c3676
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 139 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`)
- Removed deprecated :meth:`Index.asi8` (:issue:`37877`)
- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`)
- Enforced deprecation changing behavior when applying a numpy ufunc to multiple non-aligned (on the index or columns) :class:`DataFrame` objects; the inputs are now aligned first before the ufunc is applied (:issue:`39239`)
- Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`)
- Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`)
- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`)
Expand Down
82 changes: 0 additions & 82 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@

import operator
from typing import Any
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import ABCNDFrame

Expand Down Expand Up @@ -166,81 +164,6 @@ def __rpow__(self, other):
# Helpers to implement __array_ufunc__


def _is_aligned(frame, other):
"""
Helper to check if a DataFrame is aligned with another DataFrame or Series.

When ``other`` is a DataFrame, the result of ``frame._indexed_same(other)``
is returned. Otherwise ``other`` is treated as a Series and the columns of
``frame`` are compared against ``other.index``.
"""
# imported locally rather than at module level
# NOTE(review): presumably to avoid a circular import -- confirm
from pandas import DataFrame

if isinstance(other, DataFrame):
return frame._indexed_same(other)
else:
# Series -> match index
return frame.columns.equals(other.index)


def _maybe_fallback(ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
"""
In the future, DataFrame inputs to ufuncs will be aligned before applying
the ufunc, but for now we ignore the index and raise a warning if behaviour
would change in the future.
This helper detects the case where a warning is needed and then falls back
to applying the ufunc on arrays to avoid alignment.
See https://github.com/pandas-dev/pandas/pull/39239

Returns the result of ``getattr(ufunc, method)`` called on the transformed
inputs when the fallback is taken, otherwise ``NotImplemented`` to signal
that the ufunc has not been executed by this helper.
"""
from pandas import DataFrame
from pandas.core.generic import NDFrame

# count the inputs that carry labels (any NDFrame) and, separately, the DataFrames
n_alignable = sum(isinstance(x, NDFrame) for x in inputs)
n_frames = sum(isinstance(x, DataFrame) for x in inputs)

if n_alignable >= 2 and n_frames >= 1:
# if there are 2 alignable inputs (Series or DataFrame), of which at least 1
# is a DataFrame -> we would have had no alignment before -> warn that this
# will align in the future

# the first frame is what determines the output index/columns in pandas < 1.2
first_frame = next(x for x in inputs if isinstance(x, DataFrame))

# check if the objects are aligned or not
non_aligned = sum(
not _is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame)
)

# if at least one is not aligned -> warn and fallback to array behaviour
if non_aligned:
warnings.warn(
"Calling a ufunc on non-aligned DataFrames (or DataFrame/Series "
"combination). Currently, the indices are ignored and the result "
"takes the index/columns of the first DataFrame. In the future , "
"the DataFrames/Series will be aligned before applying the ufunc.\n"
"Convert one of the arguments to a NumPy array "
"(eg 'ufunc(df1, np.asarray(df2)') to keep the current behaviour, "
"or align manually (eg 'df1, df2 = df1.align(df2)') before passing to "
"the ufunc to obtain the future behaviour and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)

# keep the first dataframe of the inputs, other DataFrame/Series is
# converted to array for fallback behaviour
new_inputs = []
for x in inputs:
if x is first_frame:
new_inputs.append(x)
elif isinstance(x, NDFrame):
new_inputs.append(np.asarray(x))
else:
# non-pandas inputs (e.g. arrays, scalars) are passed through unchanged
new_inputs.append(x)

# call the ufunc on those transformed inputs
return getattr(ufunc, method)(*new_inputs, **kwargs)

# signal that we didn't fallback / execute the ufunc yet
return NotImplemented


def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
"""
Compatibility with numpy ufuncs.
Expand All @@ -260,11 +183,6 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)

kwargs = _standardize_out_kwarg(**kwargs)

# for backwards compatibility check and potentially fallback for non-aligned frames
result = _maybe_fallback(ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result

# for binary ops, use our custom dunder methods
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
Expand Down
97 changes: 47 additions & 50 deletions pandas/tests/frame/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,21 +118,18 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):

if isinstance(dtype_a, dict) and isinstance(dtype_b, dict):
dtype_b["C"] = dtype_b.pop("B")

df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b)
with tm.assert_produces_warning(FutureWarning):
result = np.heaviside(df1, df2)
# Expected future behaviour:
# expected = np.heaviside(
# np.array([[1, 3, np.nan], [2, 4, np.nan]]),
# np.array([[1, np.nan, 3], [2, np.nan, 4]]),
# )
# expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
# As of 2.0, align first before applying the ufunc
result = np.heaviside(df1, df2)
expected = np.heaviside(
np.array([[1, 3, np.nan], [2, 4, np.nan]]),
np.array([[1, np.nan, 3], [2, np.nan, 4]]),
)
expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)

# ensure the expected is the same when applying with numpy array
result = np.heaviside(df1, df2.values)
expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
tm.assert_frame_equal(result, expected)


Expand All @@ -146,35 +143,29 @@ def test_binary_input_aligns_index(request, dtype):
)
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype)
df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype)
with tm.assert_produces_warning(FutureWarning):
result = np.heaviside(df1, df2)
# Expected future behaviour:
# expected = np.heaviside(
# np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
# np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
# )
# # TODO(FloatArray): this will be Float64Dtype.
# expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
expected = pd.DataFrame(
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
result = np.heaviside(df1, df2)
expected = np.heaviside(
np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
)
# TODO(FloatArray): this will be Float64Dtype.
expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
tm.assert_frame_equal(result, expected)

# ensure the expected is the same when applying with numpy array
result = np.heaviside(df1, df2.values)
expected = pd.DataFrame(
[[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
)
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning")
def test_binary_frame_series_raises():
# We don't currently implement
df = pd.DataFrame({"A": [1, 2]})
# with pytest.raises(NotImplementedError, match="logaddexp"):
with pytest.raises(ValueError, match=""):
with pytest.raises(NotImplementedError, match="logaddexp"):
np.logaddexp(df, df["A"])

# with pytest.raises(NotImplementedError, match="logaddexp"):
with pytest.raises(ValueError, match=""):
with pytest.raises(NotImplementedError, match="logaddexp"):
np.logaddexp(df["A"], df)


Expand Down Expand Up @@ -206,7 +197,8 @@ def test_frame_outer_disallowed():
np.subtract.outer(df, df)


def test_alignment_deprecation():
def test_alignment_deprecation_enforced():
# Enforced in 2.0
# https://github.com/pandas-dev/pandas/issues/39184
df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
Expand All @@ -221,12 +213,11 @@ def test_alignment_deprecation():
result = np.add(df1, df1)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
# non-aligned -> warns
result = np.add(df1, df2)
result = np.add(df1, df2.values)
tm.assert_frame_equal(result, expected)

result = np.add(df1, df2.values)
result = np.add(df1, df2)
expected = pd.DataFrame({"a": [np.nan] * 3, "b": [5, 7, 9], "c": [np.nan] * 3})
tm.assert_frame_equal(result, expected)

result = np.add(df1.values, df2)
Expand All @@ -241,20 +232,23 @@ def test_alignment_deprecation():
result = np.add(df1, s1)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
result = np.add(df1, s2)
result = np.add(df1, s2.values)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
result = np.add(s2, df1)
expected = pd.DataFrame(
{"a": [np.nan] * 3, "b": [5.0, 6.0, 7.0], "c": [np.nan] * 3}
)
result = np.add(df1, s2)
tm.assert_frame_equal(result, expected)

result = np.add(df1, s2.values)
tm.assert_frame_equal(result, expected)
msg = "Cannot apply ufunc <ufunc 'add'> to mixed DataFrame and Series inputs."
with pytest.raises(NotImplementedError, match=msg):
np.add(s2, df1)


@td.skip_if_no("numba")
def test_alignment_deprecation_many_inputs(request):
def test_alignment_deprecation_many_inputs_enforced():
# Enforced in 2.0
# https://github.com/pandas-dev/pandas/issues/39184
# test that the deprecation also works with > 2 inputs -> using a numba
# written ufunc for this because numpy itself doesn't have such ufuncs
Expand All @@ -271,31 +265,34 @@ def my_ufunc(x, y, z):
df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]})

with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1, df2, df3)
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
result = my_ufunc(df1, df2, df3)
expected = pd.DataFrame(np.full((3, 3), np.nan), columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)

# all aligned -> no warning
with tm.assert_produces_warning(None):
result = my_ufunc(df1, df1, df1)
expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)

# mixed frame / arrays
with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1, df2, df3.values)
tm.assert_frame_equal(result, expected)
msg = (
r"operands could not be broadcast together with shapes \(3,3\) \(3,3\) \(3,2\)"
)
with pytest.raises(ValueError, match=msg):
my_ufunc(df1, df2, df3.values)

# single frame -> no warning
with tm.assert_produces_warning(None):
result = my_ufunc(df1, df2.values, df3.values)
tm.assert_frame_equal(result, expected)

# takes indices of first frame
with tm.assert_produces_warning(FutureWarning):
result = my_ufunc(df1.values, df2, df3)
expected = expected.set_axis(["b", "c"], axis=1)
tm.assert_frame_equal(result, expected)
msg = (
r"operands could not be broadcast together with shapes \(3,2\) \(3,3\) \(3,3\)"
)
with pytest.raises(ValueError, match=msg):
my_ufunc(df1.values, df2, df3)


def test_array_ufuncs_for_many_arguments():
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,10 @@ def test_np_matmul():
# GH26650
df1 = pd.DataFrame(data=[[-1, 1, 10]])
df2 = pd.DataFrame(data=[-1, 1, 10])
expected_result = pd.DataFrame(data=[102])
expected = pd.DataFrame(data=[102])

with tm.assert_produces_warning(FutureWarning, match="on non-aligned"):
result = np.matmul(df1, df2)
tm.assert_frame_equal(
expected_result,
result,
)
result = np.matmul(df1, df2)
tm.assert_frame_equal(expected, result)


def test_array_ufuncs_for_many_arguments():
Expand Down

0 comments on commit 23c3676

Please sign in to comment.