Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Enforce deprecation of partial failure in transform for lists/dicts #49375

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ Removal of prior version deprecations/changes
- Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
-

.. ---------------------------------------------------------------------------
Expand Down
27 changes: 1 addition & 26 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,34 +266,9 @@ def transform_dict_like(self, func):
func = self.normalize_dictlike_arg("transform", obj, func)

results: dict[Hashable, DataFrame | Series] = {}
failed_names = []
all_type_errors = True
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
results[name] = colg.transform(how, 0, *args, **kwargs)
except Exception as err:
if str(err) in {
"Function did not transform",
"No transform functions were provided",
}:
raise err
else:
if not isinstance(err, TypeError):
all_type_errors = False
failed_names.append(name)
# combine results
if not results:
klass = TypeError if all_type_errors else ValueError
raise klass("Transform function failed")
if len(failed_names) > 0:
warnings.warn(
f"{failed_names} did not transform successfully. If any error is "
f"raised, this will raise in a future version of pandas. "
f"Drop these columns/ops to avoid this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
results[name] = colg.transform(how, 0, *args, **kwargs)
return concat(results, axis=1)

def transform_str_or_callable(self, func) -> DataFrame | Series:
Expand Down
97 changes: 46 additions & 51 deletions pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,32 +133,37 @@ def func(x):
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
# GH 35964
if op == "rank":
request.node.add_marker(
pytest.mark.xfail(
raises=ValueError, reason="GH 40418: rank does not raise a TypeError"
)
)
elif op == "ngroup":
if op == "ngroup":
request.node.add_marker(
pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
)

obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms
obj = tm.get_obj(obj, frame_or_series)
if op == "rank":
error = ValueError
msg = "Transform function failed"
else:
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)

with pytest.raises(TypeError, match="unsupported operand|not supported"):
with pytest.raises(error, match=msg):
obj.transform(op)
with pytest.raises(TypeError, match="Transform function failed"):
with pytest.raises(error, match=msg):
obj.transform([op])
with pytest.raises(TypeError, match="Transform function failed"):
with pytest.raises(error, match=msg):
obj.transform({"A": op})
with pytest.raises(TypeError, match="Transform function failed"):
with pytest.raises(error, match=msg):
obj.transform({"A": [op]})


@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure_typeerror(request, op):
def test_transform_failure_typeerror(request, op):
# GH 35964

if op == "ngroup":
Expand All @@ -168,62 +173,52 @@ def test_transform_partial_failure_typeerror(request, op):

# Using object makes most transform kernels fail
df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
if op == "rank":
error = ValueError
msg = "Transform function failed"
else:
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)

expected = df[["B"]].transform([op])
match = r"\['A'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
tm.assert_equal(result, expected)
with pytest.raises(error, match=msg):
df.transform([op])

expected = df[["B"]].transform({"B": op})
match = r"\['A'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)
with pytest.raises(error, match=msg):
df.transform({"A": op, "B": op})

expected = df[["B"]].transform({"B": [op]})
match = r"\['A'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)
with pytest.raises(error, match=msg):
df.transform({"A": [op], "B": [op]})

expected = df.transform({"A": ["shift"], "B": [op]})
match = rf"\['{op}'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)
with pytest.raises(error, match=msg):
df.transform({"A": [op, "shift"], "B": [op]})


def test_transform_partial_failure_valueerror():
def test_transform_failure_valueerror():
# GH 40211
match = ".*did not transform successfully"

def op(x):
if np.sum(np.sum(x)) < 10:
raise ValueError
return x

df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
msg = "Transform function failed"

expected = df[["B"]].transform([op])
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
tm.assert_equal(result, expected)
with pytest.raises(ValueError, match=msg):
df.transform([op])

expected = df[["B"]].transform({"B": op})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)
with pytest.raises(ValueError, match=msg):
df.transform({"A": op, "B": op})

expected = df[["B"]].transform({"B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)
with pytest.raises(ValueError, match=msg):
df.transform({"A": [op], "B": [op]})

expected = df.transform({"A": ["shift"], "B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)
with pytest.raises(ValueError, match=msg):
df.transform({"A": [op, "shift"], "B": [op]})


@pytest.mark.parametrize("use_apply", [True, False])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def test_agg_none_to_type():
def test_transform_none_to_type():
# GH#34377
df = DataFrame({"a": [None]})
msg = "Transform function failed"
msg = "argument must be a"
with pytest.raises(TypeError, match=msg):
df.transform({"a": int})

Expand Down
85 changes: 36 additions & 49 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,72 +280,59 @@ def test_transform_partial_failure(op, request):
# GH 35964
if op in ("ffill", "bfill", "pad", "backfill", "shift"):
request.node.add_marker(
pytest.mark.xfail(
raises=AssertionError, reason=f"{op} is successful on any dtype"
)
pytest.mark.xfail(reason=f"{op} is successful on any dtype")
)

# Using object makes most transform kernels fail
ser = Series(3 * [object])

expected = ser.transform(["shift"])
match = rf"\['{op}'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([op, "shift"])
tm.assert_equal(result, expected)

expected = ser.transform({"B": "shift"})
match = r"\['A'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": op, "B": "shift"})
tm.assert_equal(result, expected)

expected = ser.transform({"B": ["shift"]})
match = r"\['A'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [op], "B": ["shift"]})
tm.assert_equal(result, expected)

match = r"\['B'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
expected = ser.transform({"A": ["shift"], "B": [op]})
match = rf"\['{op}'\] did not transform successfully"
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)
if op in ("fillna", "ngroup", "rank"):
error = ValueError
msg = "Transform function failed"
else:
error = TypeError
msg = "|".join(
[
"not supported between instances of 'type' and 'type'",
"unsupported operand type",
]
)

with pytest.raises(error, match=msg):
ser.transform([op, "shift"])

with pytest.raises(error, match=msg):
ser.transform({"A": op, "B": "shift"})

with pytest.raises(error, match=msg):
ser.transform({"A": [op], "B": ["shift"]})

with pytest.raises(error, match=msg):
ser.transform({"A": [op, "shift"], "B": [op]})


def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully"

def noop(x):
return x

def raising_op(_):
raise ValueError

ser = Series(3 * [object])
msg = "Transform function failed"

with pytest.raises(ValueError, match=msg):
ser.transform([noop, raising_op])

with pytest.raises(ValueError, match=msg):
ser.transform({"A": raising_op, "B": noop})

with pytest.raises(ValueError, match=msg):
ser.transform({"A": [raising_op], "B": [noop]})

expected = ser.transform([noop])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([noop, raising_op])
tm.assert_equal(result, expected)

expected = ser.transform({"B": noop})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": raising_op, "B": noop})
tm.assert_equal(result, expected)

expected = ser.transform({"B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [raising_op], "B": [noop]})
tm.assert_equal(result, expected)

expected = ser.transform({"A": [noop], "B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [noop, raising_op], "B": [noop]})
tm.assert_equal(result, expected)
with pytest.raises(ValueError, match=msg):
ser.transform({"A": [noop, raising_op], "B": [noop]})


def test_demo():
Expand Down