Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy #50396

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.3.rst
Expand Up @@ -30,6 +30,7 @@ Bug fixes
- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`)
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)
- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`)
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/missing.py
Expand Up @@ -584,6 +584,8 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo
if "boolean value of NA is ambiguous" in str(err):
return False
raise
except ValueError:
return False
jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved
return True


Expand Down
109 changes: 102 additions & 7 deletions pandas/tests/dtypes/test_missing.py
Expand Up @@ -545,18 +545,113 @@ def test_array_equivalent_str(dtype):
)


def test_array_equivalent_nested():
@pytest.mark.parametrize(
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested(strict_nan):
# reached in groupby aggregations, make sure we use np.any when checking
# if the comparison is truthy
left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have both same-sized and mismatched-size cases? I expect these will be found non-equivalent through different code paths.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intentionally changed that here, because I assume the intent of the test was to test a numpy array of arrays. But if the arrays are the same length, the np.array(..) constructor actually converts this into a 2D array.

Below, in another test, I added a case for same-length subarrays (constructing the array with a workaround: first creating an empty object array and then filling it); see # same-length lists.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a same-length subarrays case in this test as well.

right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)
right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)

assert array_equivalent(left, right, strict_nan=True)
assert not array_equivalent(left, right[::-1], strict_nan=True)
assert array_equivalent(left, right, strict_nan=strict_nan)
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object)
left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object)
right = np.array([50, 40])
assert not array_equivalent(left, right, strict_nan=True)
assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.parametrize(
    "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested2(strict_nan):
    """Exercise ``array_equivalent`` with more than one level of nesting."""

    def build_two_level():
        # Built from scratch on every call so that the two operands are
        # distinct objects rather than references to the same arrays.
        first = np.array([np.array([50, 70]), np.array([90])], dtype=object)
        second = np.array([np.array([20, 30])], dtype=object)
        return np.array([first, second], dtype=object)

    lhs = build_two_level()
    rhs = build_two_level()
    assert array_equivalent(lhs, rhs, strict_nan=strict_nan)

    # Reversing the outer array swaps its two sub-arrays.
    flipped = rhs[::-1]
    assert not array_equivalent(lhs, flipped, strict_nan=strict_nan)

    # A nested object array compared against a flat integer array.
    deep = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object)
    flat = np.array([50])
    assert not array_equivalent(deep, flat, strict_nan=strict_nan)


@pytest.mark.parametrize(
    "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested_list(strict_nan):
    """Exercise ``array_equivalent`` on object arrays whose elements are lists."""

    def ragged_lists():
        # Fresh list objects on every call, so the two operands never share
        # their element objects.
        return np.array([[50, 70, 90], [20, 30]], dtype=object)

    lhs = ragged_lists()
    rhs = ragged_lists()
    assert array_equivalent(lhs, rhs, strict_nan=strict_nan)
    assert not array_equivalent(lhs, rhs[::-1], strict_nan=strict_nan)

    # Lists on one side, plain integers on the other.
    nested = np.array([[50, 50, 50], [40, 40]], dtype=object)
    flat = np.array([50, 40])
    assert not array_equivalent(nested, flat, strict_nan=strict_nan)


@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_mixed_list(strict_nan):
    """Compare arrays of sub-arrays against arrays of lists with the same values.

    See https://github.com/pandas-dev/pandas/issues/50360
    """
    # One level of nesting: sub-arrays on the left, lists on the right.
    as_arrays = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object)
    as_lists = np.array([[1, 2, 3], [4, 5]], dtype=object)
    assert array_equivalent(as_arrays, as_lists, strict_nan=strict_nan)
    assert not array_equivalent(as_arrays, as_lists[::-1], strict_nan=strict_nan)

    # Multiple levels of nesting.
    first = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object)
    second = np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object)
    as_arrays = np.array([first, second], dtype=object)
    as_lists = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object)
    assert array_equivalent(as_arrays, as_lists, strict_nan=strict_nan)
    assert not array_equivalent(as_arrays, as_lists[::-1], strict_nan=strict_nan)

    # Same-length lists: the object array is allocated empty and then filled,
    # instead of being built directly with ``np.array``.
    pair = np.empty(2, dtype=object)
    pair[:] = [
        np.array([None, "b"], dtype=object),
        np.array(["c", "d"], dtype=object),
    ]
    as_arrays = np.array([pair, None], dtype=object)
    as_lists = np.array([[[None, "b"], ["c", "d"]], None], dtype=object)
    assert array_equivalent(as_arrays, as_lists, strict_nan=strict_nan)
    assert not array_equivalent(as_arrays, as_lists[::-1], strict_nan=strict_nan)


@pytest.mark.xfail(reason="failing")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify my understanding: these xfails are not dependent on a future numpy version, correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think not, but I am not 100% sure offhand. In any case, a number of cases already failed with current numpy when calling array_equivalent directly while passing with assert_almost_equal, because those two take slightly different code paths.

@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_dicts(strict_nan):
    """Exercise ``array_equivalent`` on object arrays of dicts holding arrays/lists.

    Every operand holds two *distinct* dicts. With a single element,
    ``x[::-1]`` has the same contents as ``x``, so an "equivalent" assertion
    and a "not equivalent to the reversed operand" assertion could never hold
    at the same time.
    """
    left = np.array(
        [
            {"f1": 1, "f2": np.array(["a", "b"], dtype=object)},
            {"f1": 2, "f2": np.array(["c"], dtype=object)},
        ],
        dtype=object,
    )
    right = np.array(
        [
            {"f1": 1, "f2": np.array(["a", "b"], dtype=object)},
            {"f1": 2, "f2": np.array(["c"], dtype=object)},
        ],
        dtype=object,
    )
    assert array_equivalent(left, right, strict_nan=strict_nan)
    # Reversing swaps the two different dicts, so the operands now differ.
    assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

    # Same dicts, but with plain lists in place of the inner arrays.
    right2 = np.array(
        [{"f1": 1, "f2": ["a", "b"]}, {"f1": 2, "f2": ["c"]}], dtype=object
    )
    assert array_equivalent(left, right2, strict_nan=strict_nan)
    assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan)


def test_array_equivalent_index_with_tuples():
Expand Down
84 changes: 84 additions & 0 deletions pandas/tests/util/test_assert_almost_equal.py
Expand Up @@ -448,3 +448,87 @@ def test_assert_almost_equal_iterable_values_mismatch():

with pytest.raises(AssertionError, match=msg):
tm.assert_almost_equal([1, 2], [1, 3])


def _obj(values):
    """Return ``np.array(values, dtype=object)``."""
    return np.array(values, dtype=object)


# Object array with two same-length object sub-arrays; allocated empty and
# then filled rather than built directly with ``np.array``.
subarr = np.empty(2, dtype=object)
subarr[:] = [_obj([None, "b"]), _obj(["c", "d"])]

# Pairs of (left, right) inputs. Each side is constructed separately, so the
# two members of a pair never share their element objects.
NESTED_CASES = [
    # nested array
    (
        _obj([np.array([50, 70, 90]), np.array([20, 30])]),
        _obj([np.array([50, 70, 90]), np.array([20, 30])]),
    ),
    # >1 level of nesting
    (
        _obj(
            [
                _obj([np.array([50, 70]), np.array([90])]),
                _obj([np.array([20, 30])]),
            ]
        ),
        _obj(
            [
                _obj([np.array([50, 70]), np.array([90])]),
                _obj([np.array([20, 30])]),
            ]
        ),
    ),
    # lists
    (
        _obj([[50, 70, 90], [20, 30]]),
        _obj([[50, 70, 90], [20, 30]]),
    ),
    # mixed array/list
    (
        _obj([np.array([1, 2, 3]), np.array([4, 5])]),
        _obj([[1, 2, 3], [4, 5]]),
    ),
    (
        _obj(
            [
                _obj([np.array([1, 2, 3]), np.array([4, 5])]),
                _obj([np.array([6]), np.array([7, 8]), np.array([9])]),
            ]
        ),
        _obj([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]]),
    ),
    # same-length lists
    (
        _obj([subarr, None]),
        _obj([[[None, "b"], ["c", "d"]], None]),
    ),
    # dicts
    (
        _obj([{"f1": 1, "f2": _obj(["a", "b"])}]),
        _obj([{"f1": 1, "f2": _obj(["a", "b"])}]),
    ),
    (
        _obj([{"f1": 1, "f2": _obj(["a", "b"])}]),
        _obj([{"f1": 1, "f2": ["a", "b"]}]),
    ),
    # array/list of dicts
    (
        _obj([_obj([{"f1": 1, "f2": _obj(["a", "b"])}]), _obj([])]),
        _obj([[{"f1": 1, "f2": ["a", "b"]}], []]),
    ),
]


@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
@pytest.mark.parametrize("a,b", NESTED_CASES)
def test_assert_almost_equal_array_nested(a, b):
    # Runs once per (a, b) pair in NESTED_CASES and hands the pair to
    # _assert_almost_equal_both; the "elementwise comparison failed"
    # DeprecationWarning is ignored for this test.
    # NOTE(review): _assert_almost_equal_both is defined outside this view --
    # presumably it checks the pair in both argument orders; confirm.
    _assert_almost_equal_both(a, b)