New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy #50396
Changes from all commits
cfd116d
07da6a7
84f1abc
7007b73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -545,18 +545,120 @@ def test_array_equivalent_str(dtype): | |
) | ||
|
||
|
||
def test_array_equivalent_nested(): | ||
@pytest.mark.parametrize( | ||
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] | ||
) | ||
def test_array_equivalent_nested(strict_nan): | ||
# reached in groupby aggregations, make sure we use np.any when checking | ||
# if the comparison is truthy | ||
left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) | ||
right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) | ||
left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) | ||
right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) | ||
|
||
assert array_equivalent(left, right, strict_nan=True) | ||
assert not array_equivalent(left, right[::-1], strict_nan=True) | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) | ||
left = np.empty(2, dtype=object) | ||
left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] | ||
right = np.empty(2, dtype=object) | ||
right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object) | ||
right = np.array([50, 40]) | ||
assert not array_equivalent(left, right, strict_nan=True) | ||
assert not array_equivalent(left, right, strict_nan=strict_nan) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] | ||
) | ||
def test_array_equivalent_nested2(strict_nan): | ||
# more than one level of nesting | ||
left = np.array( | ||
[ | ||
np.array([np.array([50, 70]), np.array([90])], dtype=object), | ||
np.array([np.array([20, 30])], dtype=object), | ||
], | ||
dtype=object, | ||
) | ||
right = np.array( | ||
[ | ||
np.array([np.array([50, 70]), np.array([90])], dtype=object), | ||
np.array([np.array([20, 30])], dtype=object), | ||
], | ||
dtype=object, | ||
) | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
left = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object) | ||
right = np.array([50]) | ||
assert not array_equivalent(left, right, strict_nan=strict_nan) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] | ||
) | ||
def test_array_equivalent_nested_list(strict_nan): | ||
left = np.array([[50, 70, 90], [20, 30]], dtype=object) | ||
right = np.array([[50, 70, 90], [20, 30]], dtype=object) | ||
|
||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
left = np.array([[50, 50, 50], [40, 40]], dtype=object) | ||
right = np.array([50, 40]) | ||
assert not array_equivalent(left, right, strict_nan=strict_nan) | ||
|
||
|
||
@pytest.mark.xfail(reason="failing") | ||
@pytest.mark.parametrize("strict_nan", [True, False]) | ||
def test_array_equivalent_nested_mixed_list(strict_nan): | ||
# mixed arrays / lists in left and right | ||
# https://github.com/pandas-dev/pandas/issues/50360 | ||
left = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object) | ||
right = np.array([[1, 2, 3], [4, 5]], dtype=object) | ||
|
||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
# multiple levels of nesting | ||
left = np.array( | ||
[ | ||
np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), | ||
np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object), | ||
], | ||
dtype=object, | ||
) | ||
right = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object) | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
# same-length lists | ||
subarr = np.empty(2, dtype=object) | ||
subarr[:] = [ | ||
np.array([None, "b"], dtype=object), | ||
np.array(["c", "d"], dtype=object), | ||
] | ||
left = np.array([subarr, None], dtype=object) | ||
right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object) | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
|
||
@pytest.mark.xfail(reason="failing") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to clarify my understanding, these xfails are not dependent on a future numpy version, correct? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think not, but I'm not 100% sure offhand. In any case, there were a bunch of cases that, even with current numpy, already failed with a direct |
||
@pytest.mark.parametrize("strict_nan", [True, False]) | ||
def test_array_equivalent_nested_dicts(strict_nan): | ||
left = np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object) | ||
right = np.array( | ||
[{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object | ||
) | ||
assert array_equivalent(left, right, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) | ||
|
||
right2 = np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object) | ||
assert array_equivalent(left, right2, strict_nan=strict_nan) | ||
assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan) | ||
|
||
|
||
def test_array_equivalent_index_with_tuples(): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we have both same-sized and mismatched-size cases? I expect these will be non-equivalent through different paths.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I intentionally changed that here, because I assume the intent of the test was to test a numpy array of arrays. But if the arrays are the same length, the `np.array(..)` constructor actually converts this into a 2D array. Below, in another test, I added a same-length case (constructing the array with a workaround: first creating an empty object array and then filling it); see `# same-length lists`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a same-length subarrays case in this test as well.