Skip to content

Commit

Permalink
Merge pull request #20462 from charris/backport-20310
Browse files Browse the repository at this point in the history
BUG: Fix float16 einsum fastpaths using wrong tempvar
  • Loading branch information
charris committed Nov 26, 2021
2 parents 382b4cb + abb3dd3 commit 884c6c8
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 15 deletions.
30 changes: 15 additions & 15 deletions numpy/core/src/multiarray/einsum_sumprod.c.src
Expand Up @@ -337,13 +337,13 @@ static NPY_GCC_OPT_3 void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
const @type@ b@i@ = @from@(data[@i@]);
const @type@ c@i@ = @from@(data_out[@i@]);
const @temptype@ b@i@ = @from@(data[@i@]);
const @temptype@ c@i@ = @from@(data_out[@i@]);
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
const @type@ abc@i@ = scalar * b@i@ + c@i@;
const @temptype@ abc@i@ = scalar * b@i@ + c@i@;
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
Expand All @@ -353,8 +353,8 @@ static NPY_GCC_OPT_3 void
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data, ++data_out) {
const @type@ b = @from@(*data);
const @type@ c = @from@(*data_out);
const @temptype@ b = @from@(*data);
const @temptype@ c = @from@(*data_out);
*data_out = @to@(scalar * b + c);
}
#endif // NPYV check for @type@
Expand Down Expand Up @@ -417,14 +417,14 @@ static void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
const @type@ a@i@ = @from@(data0[@i@]);
const @type@ b@i@ = @from@(data1[@i@]);
const @type@ c@i@ = @from@(data_out[@i@]);
const @temptype@ a@i@ = @from@(data0[@i@]);
const @temptype@ b@i@ = @from@(data1[@i@]);
const @temptype@ c@i@ = @from@(data_out[@i@]);
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
const @type@ abc@i@ = a@i@ * b@i@ + c@i@;
const @temptype@ abc@i@ = a@i@ * b@i@ + c@i@;
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
Expand All @@ -434,9 +434,9 @@ static void
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data0, ++data1, ++data_out) {
const @type@ a = @from@(*data0);
const @type@ b = @from@(*data1);
const @type@ c = @from@(*data_out);
const @temptype@ a = @from@(*data0);
const @temptype@ b = @from@(*data1);
const @temptype@ c = @from@(*data_out);
*data_out = @to@(a * b + c);
}
#endif // NPYV check for @type@
Expand Down Expand Up @@ -521,14 +521,14 @@ static NPY_GCC_OPT_3 void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
const @type@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
const @temptype@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
/**end repeat2**/
accum += ab0 + ab1 + ab2 + ab3;
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data0, ++data1) {
const @type@ a = @from@(*data0);
const @type@ b = @from@(*data1);
const @temptype@ a = @from@(*data0);
const @temptype@ b = @from@(*data1);
accum += a * b;
}
#endif // NPYV check for @type@
Expand Down
48 changes: 48 additions & 0 deletions numpy/core/tests/test_einsum.py
@@ -1,5 +1,7 @@
import itertools

import pytest

import numpy as np
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_almost_equal,
Expand Down Expand Up @@ -744,6 +746,52 @@ def test_einsum_all_contig_non_contig_output(self):
np.einsum('ij,jk->ik', x, x, out=out)
assert_array_equal(out.base, correct_base)

# Regression test for gh-20305 (broken float16 einsum path).
# Runs once per type code in np.typecodes["AllFloat"] +
# np.typecodes["AllInteger"] and checks a series of einsum calls with
# different operand layouts against plain NumPy arithmetic.
@pytest.mark.parametrize("dtype",
np.typecodes["AllFloat"] + np.typecodes["AllInteger"])
def test_different_paths(self, dtype):
# Test originally added to cover broken float16 path: gh-20305
# Likely most are covered elsewhere, at least partially.
dtype = np.dtype(dtype)
# Simple test, designed to exercise most specialized code paths.
# Note the +0.5 for floats: it gives non-integral float inputs for which
# the expected results must still be exact, so the checks below can use
# exact equality.
arr = (np.arange(7) + 0.5).astype(dtype)
# 0-d array holding 2 in the dtype under test; used as the scalar operand.
scalar = np.array(2, dtype=dtype)

# contig -> scalar:
res = np.einsum('i->', arr)
assert res == arr.sum()
# contig, contig -> contig:
res = np.einsum('i,i->i', arr, arr)
assert_array_equal(res, arr * arr)
# noncontig, noncontig -> contig:
# repeat(2)[::2] yields the same values as `arr` through a strided view.
res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2])
assert_array_equal(res, arr * arr)
# contig + contig -> scalar
assert np.einsum('i,i->', arr, arr) == (arr * arr).sum()
# contig + scalar -> contig (with out)
# `out` is pre-filled with ones and passed explicitly via `out=`.
out = np.ones(7, dtype=dtype)
res = np.einsum('i,->i', arr, dtype.type(2), out=out)
assert_array_equal(res, arr * dtype.type(2))
# scalar + contig -> contig
# NOTE(review): unlike the case above, no `out=` is passed here; confirm
# whether an explicit-`out` variant was intended.
res = np.einsum(',i->i', scalar, arr)
assert_array_equal(res, arr * dtype.type(2))
# scalar + contig -> scalar
res = np.einsum(',i->', scalar, arr)
# Compare against einsum itself (rather than .sum()) so that differences
# in summation round-off do not cause spurious failures:
assert res == np.einsum('i->', scalar * arr)
# contig + scalar -> scalar
res = np.einsum('i,->', arr, scalar)
# Compare against einsum itself (rather than .sum()) so that differences
# in summation round-off do not cause spurious failures:
assert res == np.einsum('i->', scalar * arr)
# contig + contig + contig -> scalar
# A fresh five-element input is used for the three- and four-operand cases.
arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype)
res = np.einsum('i,i,i->', arr, arr, arr)
assert_array_equal(res, (arr * arr * arr).sum())
# four arrays:
res = np.einsum('i,i,i,i->', arr, arr, arr, arr)
assert_array_equal(res, (arr * arr * arr * arr).sum())

def test_small_boolean_arrays(self):
# See gh-5946.
# Use array of True embedded in False.
Expand Down

0 comments on commit 884c6c8

Please sign in to comment.