From e88592e94a0b7de726bc1e3dbf3abf637be47273 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 18 Oct 2022 15:17:14 +0200 Subject: [PATCH] BUG: Fix boundschecking for `random.logseries` Logseries previously did not enforce bounds to be strictly exclusive for the upper bound, where it leads to incorrect behavior. The NOT_NAN check is removed, since it was never used: The current bounded version always excludes NaNs. --- numpy/random/_common.pxd | 2 +- numpy/random/_common.pyx | 6 ++++++ numpy/random/_generator.pyx | 8 ++++---- numpy/random/mtrand.pyx | 10 +++++----- numpy/random/tests/test_generator_mt19937.py | 20 ++++++++++++++++---- numpy/random/tests/test_randomstate.py | 19 ++++++++++++++----- 6 files changed, 46 insertions(+), 19 deletions(-) diff --git a/numpy/random/_common.pxd b/numpy/random/_common.pxd index 3625634cd4e3..3eaf39ddf831 100644 --- a/numpy/random/_common.pxd +++ b/numpy/random/_common.pxd @@ -17,8 +17,8 @@ cdef enum ConstraintType: CONS_POSITIVE CONS_POSITIVE_NOT_NAN CONS_BOUNDED_0_1 - CONS_BOUNDED_0_1_NOTNAN CONS_BOUNDED_GT_0_1 + CONS_BOUNDED_LT_0_1 CONS_GT_1 CONS_GTE_1 CONS_POISSON diff --git a/numpy/random/_common.pyx b/numpy/random/_common.pyx index 607034a385e2..7b6f693037fb 100644 --- a/numpy/random/_common.pyx +++ b/numpy/random/_common.pyx @@ -392,6 +392,9 @@ cdef int check_array_constraint(np.ndarray val, object name, constraint_type con elif cons == CONS_BOUNDED_GT_0_1: if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)): raise ValueError("{0} <= 0, {0} > 1 or {0} contains NaNs".format(name)) + elif cons == CONS_BOUNDED_LT_0_1: + if not np.all(np.greater_equal(val, 0)) or not np.all(np.less(val, 1)): + raise ValueError("{0} < 0, {0} >= 1 or {0} contains NaNs".format(name)) elif cons == CONS_GT_1: if not np.all(np.greater(val, 1)): raise ValueError("{0} <= 1 or {0} contains NaNs".format(name)) @@ -428,6 +431,9 @@ cdef int check_constraint(double val, object name, constraint_type cons) except elif cons == CONS_BOUNDED_GT_0_1: if not val >0 or not val <= 1: raise ValueError("{0} <= 0, {0} > 1 or {0} contains NaNs".format(name)) + elif cons == CONS_BOUNDED_LT_0_1: + if not (val >= 0) or not (val < 1): + raise ValueError("{0} < 0, {0} >= 1 or {0} is NaN".format(name)) elif cons == CONS_GT_1: if not (val > 1): raise ValueError("{0} <= 1 or {0} is NaN".format(name)) diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index 5218c6d0eb23..2c25b7191560 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -25,7 +25,7 @@ from ._pcg64 import PCG64 from numpy.random cimport bitgen_t from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, - CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON, + CONS_BOUNDED_LT_0_1, CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON, double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f, check_array_constraint, check_constraint, disc, discrete_broadcast_iii, validate_output_shape @@ -3437,12 +3437,12 @@ cdef class Generator: Draw samples from a logarithmic series distribution. Samples are drawn from a log series distribution with specified - shape parameter, 0 < ``p`` < 1. + shape parameter, 0 <= ``p`` < 1. Parameters ---------- p : float or array_like of floats - Shape parameter for the distribution. Must be in the range (0, 1). + Shape parameter for the distribution. Must be in the range [0, 1). size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -3506,7 +3506,7 @@ cdef class Generator: """ return disc(&random_logseries, &self._bitgen, size, self.lock, 1, 0, - p, 'p', CONS_BOUNDED_0_1, + p, 'p', CONS_BOUNDED_LT_0_1, 0.0, '', CONS_NONE, 0.0, '', CONS_NONE) diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index fcc1f27d266f..ae40931d0e81 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -19,8 +19,8 @@ from ._bounded_integers cimport (_rand_bool, _rand_int32, _rand_int64, from ._mt19937 import MT19937 as _MT19937 from numpy.random cimport bitgen_t from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, - CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, CONS_GTE_1, - CONS_GT_1, LEGACY_CONS_POISSON, + CONS_NON_NEGATIVE, CONS_BOUNDED_0_1, CONS_BOUNDED_GT_0_1, + CONS_BOUNDED_LT_0_1, CONS_GTE_1, CONS_GT_1, LEGACY_CONS_POISSON, double_fill, cont, kahan_sum, cont_broadcast_3, check_array_constraint, check_constraint, disc, discrete_broadcast_iii, validate_output_shape @@ -3895,7 +3895,7 @@ cdef class RandomState: Draw samples from a logarithmic series distribution. Samples are drawn from a log series distribution with specified - shape parameter, 0 < ``p`` < 1. + shape parameter, 0 <= ``p`` < 1. .. note:: New code should use the ``logseries`` method of a ``default_rng()`` @@ -3904,7 +3904,7 @@ cdef class RandomState: Parameters ---------- p : float or array_like of floats - Shape parameter for the distribution. Must be in the range (0, 1). + Shape parameter for the distribution. Must be in the range [0, 1). size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -3969,7 +3969,7 @@ cdef class RandomState: """ out = disc(&legacy_logseries, &self._bitgen, size, self.lock, 1, 0, - p, 'p', CONS_BOUNDED_0_1, + p, 'p', CONS_BOUNDED_LT_0_1, 0.0, '', CONS_NONE, 0.0, '', CONS_NONE) # Match historical output type diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index b550cd5083fd..73d915e02a30 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -1363,10 +1363,22 @@ def test_logseries(self): [5, 1]]) assert_array_equal(actual, desired) - def test_logseries_exceptions(self): - with np.errstate(invalid='ignore'): - assert_raises(ValueError, random.logseries, np.nan) - assert_raises(ValueError, random.logseries, [np.nan] * 10) + def test_logseries_zero(self): + random = Generator(MT19937(self.seed)) + assert random.logseries(0) == 1 + + @pytest.mark.parametrize("value", [np.nextafter(0., -1), 1., np.nan, 5.]) + def test_logseries_exceptions(self, value): + random = Generator(MT19937(self.seed)) + with np.errstate(invalid="ignore"): + with pytest.raises(ValueError): + random.logseries(value) + with pytest.raises(ValueError): + # contiguous path: + random.logseries(np.array([value] * 10)) + with pytest.raises(ValueError): + # non-contiguous path: + random.logseries(np.array([value] * 10)[::2]) def test_multinomial(self): random = Generator(MT19937(self.seed)) diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index 22b167224567..c0e42ec1ead3 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -942,11 +942,20 @@ def test_logseries(self): [3, 6]]) assert_array_equal(actual, desired) - def test_logseries_exceptions(self): - with suppress_warnings() as sup: - sup.record(RuntimeWarning) - assert_raises(ValueError, random.logseries, np.nan) - assert_raises(ValueError, random.logseries, [np.nan] * 10) + def test_logseries_zero(self): + assert random.logseries(0) == 1 + + @pytest.mark.parametrize("value", [np.nextafter(0., -1), 1., np.nan, 5.]) + def test_logseries_exceptions(self, value): + with np.errstate(invalid="ignore"): + with pytest.raises(ValueError): + random.logseries(value) + with pytest.raises(ValueError): + # contiguous path: + random.logseries(np.array([value] * 10)) + with pytest.raises(ValueError): + # non-contiguous path: + random.logseries(np.array([value] * 10)[::2]) def test_multinomial(self): random.seed(self.seed)