diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 1b197d0f246b..f99de2a39b4f 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -493,14 +493,6 @@ array_dealloc(PyArrayObject *self) if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) { PyArray_XDECREF(self); } - /* - * Allocation will never be 0, see comment in ctors.c - * line 820 - */ - size_t nbytes = PyArray_NBYTES(self); - if (nbytes == 0) { - nbytes = fa->descr->elsize ? fa->descr->elsize : 1; - } if (fa->mem_handler == NULL) { char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY"); if ((env != NULL) && (strncmp(env, "1", 1) == 0)) { @@ -511,7 +503,16 @@ array_dealloc(PyArrayObject *self) } // Guess at malloc/free ??? free(fa->data); - } else { + } + else { + /* + * In theory `PyArray_NBYTES_ALLOCATED`, but differs somewhere? + * So instead just use the knowledge that 0 is impossible. + */ + size_t nbytes = PyArray_NBYTES(self); + if (nbytes == 0) { + nbytes = 1; + } PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler); Py_DECREF(fa->mem_handler); } diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index b3526c4c1de2..85fd3aab1f40 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -292,6 +292,35 @@ npy_memchr(char * haystack, char needle, return p; } +/* + * Helper to work around issues with the allocation strategy currently + * allocating not 1 byte for empty arrays, but enough for an array where + * all 0 dimensions are replaced with size 1 (if the itemsize is not 0). + * + * This means that we can fill in nice (nonzero) strides and still handle + * slicing direct math without being in danger of leaving the allocated byte + * bounds. + * In practice, that probably does not matter, but in principle this would be + * undefined behaviour in C. Another solution may be to force the strides + * to 0 in these cases. See also gh-15788. + * + * Unlike the code in `PyArray_NewFromDescr` does no overflow checks. + */ +static NPY_INLINE npy_intp +PyArray_NBYTES_ALLOCATED(PyArrayObject *arr) +{ + if (PyArray_ITEMSIZE(arr) == 0) { + return 1; + } + npy_intp nbytes = PyArray_ITEMSIZE(arr); + for (int i = 0; i < PyArray_NDIM(arr); i++) { + if (PyArray_DIMS(arr)[i] != 0) { + nbytes *= PyArray_DIMS(arr)[i]; + } + } + return nbytes; +} + /* * Simple helper to create a tuple from an array of items. The `make_null_none` diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index e22708f39650..78003306afe9 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -754,14 +754,20 @@ PyArray_NewFromDescr_int( } fa->strides = fa->dimensions + nd; - /* Copy dimensions, check them, and find total array size `nbytes` */ + /* + * Copy dimensions, check them, and find total array size `nbytes` + * + * Note that we ignore 0-length dimensions, to match this in the `free` + * calls, `PyArray_NBYTES_ALLOCATED` is a private helper matching this + * behaviour, but without overflow checking. + */ for (int i = 0; i < nd; i++) { fa->dimensions[i] = dims[i]; if (fa->dimensions[i] == 0) { /* * Compare to PyArray_OverflowMultiplyList that - * returns 0 in this case. + * returns 0 in this case. See also `PyArray_NBYTES_ALLOCATED`. */ continue; } diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 2544ca9e7301..a92ac44b7846 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -384,15 +384,7 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored)) } if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) { PyArray_XDECREF(self); - size_t nbytes = PyArray_NBYTES(self); - /* - * Allocation will never be 0, see comment in ctors.c - * line 820 - */ - if (nbytes == 0) { - PyArray_Descr *dtype = PyArray_DESCR(self); - nbytes = dtype->elsize ? dtype->elsize : 1; - } + size_t nbytes = PyArray_NBYTES_ALLOCATED(self); PyObject *handler = PyArray_HANDLER(self); if (handler == NULL) { /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */ diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index b0b6f42f12ca..c31a8292ce32 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -1934,7 +1934,7 @@ array_setstate(PyArrayObject *self, PyObject *args) PyObject *rawdata = NULL; char *datastr; Py_ssize_t len; - npy_intp size, dimensions[NPY_MAXDIMS]; + npy_intp dimensions[NPY_MAXDIMS]; int nd; npy_intp nbytes; int overflowed; @@ -1976,11 +1976,7 @@ array_setstate(PyArrayObject *self, PyObject *args) * since fa could be a 0-d or scalar, and then * PyDataMem_UserFREE will be confused */ - size_t n_tofree = PyArray_NBYTES(self); - if (n_tofree == 0) { - PyArray_Descr *dtype = PyArray_DESCR(self); - n_tofree = dtype->elsize ? dtype->elsize : 1; - } + size_t n_tofree = PyArray_NBYTES_ALLOCATED(self); Py_XDECREF(PyArray_DESCR(self)); fa->descr = typecode; Py_INCREF(typecode); @@ -1988,17 +1984,39 @@ array_setstate(PyArrayObject *self, PyObject *args) if (nd < 0) { return NULL; } - size = PyArray_MultiplyList(dimensions, nd); - if (size < 0) { - /* More items than are addressable */ - return PyErr_NoMemory(); + /* + * We should do two things here: + * 1. Validate the input, that it is neither invalid, nor "too big" + * ("too big" ignores dimensios of size 0). + * 2. Find `PyArray_NBYTES` of the result, as this is what we may need to + * copy from the pickled data (may not match allocation currently if 0). + * Compare with `PyArray_NewFromDescr`, raise MemoryError for simplicity. + */ + npy_bool empty = NPY_FALSE; + nbytes = 1; + for (int i = 0; i < nd; i++) { + if (dimensions[i] < 0) { + PyErr_SetString(PyExc_TypeError, + "impossible dimension while unpickling array"); + return NULL; + } + if (dimensions[i] == 0) { + empty = NPY_TRUE; + } + overflowed = npy_mul_with_overflow_intp( + &nbytes, nbytes, dimensions[i]); + if (overflowed) { + return PyErr_NoMemory(); + } } overflowed = npy_mul_with_overflow_intp( - &nbytes, size, PyArray_DESCR(self)->elsize); + &nbytes, nbytes, PyArray_DESCR(self)->elsize); if (overflowed) { - /* More bytes than are addressable */ return PyErr_NoMemory(); } + if (empty) { + nbytes = 0; + } if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) { if (!PyList_Check(rawdata)) { @@ -2039,8 +2057,7 @@ array_setstate(PyArrayObject *self, PyObject *args) if (len != nbytes) { PyErr_SetString(PyExc_ValueError, - "buffer size does not" \ - " match array size"); + "buffer size does not match array size"); Py_DECREF(rawdata); return NULL; } @@ -2097,21 +2114,18 @@ array_setstate(PyArrayObject *self, PyObject *args) /* Bytes should always be considered immutable, but we just grab the * pointer if they are large, to save memory. */ if (!IsAligned(self) || swap || (len <= 1000)) { - npy_intp num = PyArray_NBYTES(self); - if (num == 0) { - Py_DECREF(rawdata); - Py_RETURN_NONE; - } + npy_intp num = PyArray_NBYTES_ALLOCATED(self); /* Store the handler in case the default is modified */ Py_XDECREF(fa->mem_handler); fa->mem_handler = PyDataMem_GetHandler(); if (fa->mem_handler == NULL) { + Py_CLEAR(fa->mem_handler); Py_DECREF(rawdata); return NULL; } fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self)); if (PyArray_DATA(self) == NULL) { - Py_DECREF(fa->mem_handler); + Py_CLEAR(fa->mem_handler); Py_DECREF(rawdata); return PyErr_NoMemory(); } @@ -2158,11 +2172,8 @@ array_setstate(PyArrayObject *self, PyObject *args) } } else { - npy_intp num = PyArray_NBYTES(self); - int elsize = PyArray_DESCR(self)->elsize; - if (num == 0 || elsize == 0) { - Py_RETURN_NONE; - } + npy_intp num = PyArray_NBYTES_ALLOCATED(self); + /* Store the functions in case the default handler is modified */ Py_XDECREF(fa->mem_handler); fa->mem_handler = PyDataMem_GetHandler(); @@ -2171,7 +2182,7 @@ array_setstate(PyArrayObject *self, PyObject *args) } fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self)); if (PyArray_DATA(self) == NULL) { - Py_DECREF(fa->mem_handler); + Py_CLEAR(fa->mem_handler); return PyErr_NoMemory(); } if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) { @@ -2180,7 +2191,6 @@ array_setstate(PyArrayObject *self, PyObject *args) PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA); fa->base = NULL; if (_setlist_pkl(self, rawdata) < 0) { - Py_DECREF(fa->mem_handler); return NULL; } }