Skip to content

Commit

Permalink
Merge pull request #20681 from charris/backport-20954
Browse files Browse the repository at this point in the history
BUG: Fix setstate logic for empty arrays
  • Loading branch information
charris committed Dec 30, 2021
2 parents f9c45f8 + 34618d5 commit 5399c03
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 47 deletions.
19 changes: 10 additions & 9 deletions numpy/core/src/multiarray/arrayobject.c
Expand Up @@ -493,14 +493,6 @@ array_dealloc(PyArrayObject *self)
if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) {
PyArray_XDECREF(self);
}
/*
* Allocation will never be 0, see comment in ctors.c
* line 820
*/
size_t nbytes = PyArray_NBYTES(self);
if (nbytes == 0) {
nbytes = fa->descr->elsize ? fa->descr->elsize : 1;
}
if (fa->mem_handler == NULL) {
char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY");
if ((env != NULL) && (strncmp(env, "1", 1) == 0)) {
Expand All @@ -511,7 +503,16 @@ array_dealloc(PyArrayObject *self)
}
// Guess at malloc/free ???
free(fa->data);
} else {
}
else {
/*
* In theory `PyArray_NBYTES_ALLOCATED`, but differs somewhere?
* So instead just use the knowledge that 0 is impossible.
*/
size_t nbytes = PyArray_NBYTES(self);
if (nbytes == 0) {
nbytes = 1;
}
PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler);
Py_DECREF(fa->mem_handler);
}
Expand Down
29 changes: 29 additions & 0 deletions numpy/core/src/multiarray/common.h
Expand Up @@ -292,6 +292,35 @@ npy_memchr(char * haystack, char needle,
return p;
}

/*
* Helper to work around issues with the allocation strategy currently
* allocating not 1 byte for empty arrays, but enough for an array where
* all 0 dimensions are replaced with size 1 (if the itemsize is not 0).
*
* This means that we can fill in nice (nonzero) strides and still handle
* slicing direct math without being in danger of leaving the allocated byte
* bounds.
* In practice, that probably does not matter, but in principle this would be
* undefined behaviour in C. Another solution may be to force the strides
* to 0 in these cases. See also gh-15788.
*
* Unlike the code in `PyArray_NewFromDescr` does no overflow checks.
*/
static NPY_INLINE npy_intp
PyArray_NBYTES_ALLOCATED(PyArrayObject *arr)
{
if (PyArray_ITEMSIZE(arr) == 0) {
return 1;
}
npy_intp nbytes = PyArray_ITEMSIZE(arr);
for (int i = 0; i < PyArray_NDIM(arr); i++) {
if (PyArray_DIMS(arr)[i] != 0) {
nbytes *= PyArray_DIMS(arr)[i];
}
}
return nbytes;
}


/*
* Simple helper to create a tuple from an array of items. The `make_null_none`
Expand Down
10 changes: 8 additions & 2 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -754,14 +754,20 @@ PyArray_NewFromDescr_int(
}
fa->strides = fa->dimensions + nd;

/* Copy dimensions, check them, and find total array size `nbytes` */
/*
* Copy dimensions, check them, and find total array size `nbytes`
*
* Note that we ignore 0-length dimensions, to match this in the `free`
* calls, `PyArray_NBYTES_ALLOCATED` is a private helper matching this
* behaviour, but without overflow checking.
*/
for (int i = 0; i < nd; i++) {
fa->dimensions[i] = dims[i];

if (fa->dimensions[i] == 0) {
/*
* Compare to PyArray_OverflowMultiplyList that
* returns 0 in this case.
* returns 0 in this case. See also `PyArray_NBYTES_ALLOCATED`.
*/
continue;
}
Expand Down
10 changes: 1 addition & 9 deletions numpy/core/src/multiarray/getset.c
Expand Up @@ -384,15 +384,7 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored))
}
if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) {
PyArray_XDECREF(self);
size_t nbytes = PyArray_NBYTES(self);
/*
* Allocation will never be 0, see comment in ctors.c
* line 820
*/
if (nbytes == 0) {
PyArray_Descr *dtype = PyArray_DESCR(self);
nbytes = dtype->elsize ? dtype->elsize : 1;
}
size_t nbytes = PyArray_NBYTES_ALLOCATED(self);
PyObject *handler = PyArray_HANDLER(self);
if (handler == NULL) {
/* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */
Expand Down
64 changes: 37 additions & 27 deletions numpy/core/src/multiarray/methods.c
Expand Up @@ -1934,7 +1934,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
PyObject *rawdata = NULL;
char *datastr;
Py_ssize_t len;
npy_intp size, dimensions[NPY_MAXDIMS];
npy_intp dimensions[NPY_MAXDIMS];
int nd;
npy_intp nbytes;
int overflowed;
Expand Down Expand Up @@ -1976,29 +1976,47 @@ array_setstate(PyArrayObject *self, PyObject *args)
* since fa could be a 0-d or scalar, and then
* PyDataMem_UserFREE will be confused
*/
size_t n_tofree = PyArray_NBYTES(self);
if (n_tofree == 0) {
PyArray_Descr *dtype = PyArray_DESCR(self);
n_tofree = dtype->elsize ? dtype->elsize : 1;
}
size_t n_tofree = PyArray_NBYTES_ALLOCATED(self);
Py_XDECREF(PyArray_DESCR(self));
fa->descr = typecode;
Py_INCREF(typecode);
nd = PyArray_IntpFromSequence(shape, dimensions, NPY_MAXDIMS);
if (nd < 0) {
return NULL;
}
size = PyArray_MultiplyList(dimensions, nd);
if (size < 0) {
/* More items than are addressable */
return PyErr_NoMemory();
/*
* We should do two things here:
* 1. Validate the input, that it is neither invalid, nor "too big"
* ("too big" ignores dimensios of size 0).
* 2. Find `PyArray_NBYTES` of the result, as this is what we may need to
* copy from the pickled data (may not match allocation currently if 0).
* Compare with `PyArray_NewFromDescr`, raise MemoryError for simplicity.
*/
npy_bool empty = NPY_FALSE;
nbytes = 1;
for (int i = 0; i < nd; i++) {
if (dimensions[i] < 0) {
PyErr_SetString(PyExc_TypeError,
"impossible dimension while unpickling array");
return NULL;
}
if (dimensions[i] == 0) {
empty = NPY_TRUE;
}
overflowed = npy_mul_with_overflow_intp(
&nbytes, nbytes, dimensions[i]);
if (overflowed) {
return PyErr_NoMemory();
}
}
overflowed = npy_mul_with_overflow_intp(
&nbytes, size, PyArray_DESCR(self)->elsize);
&nbytes, nbytes, PyArray_DESCR(self)->elsize);
if (overflowed) {
/* More bytes than are addressable */
return PyErr_NoMemory();
}
if (empty) {
nbytes = 0;
}

if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) {
if (!PyList_Check(rawdata)) {
Expand Down Expand Up @@ -2039,8 +2057,7 @@ array_setstate(PyArrayObject *self, PyObject *args)

if (len != nbytes) {
PyErr_SetString(PyExc_ValueError,
"buffer size does not" \
" match array size");
"buffer size does not match array size");
Py_DECREF(rawdata);
return NULL;
}
Expand Down Expand Up @@ -2097,21 +2114,18 @@ array_setstate(PyArrayObject *self, PyObject *args)
/* Bytes should always be considered immutable, but we just grab the
* pointer if they are large, to save memory. */
if (!IsAligned(self) || swap || (len <= 1000)) {
npy_intp num = PyArray_NBYTES(self);
if (num == 0) {
Py_DECREF(rawdata);
Py_RETURN_NONE;
}
npy_intp num = PyArray_NBYTES_ALLOCATED(self);
/* Store the handler in case the default is modified */
Py_XDECREF(fa->mem_handler);
fa->mem_handler = PyDataMem_GetHandler();
if (fa->mem_handler == NULL) {
Py_CLEAR(fa->mem_handler);
Py_DECREF(rawdata);
return NULL;
}
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
Py_DECREF(fa->mem_handler);
Py_CLEAR(fa->mem_handler);
Py_DECREF(rawdata);
return PyErr_NoMemory();
}
Expand Down Expand Up @@ -2158,11 +2172,8 @@ array_setstate(PyArrayObject *self, PyObject *args)
}
}
else {
npy_intp num = PyArray_NBYTES(self);
int elsize = PyArray_DESCR(self)->elsize;
if (num == 0 || elsize == 0) {
Py_RETURN_NONE;
}
npy_intp num = PyArray_NBYTES_ALLOCATED(self);

/* Store the functions in case the default handler is modified */
Py_XDECREF(fa->mem_handler);
fa->mem_handler = PyDataMem_GetHandler();
Expand All @@ -2171,7 +2182,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
}
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
Py_DECREF(fa->mem_handler);
Py_CLEAR(fa->mem_handler);
return PyErr_NoMemory();
}
if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) {
Expand All @@ -2180,7 +2191,6 @@ array_setstate(PyArrayObject *self, PyObject *args)
PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA);
fa->base = NULL;
if (_setlist_pkl(self, rawdata) < 0) {
Py_DECREF(fa->mem_handler);
return NULL;
}
}
Expand Down

0 comments on commit 5399c03

Please sign in to comment.