Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix setstate logic for empty arrays #20681

Merged
merged 3 commits into from Dec 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 10 additions & 9 deletions numpy/core/src/multiarray/arrayobject.c
Expand Up @@ -493,14 +493,6 @@ array_dealloc(PyArrayObject *self)
if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) {
PyArray_XDECREF(self);
}
/*
* Allocation will never be 0, see comment in ctors.c
* line 820
*/
size_t nbytes = PyArray_NBYTES(self);
if (nbytes == 0) {
nbytes = fa->descr->elsize ? fa->descr->elsize : 1;
}
if (fa->mem_handler == NULL) {
char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY");
if ((env != NULL) && (strncmp(env, "1", 1) == 0)) {
Expand All @@ -511,7 +503,16 @@ array_dealloc(PyArrayObject *self)
}
// Guess at malloc/free ???
free(fa->data);
} else {
}
else {
/*
* In theory `PyArray_NBYTES_ALLOCATED`, but differs somewhere?
* So instead just use the knowledge that 0 is impossible.
*/
size_t nbytes = PyArray_NBYTES(self);
if (nbytes == 0) {
nbytes = 1;
}
PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler);
Py_DECREF(fa->mem_handler);
}
Expand Down
29 changes: 29 additions & 0 deletions numpy/core/src/multiarray/common.h
Expand Up @@ -292,6 +292,35 @@ npy_memchr(char * haystack, char needle,
return p;
}

/*
* Helper to work around issues with the allocation strategy currently
* allocating not 1 byte for empty arrays, but enough for an array where
* all 0 dimensions are replaced with size 1 (if the itemsize is not 0).
*
* This means that we can fill in nice (nonzero) strides and still handle
* slicing direct math without being in danger of leaving the allocated byte
* bounds.
* In practice, that probably does not matter, but in principle this would be
* undefined behaviour in C. Another solution may be to force the strides
* to 0 in these cases. See also gh-15788.
*
* Unlike the code in `PyArray_NewFromDescr` does no overflow checks.
*/
static NPY_INLINE npy_intp
PyArray_NBYTES_ALLOCATED(PyArrayObject *arr)
{
if (PyArray_ITEMSIZE(arr) == 0) {
return 1;
}
npy_intp nbytes = PyArray_ITEMSIZE(arr);
for (int i = 0; i < PyArray_NDIM(arr); i++) {
if (PyArray_DIMS(arr)[i] != 0) {
nbytes *= PyArray_DIMS(arr)[i];
}
}
return nbytes;
}


/*
* Simple helper to create a tuple from an array of items. The `make_null_none`
Expand Down
10 changes: 8 additions & 2 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -754,14 +754,20 @@ PyArray_NewFromDescr_int(
}
fa->strides = fa->dimensions + nd;

/* Copy dimensions, check them, and find total array size `nbytes` */
/*
* Copy dimensions, check them, and find total array size `nbytes`
*
* Note that we ignore 0-length dimensions, to match this in the `free`
* calls, `PyArray_NBYTES_ALLOCATED` is a private helper matching this
* behaviour, but without overflow checking.
*/
for (int i = 0; i < nd; i++) {
fa->dimensions[i] = dims[i];

if (fa->dimensions[i] == 0) {
/*
* Compare to PyArray_OverflowMultiplyList that
* returns 0 in this case.
* returns 0 in this case. See also `PyArray_NBYTES_ALLOCATED`.
*/
continue;
}
Expand Down
10 changes: 1 addition & 9 deletions numpy/core/src/multiarray/getset.c
Expand Up @@ -384,15 +384,7 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored))
}
if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) {
PyArray_XDECREF(self);
size_t nbytes = PyArray_NBYTES(self);
/*
* Allocation will never be 0, see comment in ctors.c
* line 820
*/
if (nbytes == 0) {
PyArray_Descr *dtype = PyArray_DESCR(self);
nbytes = dtype->elsize ? dtype->elsize : 1;
}
size_t nbytes = PyArray_NBYTES_ALLOCATED(self);
PyObject *handler = PyArray_HANDLER(self);
if (handler == NULL) {
/* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */
Expand Down
64 changes: 37 additions & 27 deletions numpy/core/src/multiarray/methods.c
Expand Up @@ -1934,7 +1934,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
PyObject *rawdata = NULL;
char *datastr;
Py_ssize_t len;
npy_intp size, dimensions[NPY_MAXDIMS];
npy_intp dimensions[NPY_MAXDIMS];
int nd;
npy_intp nbytes;
int overflowed;
Expand Down Expand Up @@ -1976,29 +1976,47 @@ array_setstate(PyArrayObject *self, PyObject *args)
* since fa could be a 0-d or scalar, and then
* PyDataMem_UserFREE will be confused
*/
size_t n_tofree = PyArray_NBYTES(self);
if (n_tofree == 0) {
PyArray_Descr *dtype = PyArray_DESCR(self);
n_tofree = dtype->elsize ? dtype->elsize : 1;
}
size_t n_tofree = PyArray_NBYTES_ALLOCATED(self);
Py_XDECREF(PyArray_DESCR(self));
fa->descr = typecode;
Py_INCREF(typecode);
nd = PyArray_IntpFromSequence(shape, dimensions, NPY_MAXDIMS);
if (nd < 0) {
return NULL;
}
size = PyArray_MultiplyList(dimensions, nd);
if (size < 0) {
/* More items than are addressable */
return PyErr_NoMemory();
/*
* We should do two things here:
* 1. Validate the input, that it is neither invalid, nor "too big"
* ("too big" ignores dimensios of size 0).
* 2. Find `PyArray_NBYTES` of the result, as this is what we may need to
* copy from the pickled data (may not match allocation currently if 0).
* Compare with `PyArray_NewFromDescr`, raise MemoryError for simplicity.
*/
npy_bool empty = NPY_FALSE;
nbytes = 1;
for (int i = 0; i < nd; i++) {
if (dimensions[i] < 0) {
PyErr_SetString(PyExc_TypeError,
"impossible dimension while unpickling array");
return NULL;
}
if (dimensions[i] == 0) {
empty = NPY_TRUE;
}
overflowed = npy_mul_with_overflow_intp(
&nbytes, nbytes, dimensions[i]);
if (overflowed) {
return PyErr_NoMemory();
}
}
overflowed = npy_mul_with_overflow_intp(
&nbytes, size, PyArray_DESCR(self)->elsize);
&nbytes, nbytes, PyArray_DESCR(self)->elsize);
if (overflowed) {
/* More bytes than are addressable */
return PyErr_NoMemory();
}
if (empty) {
nbytes = 0;
}

if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) {
if (!PyList_Check(rawdata)) {
Expand Down Expand Up @@ -2039,8 +2057,7 @@ array_setstate(PyArrayObject *self, PyObject *args)

if (len != nbytes) {
PyErr_SetString(PyExc_ValueError,
"buffer size does not" \
" match array size");
"buffer size does not match array size");
Py_DECREF(rawdata);
return NULL;
}
Expand Down Expand Up @@ -2097,21 +2114,18 @@ array_setstate(PyArrayObject *self, PyObject *args)
/* Bytes should always be considered immutable, but we just grab the
* pointer if they are large, to save memory. */
if (!IsAligned(self) || swap || (len <= 1000)) {
npy_intp num = PyArray_NBYTES(self);
if (num == 0) {
Py_DECREF(rawdata);
Py_RETURN_NONE;
}
npy_intp num = PyArray_NBYTES_ALLOCATED(self);
/* Store the handler in case the default is modified */
Py_XDECREF(fa->mem_handler);
fa->mem_handler = PyDataMem_GetHandler();
if (fa->mem_handler == NULL) {
Py_CLEAR(fa->mem_handler);
Py_DECREF(rawdata);
return NULL;
}
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
Py_DECREF(fa->mem_handler);
Py_CLEAR(fa->mem_handler);
Py_DECREF(rawdata);
return PyErr_NoMemory();
}
Expand Down Expand Up @@ -2158,11 +2172,8 @@ array_setstate(PyArrayObject *self, PyObject *args)
}
}
else {
npy_intp num = PyArray_NBYTES(self);
int elsize = PyArray_DESCR(self)->elsize;
if (num == 0 || elsize == 0) {
Py_RETURN_NONE;
}
npy_intp num = PyArray_NBYTES_ALLOCATED(self);

/* Store the functions in case the default handler is modified */
Py_XDECREF(fa->mem_handler);
fa->mem_handler = PyDataMem_GetHandler();
Expand All @@ -2171,7 +2182,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
}
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
Py_DECREF(fa->mem_handler);
Py_CLEAR(fa->mem_handler);
return PyErr_NoMemory();
}
if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) {
Expand All @@ -2180,7 +2191,6 @@ array_setstate(PyArrayObject *self, PyObject *args)
PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA);
fa->base = NULL;
if (_setlist_pkl(self, rawdata) < 0) {
Py_DECREF(fa->mem_handler);
return NULL;
}
}
Expand Down