Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix subarray dtype used with too large count in fromfile #17917

Merged
merged 1 commit into from Dec 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 13 additions & 9 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -3514,7 +3514,7 @@ PyArray_ArangeObj(PyObject *start, PyObject *stop, PyObject *step, PyArray_Descr
return NULL;
}

/* This array creation function steals the reference to dtype. */
/* This array creation function does not steal the reference to dtype. */
static PyArrayObject *
array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nread)
{
Expand Down Expand Up @@ -3542,7 +3542,6 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea
if (fail) {
PyErr_SetString(PyExc_IOError,
"could not seek in file");
Py_DECREF(dtype);
return NULL;
}
num = numbytes / dtype->elsize;
Expand All @@ -3554,6 +3553,7 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea
*/
elsize = dtype->elsize;

Py_INCREF(dtype); /* do not steal the original dtype. */
r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &num,
NULL, NULL, 0, NULL);
if (r == NULL) {
Expand All @@ -3569,7 +3569,7 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea
/*
* Create an array by reading from the given stream, using the passed
* next_element and skip_separator functions.
* As typical for array creation functions, it steals the reference to dtype.
* Does not steal the reference to dtype.
*/
#define FROM_BUFFER_SIZE 4096
static PyArrayObject *
Expand Down Expand Up @@ -3598,7 +3598,6 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre
PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &size,
NULL, NULL, 0, NULL);
if (r == NULL) {
Py_DECREF(dtype);
return NULL;
}

Expand Down Expand Up @@ -3661,7 +3660,6 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre
if (PyErr_Occurred()) {
/* If an error is already set (unlikely), do not create new one */
Py_DECREF(r);
Py_DECREF(dtype);
return NULL;
}
/* 2019-09-12, NumPy 1.18 */
Expand All @@ -3673,7 +3671,6 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre
}

fail:
Py_DECREF(dtype);
if (err == 1) {
PyErr_NoMemory();
}
Expand Down Expand Up @@ -3739,20 +3736,26 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep)
(skip_separator) fromfile_skip_separator, NULL);
}
if (ret == NULL) {
Py_DECREF(dtype);
return NULL;
}
if (((npy_intp) nread) < num) {
/* Realloc memory for smaller number of elements */
const size_t nsize = PyArray_MAX(nread,1)*PyArray_DESCR(ret)->elsize;
/*
* Realloc memory for smaller number of elements, use original dtype
* which may include a subarray (and is used for `nread`).
*/
const size_t nsize = PyArray_MAX(nread,1) * dtype->elsize;
char *tmp;

if((tmp = PyDataMem_RENEW(PyArray_DATA(ret), nsize)) == NULL) {
if ((tmp = PyDataMem_RENEW(PyArray_DATA(ret), nsize)) == NULL) {
Py_DECREF(dtype);
Py_DECREF(ret);
return PyErr_NoMemory();
}
((PyArrayObject_fields *)ret)->data = tmp;
PyArray_DIMS(ret)[0] = nread;
}
Py_DECREF(dtype);
return (PyObject *)ret;
}

Expand Down Expand Up @@ -3964,6 +3967,7 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype,
(next_element) fromstr_next_element,
(skip_separator) fromstr_skip_separator,
end);
Py_DECREF(dtype);
}
return (PyObject *)ret;
}
Expand Down
27 changes: 27 additions & 0 deletions numpy/core/tests/test_multiarray.py
Expand Up @@ -5020,6 +5020,33 @@ def test_fromfile_subarray_binary(self):
res = np.fromstring(x_str, dtype="(3,4)i4")
assert_array_equal(x, res)

def test_parsing_subarray_unsupported(self):
    """Text parsing (non-empty ``sep``) must reject subarray dtypes.

    Both ``np.fromstring`` and ``np.fromfile`` are expected to raise
    ``ValueError`` when given the subarray dtype ``"(3,)i"`` together
    with a text separator.
    """
    # We currently do not support parsing subarray dtypes
    data = "12,42,13," * 50
    with pytest.raises(ValueError):
        # The call must raise, so its result is intentionally not bound.
        np.fromstring(data, dtype="(3,)i", sep=",")

    # Write the same text to the test's file so fromfile gets identical input.
    with open(self.filename, "w") as f:
        f.write(data)

    with pytest.raises(ValueError):
        np.fromfile(self.filename, dtype="(3,)i", sep=",")

def test_read_shorter_than_count_subarray(self):
    """Request more items than are available while using a subarray dtype.

    The subarray dimension is absorbed into the array dimension, so an
    oversized ``count`` must not cause any problems: ``np.fromstring`` is
    expected to raise ``ValueError`` (while a ``DeprecationWarning`` is
    emitted), and ``np.fromfile`` is expected to return exactly the data
    that was written.
    """
    rows, cols = 511, 10
    reference = np.arange(rows * cols, dtype="i").reshape(-1, cols)
    raw_bytes = reference.tobytes()

    # Binary-mode fromstring with a count larger than the data provides.
    with pytest.raises(ValueError), pytest.warns(DeprecationWarning):
        np.fromstring(raw_bytes, dtype="(10,)i", count=10000)

    # fromfile with the same oversized count must match the written array.
    reference.tofile(self.filename)
    loaded = np.fromfile(self.filename, dtype="(10,)i", count=10000)
    assert_array_equal(loaded, reference)


class TestFromBuffer:
@pytest.mark.parametrize('byteorder', ['<', '>'])
Expand Down