Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Make mmap handling safer in frombuffer #21446

Merged
merged 1 commit into from May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions numpy/core/_add_newdocs.py
Expand Up @@ -1557,6 +1557,10 @@
The data of the resulting array will not be byteswapped, but will be
interpreted correctly.

This function creates a view into the original object. This should be safe
in general, but it may make sense to copy the result when the original
object is mutable or untrusted.

Examples
--------
>>> s = b'hello world'
Expand Down
32 changes: 26 additions & 6 deletions numpy/core/src/multiarray/ctors.c
Expand Up @@ -3684,28 +3684,44 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
return NULL;
}

/*
* The array check is probably unnecessary. It preserves the base for
* arrays. This is the "old" buffer protocol, which had no release logic.
* (It was assumed that the result is always a view.)
*
* NOTE: We could also check if `bf_releasebuffer` is defined which should
* be the most precise and safe thing to do. But that should only be
* necessary if unexpected backcompat issues are found downstream.
*/
if (!PyArray_Check(buf)) {
buf = PyMemoryView_FromObject(buf);
if (buf == NULL) {
return NULL;
}
}
else {
Py_INCREF(buf);
}

if (PyObject_GetBuffer(buf, &view, PyBUF_WRITABLE|PyBUF_SIMPLE) < 0) {
writeable = 0;
PyErr_Clear();
if (PyObject_GetBuffer(buf, &view, PyBUF_SIMPLE) < 0) {
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
}
data = (char *)view.buf;
ts = view.len;
/*
* In Python 3 both of the deprecated functions PyObject_AsWriteBuffer and
* PyObject_AsReadBuffer that this code replaces release the buffer. It is
* up to the object that supplies the buffer to guarantee that the buffer
* sticks around after the release.
*/
/* `buf` is an array or a memoryview; so we know `view` does not own data */
PyBuffer_Release(&view);

if ((offset < 0) || (offset > ts)) {
PyErr_Format(PyExc_ValueError,
"offset must be non-negative and no greater than buffer "\
"length (%" NPY_INTP_FMT ")", (npy_intp)ts);
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3718,13 +3734,15 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
if (itemsize == 0) {
PyErr_SetString(PyExc_ValueError,
"cannot determine count if itemsize is 0");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
if (s % itemsize != 0) {
PyErr_SetString(PyExc_ValueError,
"buffer size must be a multiple"\
" of element size");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3735,6 +3753,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
PyErr_SetString(PyExc_ValueError,
"buffer is smaller than requested"\
" size");
Py_DECREF(buf);
Py_DECREF(type);
return NULL;
}
Expand All @@ -3744,6 +3763,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
&PyArray_Type, type,
1, &n, NULL, data,
NPY_ARRAY_DEFAULT, NULL, buf);
Py_DECREF(buf);
if (ret == NULL) {
return NULL;
}
Expand Down
24 changes: 24 additions & 0 deletions numpy/core/tests/test_multiarray.py
Expand Up @@ -18,6 +18,7 @@
import pathlib
import builtins
from decimal import Decimal
import mmap

import numpy as np
import numpy.core._multiarray_tests as _multiarray_tests
Expand Down Expand Up @@ -5349,9 +5350,32 @@ def test_basic(self, byteorder, dtype):
buf = x.tobytes()
assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat)

def test_array_base(self):
arr = np.arange(10)
new = np.frombuffer(arr)
# We currently special case arrays to ensure they are used as a base.
# This could probably be changed (removing the test).
assert new.base is arr

def test_empty(self):
assert_array_equal(np.frombuffer(b''), np.array([]))

@pytest.mark.skipif(IS_PYPY,
reason="PyPy's memoryview currently does not track exports. See: "
"https://foss.heptapod.net/pypy/pypy/-/issues/3724")
def test_mmap_close(self):
# The old buffer protocol was not safe for some things that the new
# one is. But `frombuffer` always used the old one for a long time.
# Checks that it is safe with the new one (using memoryviews)
with tempfile.TemporaryFile(mode='wb') as tmp:
tmp.write(b"asdf")
tmp.flush()
mm = mmap.mmap(tmp.fileno(), 0)
arr = np.frombuffer(mm, dtype=np.uint8)
with pytest.raises(BufferError):
mm.close() # cannot close while array uses the buffer
del arr
mm.close()

class TestFlat:
def setup(self):
Expand Down