From 53c170078dba33232308cf020532bd68ac5d4f2e Mon Sep 17 00:00:00 2001 From: David Hewitt <1939362+davidhewitt@users.noreply.github.com> Date: Sat, 6 Nov 2021 11:21:15 +0000 Subject: [PATCH] pymethods: seq methods from mapping methods --- CHANGELOG.md | 1 + guide/src/class/protocols.md | 7 +- guide/src/migration.md | 32 ++++- pyo3-macros-backend/src/pyimpl.rs | 2 + pyo3-macros-backend/src/pymethod.rs | 15 +- src/class/impl_.rs | 38 ++++- src/pyclass.rs | 177 +++++++++++++---------- tests/test_proto_methods.rs | 209 ++++++++++++++++++++++++---- 8 files changed, 377 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd43f8544c7..2dcde8d55c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `PyErr::new_type` now takes an optional docstring and now returns `PyResult>` rather than a `ffi::PyTypeObject` pointer. - The `create_exception!` macro can now take an optional docstring. This docstring, if supplied, is visible to users (with `.__doc__` and `help()`) and accompanies your error type in your crate's documentation. +- `__getitem__`, `__setitem__` and `__delitem__` in `#[pymethods]` now implement both a Python mapping and sequence by default. [#2065](https://github.com/PyO3/pyo3/pull/2065) - Improve performance and error messages for `#[derive(FromPyObject)]` for enums. [#2068](https://github.com/PyO3/pyo3/pull/2068) - Reduce generated LLVM code size (to improve compile times) for: - internal `handle_panic` helper [#2074](https://github.com/PyO3/pyo3/pull/2074) diff --git a/guide/src/class/protocols.md b/guide/src/class/protocols.md index fc6867c8897..d0d570decc4 100644 --- a/guide/src/class/protocols.md +++ b/guide/src/class/protocols.md @@ -18,7 +18,6 @@ In PyO3 0.15, if a function name in `#[pymethods]` is a recognised magic method, The magic methods handled by PyO3 are very similar to the standard Python ones on [this page](https://docs.python.org/3/reference/datamodel.html#special-method-names) - in particular they are the the subset which have slots as [defined here](https://docs.python.org/3/c-api/typeobj.html). Some of the slots do not have a magic method in Python, which leads to a few additional magic methods defined only in PyO3: - Magic methods for garbage collection - Magic methods for the buffer protocol - - Magic methods for the sequence protocol When PyO3 handles a magic method, a couple of changes apply compared to other `#[pymethods]`: - The `#[pyo3(text_signature = "...")]` attribute is not allowed @@ -86,11 +85,7 @@ given signatures should be interpreted as follows: - `__aiter__() -> object` - `__anext__() -> Option or IterANextOutput` -#### Sequence types - -TODO; see [#1884](https://github.com/PyO3/pyo3/issues/1884) - -#### Mapping types +#### Mapping & Sequence types - `__len__() -> usize` - `__contains__(, object) -> bool` diff --git a/guide/src/migration.md b/guide/src/migration.md index 4c298bf2077..46113b60405 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -5,10 +5,40 @@ For a detailed list of all changes, see the [CHANGELOG](changelog.md). ## from 0.15.* to 0.16 -### Drop support for older technogies +### Drop support for older technologies PyO3 0.16 has increased minimum Rust version to 1.48 and minimum Python version to 3.7. This enables ore use of newer language features (enabling some of the other additions in 0.16) and simplifies maintenance of the project. +### Container magic methods now match Python behavior + +In PyO3 0.15, `__getitem__`, `__setitem__` and `__delitem__` in `#[pymethods]` would generate only the _mapping_ implementation for a `#[pyclass]`. To match the Python behavior, these methods now generate both the _mapping_ **and** _sequence_ implementations. + +This means that classes implementing these `#[pymethods]` will now also be treated as sequences, same as a Python `class` would be. Small differences in behavior may result: + - PyO3 will allow instances of these classes to be cast to `PySequence` as well as `PyMapping`. + - Python will provide a default implementation of `__iter__` (if the class did not have one) which repeatedly calls `__getitem__` with integers (starting at 0) until an `IndexError` is raised. + +To explain this in detail, consider the following Python class: + +```python +class ExampleContainer: + + def __len__(self): + return 5 + + def __getitem__(self, idx: int) -> int: + if idx < 0 or idx > 5: + raise IndexError() + return idx +``` + +This class implements a Python [sequence](https://docs.python.org/3/glossary.html#term-sequence). + +The `__len__` and `__getitem__` methods are also used to implement a Python [mapping](https://docs.python.org/3/glossary.html#term-mapping). In the Python C-API, these methods are not shared: the sequence `__len__` and `__getitem__` are defined by the `sq_len` and `sq_item` slots, and the mapping equivalents are `mp_len` and `mp_subscript`. There are similar distinctions for `__setitem__` and `__delitem__`. + +Because there is no such distinction from Python, implementing these methods will fill the mapping and sequence slots simultaneously. A Python class with `__len__` implemented, for example, will have both the `sq_len` and `mp_len` slots filled. + +The PyO3 behavior in 0.16 has been changed to be closer to this Python behavior by default. + ## from 0.14.* to 0.15 ### Changes in sequence indexing diff --git a/pyo3-macros-backend/src/pyimpl.rs b/pyo3-macros-backend/src/pyimpl.rs index 46fe05b9032..bb5a90f75ae 100644 --- a/pyo3-macros-backend/src/pyimpl.rs +++ b/pyo3-macros-backend/src/pyimpl.rs @@ -293,6 +293,8 @@ fn add_shared_proto_slots( ); try_add_shared_slot!("__pow__", "__rpow__", generate_pyclass_pow_slot); + // if this assertion trips, a slot fragment has been implemented which has not been added in the + // list above assert!(implemented_proto_fragments.is_empty()); } diff --git a/pyo3-macros-backend/src/pymethod.rs b/pyo3-macros-backend/src/pymethod.rs index 31efb6b3c0d..57aac487cec 100644 --- a/pyo3-macros-backend/src/pymethod.rs +++ b/pyo3-macros-backend/src/pymethod.rs @@ -704,8 +704,21 @@ impl Ty { let #ident = #extract; } } + Ty::PySsizeT => { + let ty = arg.ty; + let extract = handle_error( + extract_error_mode, + py, + quote! { + ::std::convert::TryInto::<#ty>::try_into(#ident).map_err(|e| _pyo3::exceptions::PyValueError::new_err(e.to_string())) + }, + ); + quote! { + let #ident = #extract; + } + } // Just pass other types through unmodified - Ty::PyBuffer | Ty::Int | Ty::PyHashT | Ty::PySsizeT | Ty::Void => quote! {}, + Ty::PyBuffer | Ty::Int | Ty::PyHashT | Ty::Void => quote! {}, } } } diff --git a/src/class/impl_.rs b/src/class/impl_.rs index dc1d74a0375..a3041da9f61 100644 --- a/src/class/impl_.rs +++ b/src/class/impl_.rs @@ -9,7 +9,12 @@ use crate::{ type_object::{PyLayout, PyTypeObject}, PyClass, PyMethodDefType, PyNativeType, PyResult, PyTypeInfo, Python, }; -use std::{marker::PhantomData, os::raw::c_void, ptr::NonNull, thread}; +use std::{ + marker::PhantomData, + os::raw::{c_int, c_void}, + ptr::NonNull, + thread, +}; /// This type is used as a "dummy" type on which dtolnay specializations are /// applied to apply implementations from `#[pymethods]` & `#[pyproto]` @@ -780,3 +785,34 @@ pub(crate) unsafe extern "C" fn fallback_new( pub(crate) unsafe extern "C" fn tp_dealloc(obj: *mut ffi::PyObject) { crate::callback_body!(py, T::Layout::tp_dealloc(obj, py)) } + +pub(crate) unsafe extern "C" fn get_sequence_item_from_mapping( + obj: *mut ffi::PyObject, + index: ffi::Py_ssize_t, +) -> *mut ffi::PyObject { + let index = ffi::PyLong_FromSsize_t(index); + if index.is_null() { + return std::ptr::null_mut(); + } + let result = ffi::PyObject_GetItem(obj, index); + ffi::Py_DECREF(index); + result +} + +pub(crate) unsafe extern "C" fn assign_sequence_item_from_mapping( + obj: *mut ffi::PyObject, + index: ffi::Py_ssize_t, + value: *mut ffi::PyObject, +) -> c_int { + let index = ffi::PyLong_FromSsize_t(index); + if index.is_null() { + return -1; + } + let result = if value.is_null() { + ffi::PyObject_DelItem(obj, index) + } else { + ffi::PyObject_SetItem(obj, index, value) + }; + ffi::Py_DECREF(index); + result +} diff --git a/src/pyclass.rs b/src/pyclass.rs index b2e1b0fc8fe..bbbca7f3b8f 100644 --- a/src/pyclass.rs +++ b/src/pyclass.rs @@ -1,6 +1,9 @@ //! `PyClass` and related traits. use crate::{ - class::impl_::{fallback_new, tp_dealloc, PyClassImpl}, + class::impl_::{ + assign_sequence_item_from_mapping, fallback_new, get_sequence_item_from_mapping, + tp_dealloc, PyClassImpl, + }, ffi, impl_::pyclass::{PyClassDict, PyClassWeakRef}, PyCell, PyErr, PyMethodDefType, PyNativeType, PyResult, PyTypeInfo, Python, @@ -114,28 +117,35 @@ unsafe fn create_type_object_impl( } } + let PyClassInfo { + method_defs, + property_defs, + } = method_defs_to_pyclass_info(for_each_method_def, dict_offset.is_none()); + // normal methods - let methods = py_class_method_defs(for_each_method_def); - if !methods.is_empty() { - push_slot(&mut slots, ffi::Py_tp_methods, into_raw(methods)); + if !method_defs.is_empty() { + push_slot(&mut slots, ffi::Py_tp_methods, into_raw(method_defs)); } // properties - let props = py_class_properties(dict_offset.is_none(), for_each_method_def); - if !props.is_empty() { - push_slot(&mut slots, ffi::Py_tp_getset, into_raw(props)); + if !property_defs.is_empty() { + push_slot(&mut slots, ffi::Py_tp_getset, into_raw(property_defs)); } // protocol methods + let mut has_getitem = false; + let mut has_setitem = false; let mut has_gc_methods = false; + // Before Python 3.9, need to patch in buffer methods manually (they don't work in slots) #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] let mut buffer_procs: ffi::PyBufferProcs = Default::default(); for_each_proto_slot(&mut |proto_slots| { for slot in proto_slots { + has_getitem |= slot.slot == ffi::Py_mp_subscript; + has_setitem |= slot.slot == ffi::Py_mp_ass_subscript; has_gc_methods |= slot.slot == ffi::Py_tp_clear || slot.slot == ffi::Py_tp_traverse; - #[cfg(all(not(Py_3_9), not(Py_LIMITED_API)))] if slot.slot == ffi::Py_bf_getbuffer { // Safety: slot.pfunc is a valid function pointer @@ -151,7 +161,31 @@ unsafe fn create_type_object_impl( slots.extend_from_slice(proto_slots); }); + // If mapping methods implemented, define sequence methods get implemented too. + // CPython does the same for Python `class` statements. + + // NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding + // the length to negative indices. + + if has_getitem { + push_slot( + &mut slots, + ffi::Py_sq_item, + get_sequence_item_from_mapping as _, + ); + } + + if has_setitem { + push_slot( + &mut slots, + ffi::Py_sq_ass_item, + assign_sequence_item_from_mapping as _, + ); + } + + // Add empty sentinel at the end push_slot(&mut slots, 0, ptr::null_mut()); + let mut spec = ffi::PyType_Spec { name: py_class_qualified_name(module_name, name)?, basicsize: basicsize as c_int, @@ -282,26 +316,76 @@ fn py_class_flags(has_gc_methods: bool, is_gc: bool, is_basetype: bool) -> c_uin flags.try_into().unwrap() } -fn py_class_method_defs( +struct PyClassInfo { + method_defs: Vec, + property_defs: Vec, +} + +fn method_defs_to_pyclass_info( for_each_method_def: &dyn Fn(&mut dyn FnMut(&[PyMethodDefType])), -) -> Vec { - let mut defs = Vec::new(); - - for_each_method_def(&mut |method_defs| { - defs.extend(method_defs.iter().filter_map(|def| match def { - PyMethodDefType::Method(def) - | PyMethodDefType::Class(def) - | PyMethodDefType::Static(def) => Some(def.as_method_def().unwrap()), - _ => None, - })); + has_dict: bool, +) -> PyClassInfo { + let mut method_defs = Vec::new(); + let mut property_defs_map = std::collections::HashMap::new(); + + for_each_method_def(&mut |class_method_defs| { + for def in class_method_defs { + match def { + PyMethodDefType::Getter(getter) => { + getter.copy_to( + property_defs_map + .entry(getter.name) + .or_insert(PY_GET_SET_DEF_INIT), + ); + } + PyMethodDefType::Setter(setter) => { + setter.copy_to( + property_defs_map + .entry(setter.name) + .or_insert(PY_GET_SET_DEF_INIT), + ); + } + PyMethodDefType::Method(def) + | PyMethodDefType::Class(def) + | PyMethodDefType::Static(def) => method_defs.push(def.as_method_def().unwrap()), + PyMethodDefType::ClassAttribute(_) => {} + } + } }); - if !defs.is_empty() { + // TODO: use into_values when on MSRV Rust >= 1.54 + let mut property_defs: Vec<_> = property_defs_map + .into_iter() + .map(|(_, value)| value) + .collect(); + + if !method_defs.is_empty() { + // Safety: Python expects a zeroed entry to mark the end of the defs + method_defs.push(unsafe { std::mem::zeroed() }); + } + + // PyPy doesn't automatically add __dict__ getter / setter. + // PyObject_GenericGetDict not in the limited API until Python 3.10. + if !has_dict { + #[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))] + property_defs.push(ffi::PyGetSetDef { + name: "__dict__\0".as_ptr() as *mut c_char, + get: Some(ffi::PyObject_GenericGetDict), + set: Some(ffi::PyObject_GenericSetDict), + doc: ptr::null_mut(), + closure: ptr::null_mut(), + }); + } + + if !property_defs.is_empty() { // Safety: Python expects a zeroed entry to mark the end of the defs - defs.push(unsafe { std::mem::zeroed() }); + property_defs.push(unsafe { std::mem::zeroed() }); } - defs + PyClassInfo { + method_defs, + property_defs, + } } /// Generates the __dictoffset__ and __weaklistoffset__ members, to set tp_dictoffset and @@ -351,52 +435,3 @@ const PY_GET_SET_DEF_INIT: ffi::PyGetSetDef = ffi::PyGetSetDef { doc: ptr::null_mut(), closure: ptr::null_mut(), }; - -fn py_class_properties( - is_dummy: bool, - for_each_method_def: &dyn Fn(&mut dyn FnMut(&[PyMethodDefType])), -) -> Vec { - let mut defs = std::collections::HashMap::new(); - - for_each_method_def(&mut |method_defs| { - for def in method_defs { - match def { - PyMethodDefType::Getter(getter) => { - getter.copy_to(defs.entry(getter.name).or_insert(PY_GET_SET_DEF_INIT)); - } - PyMethodDefType::Setter(setter) => { - setter.copy_to(defs.entry(setter.name).or_insert(PY_GET_SET_DEF_INIT)); - } - _ => (), - } - } - }); - - let mut props: Vec<_> = defs.values().cloned().collect(); - - // PyPy doesn't automatically adds __dict__ getter / setter. - // PyObject_GenericGetDict not in the limited API until Python 3.10. - push_dict_getset(&mut props, is_dummy); - - if !props.is_empty() { - // Safety: Python expects a zeroed entry to mark the end of the defs - props.push(unsafe { std::mem::zeroed() }); - } - props -} - -#[cfg(not(any(PyPy, all(Py_LIMITED_API, not(Py_3_10)))))] -fn push_dict_getset(props: &mut Vec, is_dummy: bool) { - if !is_dummy { - props.push(ffi::PyGetSetDef { - name: "__dict__\0".as_ptr() as *mut c_char, - get: Some(ffi::PyObject_GenericGetDict), - set: Some(ffi::PyObject_GenericSetDict), - doc: ptr::null_mut(), - closure: ptr::null_mut(), - }); - } -} - -#[cfg(any(PyPy, all(Py_LIMITED_API, not(Py_3_10))))] -fn push_dict_getset(_: &mut Vec, _is_dummy: bool) {} diff --git a/tests/test_proto_methods.rs b/tests/test_proto_methods.rs index 6dd1a752a07..a6d621c9dfc 100644 --- a/tests/test_proto_methods.rs +++ b/tests/test_proto_methods.rs @@ -1,9 +1,9 @@ #![cfg(feature = "macros")] -use pyo3::exceptions::PyValueError; -use pyo3::types::{PyList, PySlice, PyType}; +use pyo3::exceptions::{PyIndexError, PyValueError}; +use pyo3::types::{PyDict, PyList, PyMapping, PySequence, PySlice, PyType}; use pyo3::{exceptions::PyAttributeError, prelude::*}; -use pyo3::{ffi, py_run, AsPyPointer, PyCell}; +use pyo3::{py_run, PyCell}; use std::{isize, iter}; mod common; @@ -166,37 +166,198 @@ fn test_bool() { } #[pyclass] -pub struct Len { - l: usize, -} +pub struct LenOverflow; #[pymethods] -impl Len { +impl LenOverflow { fn __len__(&self) -> usize { - self.l + (isize::MAX as usize) + 1 } } #[test] -fn len() { - let gil = Python::acquire_gil(); - let py = gil.python(); +fn len_overflow() { + Python::with_gil(|py| { + let inst = Py::new(py, LenOverflow).unwrap(); + py_expect_exception!(py, inst, "len(inst)", PyOverflowError); + }); +} - let inst = Py::new(py, Len { l: 10 }).unwrap(); - py_assert!(py, inst, "len(inst) == 10"); - unsafe { - assert_eq!(ffi::PyObject_Size(inst.as_ptr()), 10); - assert_eq!(ffi::PyMapping_Size(inst.as_ptr()), 10); +#[pyclass] +pub struct Mapping { + values: Py, +} + +#[pymethods] +impl Mapping { + fn __len__(&self, py: Python) -> usize { + self.values.as_ref(py).len() } - let inst = Py::new( - py, - Len { - l: (isize::MAX as usize) + 1, - }, - ) - .unwrap(); - py_expect_exception!(py, inst, "len(inst)", PyOverflowError); + fn __getitem__<'a>(&'a self, key: &'a PyAny) -> PyResult<&'a PyAny> { + let any: &PyAny = self.values.as_ref(key.py()).as_ref(); + any.get_item(key) + } + + fn __setitem__(&self, key: &PyAny, value: &PyAny) -> PyResult<()> { + self.values.as_ref(key.py()).set_item(key, value) + } + + fn __delitem__(&self, key: &PyAny) -> PyResult<()> { + self.values.as_ref(key.py()).del_item(key) + } +} + +#[test] +fn mapping() { + Python::with_gil(|py| { + let inst = Py::new( + py, + Mapping { + values: PyDict::new(py).into(), + }, + ) + .unwrap(); + + // + let mapping: &PyMapping = inst.as_ref(py).downcast().unwrap(); + + py_assert!(py, inst, "len(inst) == 0"); + + py_run!(py, inst, "inst['foo'] = 'foo'"); + py_assert!(py, inst, "inst['foo'] == 'foo'"); + py_run!(py, inst, "del inst['foo']"); + py_expect_exception!(py, inst, "inst['foo']", PyKeyError); + + // Default iteration will call __getitem__ with integer indices + // which fails with a KeyError + py_expect_exception!(py, inst, "[*inst] == []", PyKeyError, "0"); + + // check mapping protocol + assert_eq!(mapping.len().unwrap(), 0); + + mapping.set_item(0, 5).unwrap(); + assert_eq!(mapping.len().unwrap(), 1); + + assert_eq!(mapping.get_item(0).unwrap().extract::().unwrap(), 5); + + mapping.del_item(0).unwrap(); + assert_eq!(mapping.len().unwrap(), 0); + }); +} + +#[derive(FromPyObject)] +enum SequenceIndex<'a> { + Integer(isize), + Slice(&'a PySlice), +} + +#[pyclass] +pub struct Sequence { + values: Vec, +} + +#[pymethods] +impl Sequence { + fn __len__(&self) -> usize { + self.values.len() + } + + fn __getitem__(&self, index: SequenceIndex) -> PyResult { + match index { + SequenceIndex::Integer(index) => { + let uindex = self.usize_index(index)?; + self.values + .get(uindex) + .map(Clone::clone) + .ok_or_else(|| PyIndexError::new_err(index)) + } + // Just to prove that slicing can be implemented + SequenceIndex::Slice(s) => Ok(s.into()), + } + } + + fn __setitem__(&mut self, index: isize, value: PyObject) -> PyResult<()> { + let uindex = self.usize_index(index)?; + self.values + .get_mut(uindex) + .map(|place| *place = value) + .ok_or_else(|| PyIndexError::new_err(index)) + } + + fn __delitem__(&mut self, index: isize) -> PyResult<()> { + let uindex = self.usize_index(index)?; + if uindex >= self.values.len() { + Err(PyIndexError::new_err(index)) + } else { + self.values.remove(uindex); + Ok(()) + } + } + + fn append(&mut self, value: PyObject) { + self.values.push(value); + } +} + +impl Sequence { + fn usize_index(&self, index: isize) -> PyResult { + if index < 0 { + let corrected_index = index + self.values.len() as isize; + if corrected_index < 0 { + Err(PyIndexError::new_err(index)) + } else { + Ok(corrected_index as usize) + } + } else { + Ok(index as usize) + } + } +} + +#[test] +fn sequence() { + Python::with_gil(|py| { + let inst = Py::new(py, Sequence { values: vec![] }).unwrap(); + + let sequence: &PySequence = inst.as_ref(py).downcast().unwrap(); + + py_assert!(py, inst, "len(inst) == 0"); + + py_expect_exception!(py, inst, "inst[0]", PyIndexError); + py_run!(py, inst, "inst.append('foo')"); + + py_assert!(py, inst, "inst[0] == 'foo'"); + py_assert!(py, inst, "inst[-1] == 'foo'"); + + py_expect_exception!(py, inst, "inst[1]", PyIndexError); + py_expect_exception!(py, inst, "inst[-2]", PyIndexError); + + py_assert!(py, inst, "[*inst] == ['foo']"); + + py_run!(py, inst, "del inst[0]"); + + py_expect_exception!(py, inst, "inst['foo']", PyTypeError); + + py_assert!(py, inst, "inst[0:2] == slice(0, 2)"); + + // check sequence protocol + + // we don't implement sequence length so that CPython doesn't attempt to correct negative + // indices. + assert!(sequence.len().is_err()); + // however regular python len() works thanks to mp_len slot + assert_eq!(inst.as_ref(py).len().unwrap(), 0); + + py_run!(py, inst, "inst.append(0)"); + sequence.set_item(0, 5).unwrap(); + assert_eq!(inst.as_ref(py).len().unwrap(), 1); + + assert_eq!(sequence.get_item(0).unwrap().extract::().unwrap(), 5); + sequence.del_item(0).unwrap(); + + assert_eq!(inst.as_ref(py).len().unwrap(), 0); + }); } #[pyclass]