From c16cc35b30bce8092dfb5e074a76e2642815cea2 Mon Sep 17 00:00:00 2001 From: David Hewitt <1939362+davidhewitt@users.noreply.github.com> Date: Mon, 21 Mar 2022 23:34:50 +0000 Subject: [PATCH 1/2] pyclass: mapping flag --- CHANGELOG.md | 1 + guide/src/class.md | 1 + guide/src/class/protocols.md | 11 ++++++- pyo3-ffi/src/object.rs | 6 ++++ pyo3-macros-backend/src/attributes.rs | 1 + pyo3-macros-backend/src/pyclass.rs | 7 +++++ pyo3-macros/docs/pyclass_parameters.md | 1 + pyo3-macros/src/lib.rs | 1 + src/impl_/pyclass.rs | 3 ++ src/pyclass.rs | 40 +++++++++++++++----------- tests/test_mapping.rs | 23 ++++++++++++++- tests/ui/invalid_pyclass_args.stderr | 4 +-- 12 files changed, 78 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fda04d1dd12..dbaf56f3416 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added `as_bytes` on `Py`. [#2235](https://github.com/PyO3/pyo3/pull/2235) +- Add `#[pyclass(mapping)]` option to leave sequence slots empty in container implementations. [#2265](https://github.com/PyO3/pyo3/pull/2265) ### Packaging diff --git a/guide/src/class.md b/guide/src/class.md index 67e108220f9..73350b9d74e 100644 --- a/guide/src/class.md +++ b/guide/src/class.md @@ -203,6 +203,7 @@ Python::with_gil(|py|{ [params-4]: https://doc.rust-lang.org/stable/std/rc/struct.Rc.html [params-5]: https://doc.rust-lang.org/stable/std/sync/struct.Rc.html [params-6]: https://docs.python.org/3/library/weakref.html +[params-mapping]: ./class/protocols.md#mapping--sequence-types These parameters are covered in various sections of this guide. diff --git a/guide/src/class/protocols.md b/guide/src/class/protocols.md index 320307d00d5..3bd2b86a26b 100644 --- a/guide/src/class/protocols.md +++ b/guide/src/class/protocols.md @@ -192,6 +192,16 @@ and `Return` a final value - see its docs for further details and an example. ### Mapping & Sequence types +The magic methods in this section can be used to implement Python container types. They are two main categories of container in Python: "mappings" such as `dict`, with arbitrary keys, and "sequences" such as `list` and `tuple`, with integer keys. + +The Python C-API which PyO3 is built upon has separate "slots" for sequences and mappings. When writing a `class` in pure Python, there is no such distinction in the implementation - a `__getitem__` implementation will fill the slots for both the mapping and sequence forms, for example. + +By default PyO3 reproduces the Python behaviour of filling both mapping and sequence slots. This makes sense for the "simple" case which matches Python, and also for sequences, where the mapping slot is used anyway to implement slice indexing. + +For mapping types, it may be desirable to not have the sequence slots filled. Use the `#[pyclass(mapping)]` annotation to instruct PyO3 to only fill the mapping slots, leaving the sequence ones empty. + +This behaviour affects the implementation of `__getitem__`, `__setitem__`, and `__delitem__`. + - `__len__() -> usize` Implements the built-in function `len()` for the sequence. @@ -265,7 +275,6 @@ and `Return` a final value - see its docs for further details and an example. Used by the `*=` operator, after trying the numeric multiplication via the `__imul__` method. - ### Descriptors - `__get__(, object, object) -> object` diff --git a/pyo3-ffi/src/object.rs b/pyo3-ffi/src/object.rs index 408ce861b61..a0c8174b33c 100644 --- a/pyo3-ffi/src/object.rs +++ b/pyo3-ffi/src/object.rs @@ -337,6 +337,12 @@ extern "C" { // Flag bits for printing: pub const Py_PRINT_RAW: c_int = 1; // No string quotes etc. +#[cfg(all(Py_3_10, not(Py_LIMITED_API)))] +pub const Py_TPFLAGS_SEQUENCE: c_ulong = 1 << 5; + +#[cfg(all(Py_3_10, not(Py_LIMITED_API)))] +pub const Py_TPFLAGS_MAPPING: c_ulong = 1 << 6; + #[cfg(Py_3_10)] pub const Py_TPFLAGS_DISALLOW_INSTANTIATION: c_ulong = 1 << 7; diff --git a/pyo3-macros-backend/src/attributes.rs b/pyo3-macros-backend/src/attributes.rs index 2b63f0cfb11..6d381e21933 100644 --- a/pyo3-macros-backend/src/attributes.rs +++ b/pyo3-macros-backend/src/attributes.rs @@ -18,6 +18,7 @@ pub mod kw { syn::custom_keyword!(gc); syn::custom_keyword!(get); syn::custom_keyword!(item); + syn::custom_keyword!(mapping); syn::custom_keyword!(module); syn::custom_keyword!(name); syn::custom_keyword!(pass_module); diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 4a016fe82a8..3a7d943f401 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -54,6 +54,7 @@ pub struct PyClassPyO3Options { pub dict: Option, pub extends: Option, pub freelist: Option, + pub mapping: Option, pub module: Option, pub name: Option, pub subclass: Option, @@ -69,6 +70,7 @@ enum PyClassPyO3Option { Dict(kw::dict), Extends(ExtendsAttribute), Freelist(FreelistAttribute), + Mapping(kw::mapping), Module(ModuleAttribute), Name(NameAttribute), Subclass(kw::subclass), @@ -90,6 +92,8 @@ impl Parse for PyClassPyO3Option { input.parse().map(PyClassPyO3Option::Extends) } else if lookahead.peek(attributes::kw::freelist) { input.parse().map(PyClassPyO3Option::Freelist) + } else if lookahead.peek(attributes::kw::mapping) { + input.parse().map(PyClassPyO3Option::Mapping) } else if lookahead.peek(attributes::kw::module) { input.parse().map(PyClassPyO3Option::Module) } else if lookahead.peek(kw::name) { @@ -145,6 +149,7 @@ impl PyClassPyO3Options { PyClassPyO3Option::Dict(dict) => set_option!(dict), PyClassPyO3Option::Extends(extends) => set_option!(extends), PyClassPyO3Option::Freelist(freelist) => set_option!(freelist), + PyClassPyO3Option::Mapping(mapping) => set_option!(mapping), PyClassPyO3Option::Module(module) => set_option!(module), PyClassPyO3Option::Name(name) => set_option!(name), PyClassPyO3Option::Subclass(subclass) => set_option!(subclass), @@ -749,6 +754,7 @@ impl<'a> PyClassImplsBuilder<'a> { .map(|extends_attr| extends_attr.value.clone()) .unwrap_or_else(|| parse_quote! { _pyo3::PyAny }); let is_subclass = self.attr.options.extends.is_some(); + let is_mapping: bool = self.attr.options.mapping.is_some(); let dict_offset = if self.attr.options.dict.is_some() { quote! { @@ -830,6 +836,7 @@ impl<'a> PyClassImplsBuilder<'a> { const DOC: &'static str = #doc; const IS_BASETYPE: bool = #is_basetype; const IS_SUBCLASS: bool = #is_subclass; + const IS_MAPPING: bool = #is_mapping; type Layout = _pyo3::PyCell; type BaseType = #base; diff --git a/pyo3-macros/docs/pyclass_parameters.md b/pyo3-macros/docs/pyclass_parameters.md index ae5ef9ddc23..145d5ac8db7 100644 --- a/pyo3-macros/docs/pyclass_parameters.md +++ b/pyo3-macros/docs/pyclass_parameters.md @@ -6,6 +6,7 @@ | `dict` | Gives instances of this class an empty `__dict__` to store custom attributes. | | `extends = BaseType` | Use a custom baseclass. Defaults to [`PyAny`][params-1] | | `freelist = N` | Implements a [free list][params-2] of size N. This can improve performance for types that are often created and deleted in quick succession. Profile your code to see whether `freelist` is right for you. | +| `mapping` | Inform PyO3 that this class is a [`Mapping`][params-mapping], and so leave its implementation of sequence C-API slots empty. | | `module = "module_name"` | Python code will see the class as being defined in this module. Defaults to `builtins`. | | `name = "python_name"` | Sets the name that Python sees this class as. Defaults to the name of the Rust struct. | | `text_signature = "(arg1, arg2, ...)"` | Sets the text signature for the Python class' `__new__` method. | diff --git a/pyo3-macros/src/lib.rs b/pyo3-macros/src/lib.rs index da9784d1d1c..3577b9c23e9 100644 --- a/pyo3-macros/src/lib.rs +++ b/pyo3-macros/src/lib.rs @@ -93,6 +93,7 @@ pub fn pyproto(_: TokenStream, input: TokenStream) -> TokenStream { /// [params-4]: std::rc::Rc /// [params-5]: std::sync::Arc /// [params-6]: https://docs.python.org/3/library/weakref.html +/// [params-mapping]: https://pyo3.rs/latest/class/protocols.html#mapping--sequence-types #[proc_macro_attribute] pub fn pyclass(attr: TokenStream, input: TokenStream) -> TokenStream { use syn::Item; diff --git a/src/impl_/pyclass.rs b/src/impl_/pyclass.rs index d06db820a79..48ce1d14800 100644 --- a/src/impl_/pyclass.rs +++ b/src/impl_/pyclass.rs @@ -155,6 +155,9 @@ pub trait PyClassImpl: Sized { /// #[pyclass(extends=...)] const IS_SUBCLASS: bool = false; + /// #[pyclass(mapping)] + const IS_MAPPING: bool = false; + /// Layout type Layout: PyLayout; diff --git a/src/pyclass.rs b/src/pyclass.rs index 083223cdb62..eb2bb5c9595 100644 --- a/src/pyclass.rs +++ b/src/pyclass.rs @@ -55,6 +55,7 @@ where T::weaklist_offset(), &T::for_all_items, T::IS_BASETYPE, + T::IS_MAPPING, ) } { Ok(type_object) => type_object, @@ -75,6 +76,7 @@ unsafe fn create_type_object_impl( weaklist_offset: Option, for_all_items: &dyn Fn(&mut dyn FnMut(&PyClassItems)), is_basetype: bool, + is_mapping: bool, ) -> PyResult<*mut ffi::PyTypeObject> { let mut slots = Vec::new(); @@ -147,26 +149,30 @@ unsafe fn create_type_object_impl( slots.extend_from_slice(items.slots); }); - // If mapping methods implemented, define sequence methods get implemented too. - // CPython does the same for Python `class` statements. + if !is_mapping { + // If mapping methods implemented, define sequence methods get implemented too. + // CPython does the same for Python `class` statements. - // NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding - // the length to negative indices. + // NB we don't implement sq_length to avoid annoying CPython behaviour of automatically adding + // the length to negative indices. - if has_getitem { - push_slot( - &mut slots, - ffi::Py_sq_item, - get_sequence_item_from_mapping as _, - ); - } + // Don't add these methods for "pure" mappings. - if has_setitem { - push_slot( - &mut slots, - ffi::Py_sq_ass_item, - assign_sequence_item_from_mapping as _, - ); + if has_getitem { + push_slot( + &mut slots, + ffi::Py_sq_item, + get_sequence_item_from_mapping as _, + ); + } + + if has_setitem { + push_slot( + &mut slots, + ffi::Py_sq_ass_item, + assign_sequence_item_from_mapping as _, + ); + } } if !has_new { diff --git a/tests/test_mapping.rs b/tests/test_mapping.rs index 2f916d8ee89..d9efb77288c 100644 --- a/tests/test_mapping.rs +++ b/tests/test_mapping.rs @@ -7,10 +7,12 @@ use pyo3::prelude::*; use pyo3::py_run; use pyo3::types::IntoPyDict; use pyo3::types::PyList; +use pyo3::types::PyMapping; +use pyo3::types::PySequence; mod common; -#[pyclass] +#[pyclass(mapping)] struct Mapping { index: HashMap, } @@ -55,6 +57,13 @@ impl Mapping { Ok(()) } } + + fn get(&self, py: Python<'_>, key: &str, default: Option) -> Option { + self.index + .get(key) + .map(|value| value.into_py(py)) + .or(default) + } } /// Return a dict with `m = Mapping(['1', '2', '3'])`. @@ -103,3 +112,15 @@ fn test_delitem() { py_expect_exception!(py, *d, "del m[-1]", PyTypeError); py_expect_exception!(py, *d, "del m['4']", PyKeyError); } + +#[test] +fn mapping_is_not_sequence() { + Python::with_gil(|py| { + let mut index = HashMap::new(); + index.insert("Foo".into(), 1); + index.insert("Bar".into(), 2); + let m = Py::new(py, Mapping { index }).unwrap(); + assert!(m.as_ref(py).downcast::().is_ok()); + assert!(m.as_ref(py).downcast::().is_err()); + }); +} diff --git a/tests/ui/invalid_pyclass_args.stderr b/tests/ui/invalid_pyclass_args.stderr index fd64e06e755..2fc642d8ba0 100644 --- a/tests/ui/invalid_pyclass_args.stderr +++ b/tests/ui/invalid_pyclass_args.stderr @@ -1,4 +1,4 @@ -error: expected one of: `crate`, `dict`, `extends`, `freelist`, `module`, `name`, `subclass`, `text_signature`, `unsendable`, `weakref`, `gc` +error: expected one of: `crate`, `dict`, `extends`, `freelist`, `mapping`, `module`, `name`, `subclass`, `text_signature`, `unsendable`, `weakref`, `gc` --> tests/ui/invalid_pyclass_args.rs:3:11 | 3 | #[pyclass(extend=pyo3::types::PyDict)] @@ -34,7 +34,7 @@ error: expected string literal 18 | #[pyclass(module = my_module)] | ^^^^^^^^^ -error: expected one of: `crate`, `dict`, `extends`, `freelist`, `module`, `name`, `subclass`, `text_signature`, `unsendable`, `weakref`, `gc` +error: expected one of: `crate`, `dict`, `extends`, `freelist`, `mapping`, `module`, `name`, `subclass`, `text_signature`, `unsendable`, `weakref`, `gc` --> tests/ui/invalid_pyclass_args.rs:21:11 | 21 | #[pyclass(weakrev)] From 9f3ccff8c9d1c5a70c16f17c973a3c54d6d1f1fc Mon Sep 17 00:00:00 2001 From: David Hewitt <1939362+davidhewitt@users.noreply.github.com> Date: Mon, 4 Apr 2022 15:51:01 +0100 Subject: [PATCH 2/2] guide: improve documentation for `#[pyclass(mapping)]` --- guide/src/class/protocols.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/guide/src/class/protocols.md b/guide/src/class/protocols.md index 3bd2b86a26b..88e76d64372 100644 --- a/guide/src/class/protocols.md +++ b/guide/src/class/protocols.md @@ -198,9 +198,11 @@ The Python C-API which PyO3 is built upon has separate "slots" for sequences and By default PyO3 reproduces the Python behaviour of filling both mapping and sequence slots. This makes sense for the "simple" case which matches Python, and also for sequences, where the mapping slot is used anyway to implement slice indexing. -For mapping types, it may be desirable to not have the sequence slots filled. Use the `#[pyclass(mapping)]` annotation to instruct PyO3 to only fill the mapping slots, leaving the sequence ones empty. +Mapping types usually will not want the sequence slots filled. Having them filled will lead to outcomes which may be unwanted, such as: +- The mapping type will successfully cast to [`PySequence`]. This may lead to consumers of the type handling it incorrectly. +- Python provides a default implementation of `__iter__` for sequences, which calls `__getitem__` with consecutive positive integers starting from 0 until an `IndexError` is returned. Unless the mapping only contains consecutive positive integer keys, this `__iter__` implementation will likely not be the intended behavior. -This behaviour affects the implementation of `__getitem__`, `__setitem__`, and `__delitem__`. +Use the `#[pyclass(mapping)]` annotation to instruct PyO3 to only fill the mapping slots, leaving the sequence ones empty. This will apply to `__getitem__`, `__setitem__`, and `__delitem__`. - `__len__() -> usize` @@ -608,3 +610,4 @@ For details, look at the `#[pymethods]` regarding GC methods. [`PyObjectProtocol`]: {{#PYO3_DOCS_URL}}/pyo3/class/basic/trait.PyObjectProtocol.html [`PySequenceProtocol`]: {{#PYO3_DOCS_URL}}/pyo3/class/sequence/trait.PySequenceProtocol.html [`PyIterProtocol`]: {{#PYO3_DOCS_URL}}/pyo3/class/iter/trait.PyIterProtocol.html +[`PySequence`]: {{#PYO3_DOCS_URL}}/pyo3/types/struct.PySequence.html