From 3965d1a026d956659a309e569c254c081ebcb285 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sun, 3 Apr 2022 09:30:26 +0200 Subject: [PATCH] Add PyString::intern to enable access to Python's built-in string interning. --- CHANGELOG.md | 1 + src/types/string.rs | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index db19eba9431..6a95190576a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Allow dependent crates to access config values from `pyo3-build-config` via cargo link dep env vars. [#2092](https://github.com/PyO3/pyo3/pull/2092) - Added methods on `InterpreterConfig` to run Python scripts using the configured executable. [#2092](https://github.com/PyO3/pyo3/pull/2092) - Added FFI definitions for `PyType_FromModuleAndSpec`, `PyType_GetModule`, `PyType_GetModuleState` and `PyModule_AddType`. [#2250](https://github.com/PyO3/pyo3/pull/2250) +- Add `PyString::intern` to enable usage of Python's built-in string interning. [#2268](https://github.com/PyO3/pyo3/pull/2268) ### Changed diff --git a/src/types/string.rs b/src/types/string.rs index 76610a108ae..4e906f8ce51 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -144,6 +144,26 @@ impl PyString { unsafe { py.from_owned_ptr(ffi::PyUnicode_FromStringAndSize(ptr, len)) } } + /// Intern the given string. + /// + /// This will return a reference to the same Python string object if called repeatedly with the same string. + /// + /// Note that while this is more memory efficient than [`PyString::new`], it unconditionally allocates a + /// temporary Python string object and is thereby slower than [`PyString::new`]. + /// + /// Panics if out of memory. 
+    pub fn intern<'p>(py: Python<'p>, s: &str) -> &'p PyString {
+        let ptr = s.as_ptr() as *const c_char; // start of `s`'s UTF-8 bytes; not NUL-terminated, hence the explicit length
+        let len = s.len() as ffi::Py_ssize_t; // length in bytes, not in characters
+        unsafe {
+            let mut ob = ffi::PyUnicode_FromStringAndSize(ptr, len); // temporary string object built from the buffer
+            if !ob.is_null() { // only intern when the temporary was actually created
+                ffi::PyUnicode_InternInPlace(&mut ob); // receives `&mut ob`, so `ob` may be re-pointed at the interned object
+            }
+            py.from_owned_ptr(ob) // NOTE(review): presumably where the documented out-of-memory panic fires on null - confirm
+        }
+    }
+
     /// Attempts to create a Python string from a Python [bytes-like object].
     ///
     /// [bytes-like object]: (https://docs.python.org/3/glossary.html#term-bytes-like-object).
@@ -592,4 +612,22 @@ mod tests {
             assert_eq!(data.to_string_lossy(), Cow::Owned::("𠀀�".into()));
         });
     }
+
+    #[test]
+    fn test_intern_string() {
+        Python::with_gil(|py| {
+            let py_string1 = PyString::intern(py, "foo");
+            assert_eq!(py_string1.to_str().unwrap(), "foo");
+
+            let py_string2 = PyString::intern(py, "foo"); // same contents as `py_string1`
+            assert_eq!(py_string2.to_str().unwrap(), "foo");
+
+            assert_eq!(py_string1.as_ptr(), py_string2.as_ptr()); // equal contents must yield the very same object
+
+            let py_string3 = PyString::intern(py, "bar");
+            assert_eq!(py_string3.to_str().unwrap(), "bar");
+
+            assert_ne!(py_string1.as_ptr(), py_string3.as_ptr()); // different contents must yield distinct objects
+        });
+    }
 }