diff --git a/CHANGELOG.md b/CHANGELOG.md index f941c75a0ea..475c023cd0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,9 @@ PyO3 versions, please see the [migration guide](https://pyo3.rs/master/migration The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## Unreleased +## [Unreleased] ### Added +- Add conversions between `OsStr`/`OsString`/`Path`/`PathBuf` and Python strings. [#1379](https://github.com/PyO3/pyo3/pull/1379) - Add FFI definition `PyCFunction_CheckExact` for Python 3.9 and later. [#1425](https://github.com/PyO3/pyo3/pull/1425) ### Changed diff --git a/guide/src/conversions/tables.md b/guide/src/conversions/tables.md index 72768a6f41e..a7fada477a9 100644 --- a/guide/src/conversions/tables.md +++ b/guide/src/conversions/tables.md @@ -13,7 +13,7 @@ The table below contains the Python type and the corresponding function argument | Python | Rust | Rust (Python-native) | | ------------- |:-------------------------------:|:--------------------:| | `object` | - | `&PyAny` | -| `str` | `String`, `Cow`, `&str` | `&PyUnicode` | +| `str` | `String`, `Cow`, `&str`, `OsString`, `PathBuf` | `&PyUnicode` | | `bytes` | `Vec`, `&[u8]` | `&PyBytes` | | `bool` | `bool` | `&PyBool` | | `int` | Any integer type (`i32`, `u32`, `usize`, etc) | `&PyLong` | diff --git a/src/conversions/mod.rs b/src/conversions/mod.rs new file mode 100644 index 00000000000..a7ccff995ac --- /dev/null +++ b/src/conversions/mod.rs @@ -0,0 +1,5 @@ +//! This module contains conversions between non-String Rust object and their string representation +//! in Python + +mod osstr; +mod path; diff --git a/src/conversions/osstr.rs b/src/conversions/osstr.rs new file mode 100644 index 00000000000..be6f405bfea --- /dev/null +++ b/src/conversions/osstr.rs @@ -0,0 +1,202 @@ +use crate::types::PyString; +#[cfg(windows)] +use crate::PyErr; +use crate::PyNativeType; +use crate::{ + ffi, AsPyPointer, FromPyObject, IntoPy, PyAny, PyObject, PyResult, PyTryFrom, Python, + ToPyObject, +}; +use std::borrow::Cow; +use std::ffi::{OsStr, OsString}; +#[cfg(not(windows))] +use std::os::raw::c_char; + +impl ToPyObject for OsStr { + fn to_object(&self, py: Python) -> PyObject { + // If the string is UTF-8, take the quick and easy shortcut + if let Some(valid_utf8_path) = self.to_str() { + return valid_utf8_path.to_object(py); + } + + // All targets besides windows support the std::os::unix::ffi::OsStrExt API: + // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59 + #[cfg(not(windows))] + { + let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self); + + let ptr = bytes.as_ptr() as *const c_char; + let len = bytes.len() as ffi::Py_ssize_t; + unsafe { + // DecodeFSDefault automatically chooses an appropriate decoding mechanism to + // parse os strings losslessly (i.e. surrogateescape most of the time) + let pystring = ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len); + PyObject::from_owned_ptr(py, pystring) + } + } + + #[cfg(windows)] + { + let wstr: Vec = std::os::windows::ffi::OsStrExt::encode_wide(self).collect(); + + unsafe { + // This will not panic because the data from encode_wide is well-formed Windows + // string data + PyObject::from_owned_ptr( + py, + ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t), + ) + } + } + } +} + +// There's no FromPyObject implementation for &OsStr because albeit possible on Unix, this would +// be impossible to implement on Windows. Hence it's omitted entirely + +impl FromPyObject<'_> for OsString { + fn extract(ob: &PyAny) -> PyResult { + let pystring = ::try_from(ob)?; // Cast PyAny to PyString + + #[cfg(not(windows))] + { + // Decode from Python's lossless bytes string representation back into raw bytes + let fs_encoded_bytes = unsafe { + crate::Py::::from_owned_ptr( + ob.py(), + ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()), + ) + }; + + // Create an OsStr view into the raw bytes from Python + let os_str: &OsStr = std::os::unix::ffi::OsStrExt::from_bytes( + fs_encoded_bytes.as_ref(ob.py()).as_bytes(), + ); + + Ok(os_str.to_os_string()) + } + + #[cfg(windows)] + { + // Take the quick and easy shortcut if UTF-8 + if let Ok(utf8_string) = pystring.to_str() { + return Ok(utf8_string.to_owned().into()); + } + + // Get an owned allocated wide char buffer from PyString, which we have to deallocate + // ourselves + let size = + unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) }; + if size == -1 { + return Err(PyErr::fetch(ob.py())); + } + + let mut buffer = vec![0; size as usize]; + let bytes_read = + unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) }; + assert_eq!(bytes_read, size); + + // Copy wide char buffer into OsString + let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer); + + Ok(os_string) + } + } +} + +impl IntoPy for &'_ OsStr { + #[inline] + fn into_py(self, py: Python) -> PyObject { + self.to_object(py) + } +} + +impl ToPyObject for Cow<'_, OsStr> { + #[inline] + fn to_object(&self, py: Python) -> PyObject { + (&self as &OsStr).to_object(py) + } +} + +impl IntoPy for Cow<'_, OsStr> { + #[inline] + fn into_py(self, py: Python) -> PyObject { + self.to_object(py) + } +} + +impl ToPyObject for OsString { + #[inline] + fn to_object(&self, py: Python) -> PyObject { + (&self as &OsStr).to_object(py) + } +} + +impl IntoPy for OsString { + fn into_py(self, py: Python) -> PyObject { + self.to_object(py) + } +} + +#[cfg(test)] +mod test { + use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject}; + use std::fmt::Debug; + use std::{ + borrow::Cow, + ffi::{OsStr, OsString}, + }; + + #[test] + #[cfg(not(windows))] + fn test_non_utf8_conversion() { + Python::with_gil(|py| { + use std::os::unix::ffi::OsStrExt; + + // this is not valid UTF-8 + let payload = &[250, 251, 252, 253, 254, 255, 0, 255]; + let os_str = OsStr::from_bytes(payload); + + // do a roundtrip into Pythonland and back and compare + let py_str: PyObject = os_str.into_py(py); + let os_str_2: OsString = py_str.extract(py).unwrap(); + assert_eq!(os_str, os_str_2); + }); + } + + #[test] + fn test_topyobject_roundtrip() { + Python::with_gil(|py| { + fn test_roundtrip + Debug>(py: Python, obj: T) { + let pyobject = obj.to_object(py); + let pystring: &PyString = pyobject.extract(py).unwrap(); + assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); + let roundtripped_obj: OsString = pystring.extract().unwrap(); + assert_eq!(obj.as_ref(), roundtripped_obj.as_os_str()); + } + let os_str = OsStr::new("Hello\0\nšŸ"); + test_roundtrip::<&OsStr>(py, os_str); + test_roundtrip::>(py, Cow::Borrowed(os_str)); + test_roundtrip::>(py, Cow::Owned(os_str.to_os_string())); + test_roundtrip::(py, os_str.to_os_string()); + }); + } + + #[test] + fn test_intopy_roundtrip() { + Python::with_gil(|py| { + fn test_roundtrip + AsRef + Debug + Clone>( + py: Python, + obj: T, + ) { + let pyobject = obj.clone().into_py(py); + let pystring: &PyString = pyobject.extract(py).unwrap(); + assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); + let roundtripped_obj: OsString = pystring.extract().unwrap(); + assert!(obj.as_ref() == roundtripped_obj.as_os_str()); + } + let os_str = OsStr::new("Hello\0\nšŸ"); + test_roundtrip::<&OsStr>(py, os_str); + test_roundtrip::(py, os_str.to_os_string()); + }) + } +} diff --git a/src/conversions/path.rs b/src/conversions/path.rs new file mode 100644 index 00000000000..a08345b9b26 --- /dev/null +++ b/src/conversions/path.rs @@ -0,0 +1,115 @@ +use crate::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject}; +use std::borrow::Cow; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +impl ToPyObject for Path { + fn to_object(&self, py: Python) -> PyObject { + self.as_os_str().to_object(py) + } +} + +// See osstr.rs for why there's no FromPyObject impl for &Path + +impl FromPyObject<'_> for PathBuf { + fn extract(ob: &PyAny) -> PyResult { + Ok(PathBuf::from(OsString::extract(ob)?)) + } +} + +impl<'a> IntoPy for &'a Path { + #[inline] + fn into_py(self, py: Python) -> PyObject { + self.as_os_str().to_object(py) + } +} + +impl<'a> ToPyObject for Cow<'a, Path> { + #[inline] + fn to_object(&self, py: Python) -> PyObject { + self.as_os_str().to_object(py) + } +} + +impl<'a> IntoPy for Cow<'a, Path> { + #[inline] + fn into_py(self, py: Python) -> PyObject { + self.to_object(py) + } +} + +impl ToPyObject for PathBuf { + #[inline] + fn to_object(&self, py: Python) -> PyObject { + self.as_os_str().to_object(py) + } +} + +impl IntoPy for PathBuf { + fn into_py(self, py: Python) -> PyObject { + self.into_os_string().to_object(py) + } +} + +#[cfg(test)] +mod test { + use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject}; + use std::borrow::Cow; + use std::fmt::Debug; + use std::path::{Path, PathBuf}; + + #[test] + #[cfg(not(windows))] + fn test_non_utf8_conversion() { + Python::with_gil(|py| { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + // this is not valid UTF-8 + let payload = &[250, 251, 252, 253, 254, 255, 0, 255]; + let path = Path::new(OsStr::from_bytes(payload)); + + // do a roundtrip into Pythonland and back and compare + let py_str: PyObject = path.into_py(py); + let path_2: PathBuf = py_str.extract(py).unwrap(); + assert_eq!(path, path_2); + }); + } + + #[test] + fn test_topyobject_roundtrip() { + Python::with_gil(|py| { + fn test_roundtrip + Debug>(py: Python, obj: T) { + let pyobject = obj.to_object(py); + let pystring: &PyString = pyobject.extract(py).unwrap(); + assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); + let roundtripped_obj: PathBuf = pystring.extract().unwrap(); + assert_eq!(obj.as_ref(), roundtripped_obj.as_path()); + } + let path = Path::new("Hello\0\nšŸ"); + test_roundtrip::<&Path>(py, path); + test_roundtrip::>(py, Cow::Borrowed(path)); + test_roundtrip::>(py, Cow::Owned(path.to_path_buf())); + test_roundtrip::(py, path.to_path_buf()); + }); + } + + #[test] + fn test_intopy_roundtrip() { + Python::with_gil(|py| { + fn test_roundtrip + AsRef + Debug + Clone>( + py: Python, + obj: T, + ) { + let pyobject = obj.clone().into_py(py); + let pystring: &PyString = pyobject.extract(py).unwrap(); + assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); + let roundtripped_obj: PathBuf = pystring.extract().unwrap(); + assert!(obj.as_ref() == roundtripped_obj.as_path()); + } + let path = Path::new("Hello\0\nšŸ"); + test_roundtrip::<&Path>(py, path); + test_roundtrip::(py, path.to_path_buf()); + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index ccbd0e671ae..874b0562127 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -180,6 +180,7 @@ pub mod buffer; pub mod callback; pub mod class; pub mod conversion; +mod conversions; #[macro_use] #[doc(hidden)] pub mod derive_utils; diff --git a/src/types/string.rs b/src/types/string.rs index 18b05c99f00..7e75556e713 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -131,6 +131,13 @@ impl<'a> ToPyObject for Cow<'a, str> { } } +impl IntoPy for Cow<'_, str> { + #[inline] + fn into_py(self, py: Python) -> PyObject { + self.to_object(py) + } +} + /// Converts a Rust `String` to a Python object. /// See `PyString::new` for details on the conversion. impl ToPyObject for String {