diff --git a/src/ffi/cpython/mod.rs b/src/ffi/cpython/mod.rs index 19f3ccd12f1..b1701d1794e 100644 --- a/src/ffi/cpython/mod.rs +++ b/src/ffi/cpython/mod.rs @@ -20,6 +20,7 @@ pub(crate) mod pydebug; #[cfg(all(Py_3_8, not(PyPy)))] pub(crate) mod pylifecycle; pub(crate) mod pystate; +pub(crate) mod unicodeobject; pub use self::abstract_::*; #[cfg(not(PyPy))] @@ -40,3 +41,4 @@ pub use self::pydebug::*; #[cfg(all(Py_3_8, not(PyPy)))] pub use self::pylifecycle::*; pub use self::pystate::*; +pub use self::unicodeobject::*; diff --git a/src/ffi/cpython/unicodeobject.rs b/src/ffi/cpython/unicodeobject.rs new file mode 100644 index 00000000000..7c61243513b --- /dev/null +++ b/src/ffi/cpython/unicodeobject.rs @@ -0,0 +1,605 @@ +use crate::ffi::{ + PyObject, PyUnicode_Check, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_hash_t, Py_ssize_t, +}; +use libc::wchar_t; +use std::os::raw::{c_char, c_int, c_uint, c_void}; + +// skipped Py_UNICODE_ISSPACE() +// skipped Py_UNICODE_ISLOWER() +// skipped Py_UNICODE_ISUPPER() +// skipped Py_UNICODE_ISTITLE() +// skipped Py_UNICODE_ISLINEBREAK +// skipped Py_UNICODE_TOLOWER +// skipped Py_UNICODE_TOUPPER +// skipped Py_UNICODE_TOTITLE +// skipped Py_UNICODE_ISDECIMAL +// skipped Py_UNICODE_ISDIGIT +// skipped Py_UNICODE_ISNUMERIC +// skipped Py_UNICODE_ISPRINTABLE +// skipped Py_UNICODE_TODECIMAL +// skipped Py_UNICODE_TODIGIT +// skipped Py_UNICODE_TONUMERIC +// skipped Py_UNICODE_ISALPHA +// skipped Py_UNICODE_ISALNUM +// skipped Py_UNICODE_COPY +// skipped Py_UNICODE_FILL +// skipped Py_UNICODE_IS_SURROGATE +// skipped Py_UNICODE_IS_HIGH_SURROGATE +// skipped Py_UNICODE_IS_LOW_SURROGATE +// skipped Py_UNICODE_JOIN_SURROGATES +// skipped Py_UNICODE_HIGH_SURROGATE +// skipped Py_UNICODE_LOW_SURROGATE + +#[repr(C)] +pub struct PyASCIIObject { + pub ob_base: PyObject, + pub length: Py_ssize_t, + pub hash: Py_hash_t, + /// A bit field with various properties. + /// + /// Rust doesn't expose bitfields. 
So we have accessor functions for + /// retrieving values. + /// + /// unsigned int interned:2; // SSTATE_* constants. + /// unsigned int kind:3; // PyUnicode_*_KIND constants. + /// unsigned int compact:1; + /// unsigned int ascii:1; + /// unsigned int ready:1; + /// unsigned int :24; + pub state: u32, + pub wstr: *mut wchar_t, +} + +impl PyASCIIObject { + #[inline] + pub fn interned(&self) -> c_uint { + self.state & 3 + } + + #[inline] + pub fn kind(&self) -> c_uint { + (self.state >> 2) & 7 + } + + #[inline] + pub fn compact(&self) -> c_uint { + (self.state >> 5) & 1 + } + + #[inline] + pub fn ascii(&self) -> c_uint { + (self.state >> 6) & 1 + } + + #[inline] + pub fn ready(&self) -> c_uint { + (self.state >> 7) & 1 + } +} + +#[repr(C)] +pub struct PyCompactUnicodeObject { + pub _base: PyASCIIObject, + pub utf8_length: Py_ssize_t, + pub utf8: *mut c_char, + pub wstr_length: Py_ssize_t, +} + +#[repr(C)] +pub union PyUnicodeObjectData { + any: *mut c_void, + latin1: *mut Py_UCS1, + ucs2: *mut Py_UCS2, + ucs4: *mut Py_UCS4, +} + +#[repr(C)] +pub struct PyUnicodeObject { + pub _base: PyCompactUnicodeObject, + pub data: PyUnicodeObjectData, +} + +extern "C" { + #[cfg(not(PyPy))] + pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int; +} + +// skipped PyUnicode_GET_SIZE +// skipped PyUnicode_GET_DATA_SIZE +// skipped PyUnicode_AS_UNICODE +// skipped PyUnicode_AS_DATA + +pub const SSTATE_NOT_INTERNED: c_uint = 0; +pub const SSTATE_INTERNED_MORTAL: c_uint = 1; +pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2; + +#[inline] +pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint { + debug_assert!(PyUnicode_Check(op) != 0); + debug_assert!(PyUnicode_IS_READY(op) != 0); + + (*(op as *mut PyASCIIObject)).ascii() +} + +#[inline] +pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint { + (*(op as *mut PyASCIIObject)).compact() +} + +#[inline] +pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint { + if (*(op as 
*mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0 { + 1 + } else { + 0 + } +} + +#[cfg(not(Py_3_12))] +#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))] +pub const PyUnicode_WCHAR_KIND: c_uint = 0; + +pub const PyUnicode_1BYTE_KIND: c_uint = 1; +pub const PyUnicode_2BYTE_KIND: c_uint = 2; +pub const PyUnicode_4BYTE_KIND: c_uint = 4; + +#[inline] +pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 { + PyUnicode_DATA(op) as *mut Py_UCS1 +} + +#[inline] +pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 { + PyUnicode_DATA(op) as *mut Py_UCS2 +} + +#[inline] +pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 { + PyUnicode_DATA(op) as *mut Py_UCS4 +} + +#[inline] +pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint { + debug_assert!(PyUnicode_Check(op) != 0); + debug_assert!(PyUnicode_IS_READY(op) != 0); + + (*(op as *mut PyASCIIObject)).kind() +} + +#[inline] +pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void { + if PyUnicode_IS_ASCII(op) != 0 { + (op as *mut PyASCIIObject).offset(1) as *mut c_void + } else { + (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void + } +} + +#[inline] +pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void { + debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null()); + + (*(op as *mut PyUnicodeObject)).data.any +} + +#[inline] +pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void { + debug_assert!(PyUnicode_Check(op) != 0); + + if PyUnicode_IS_COMPACT(op) != 0 { + _PyUnicode_COMPACT_DATA(op) + } else { + _PyUnicode_NONCOMPACT_DATA(op) + } +} + +// skipped PyUnicode_WRITE +// skipped PyUnicode_READ +// skipped PyUnicode_READ_CHAR + +#[inline] +pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t { + debug_assert!(PyUnicode_Check(op) != 0); + debug_assert!(PyUnicode_IS_READY(op) != 0); + + (*(op as *mut PyASCIIObject)).length +} + +#[inline] +pub unsafe fn 
PyUnicode_IS_READY(op: *mut PyObject) -> c_uint { + (*(op as *mut PyASCIIObject)).ready() +} + +#[cfg(not(Py_3_12))] +#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))] +#[inline] +pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int { + debug_assert!(PyUnicode_Check(op) != 0); + + if PyUnicode_IS_READY(op) != 0 { + 0 + } else { + _PyUnicode_Ready(op) + } +} + +// skipped PyUnicode_MAX_CHAR_VALUE +// skipped _PyUnicode_get_wstr_length +// skipped PyUnicode_WSTR_LENGTH + +extern "C" { + #[cfg_attr(PyPy, link_name = "PyPyUnicode_New")] + pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject; + #[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")] + pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int; + + // skipped _PyUnicode_Copy + + #[cfg(not(PyPy))] + #[cfg_attr(docsrs, doc(cfg(not(PyPy))))] + pub fn PyUnicode_CopyCharacters( + to: *mut PyObject, + to_start: Py_ssize_t, + from: *mut PyObject, + from_start: Py_ssize_t, + how_many: Py_ssize_t, + ) -> Py_ssize_t; + + // skipped _PyUnicode_FastCopyCharacters + + #[cfg(not(PyPy))] + #[cfg_attr(docsrs, doc(cfg(not(PyPy))))] + pub fn PyUnicode_Fill( + unicode: *mut PyObject, + start: Py_ssize_t, + length: Py_ssize_t, + fill_char: Py_UCS4, + ) -> Py_ssize_t; + + // skipped _PyUnicode_FastFill + + #[cfg(not(Py_3_12))] + #[deprecated] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")] + pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject; + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")] + pub fn PyUnicode_FromKindAndData( + kind: c_int, + buffer: *const c_void, + size: Py_ssize_t, + ) -> *mut PyObject; + + // skipped _PyUnicode_FromASCII + // skipped _PyUnicode_FindMaxChar + + #[cfg(not(Py_3_12))] + #[deprecated] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")] + pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE; + + // skipped _PyUnicode_AsUnicode + + #[cfg(not(Py_3_12))] + #[deprecated] + #[cfg_attr(PyPy, 
link_name = "PyPyUnicode_AsUnicodeAndSize")] + pub fn PyUnicode_AsUnicodeAndSize( + unicode: *mut PyObject, + size: *mut Py_ssize_t, + ) -> *mut Py_UNICODE; + + // skipped PyUnicode_GetMax +} + +// skipped _PyUnicodeWriter +// skipped _PyUnicodeWriter_Init +// skipped _PyUnicodeWriter_Prepare +// skipped _PyUnicodeWriter_PrepareInternal +// skipped _PyUnicodeWriter_PrepareKind +// skipped _PyUnicodeWriter_PrepareKindInternal +// skipped _PyUnicodeWriter_WriteChar +// skipped _PyUnicodeWriter_WriteStr +// skipped _PyUnicodeWriter_WriteSubstring +// skipped _PyUnicodeWriter_WriteASCIIString +// skipped _PyUnicodeWriter_WriteLatin1String +// skipped _PyUnicodeWriter_Finish +// skipped _PyUnicodeWriter_Dealloc +// skipped _PyUnicode_FormatAdvancedWriter + +extern "C" { + #[cfg(Py_3_7)] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")] + pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char; + + #[cfg(not(Py_3_7))] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")] + pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char; + + // skipped _PyUnicode_AsStringAndSize + + #[cfg(Py_3_7)] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")] + pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char; + + #[cfg(not(Py_3_7))] + #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")] + pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char; + + // skipped _PyUnicode_AsString + + pub fn PyUnicode_Encode( + s: *const Py_UNICODE, + size: Py_ssize_t, + encoding: *const c_char, + errors: *const c_char, + ) -> *mut PyObject; + + pub fn PyUnicode_EncodeUTF7( + data: *const Py_UNICODE, + length: Py_ssize_t, + base64SetO: c_int, + base64WhiteSpace: c_int, + errors: *const c_char, + ) -> *mut PyObject; + + // skipped _PyUnicode_EncodeUTF7 + // skipped _PyUnicode_AsUTF8String + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")] + pub fn PyUnicode_EncodeUTF8( + data: *const 
Py_UNICODE, + length: Py_ssize_t, + errors: *const c_char, + ) -> *mut PyObject; + + pub fn PyUnicode_EncodeUTF32( + data: *const Py_UNICODE, + length: Py_ssize_t, + errors: *const c_char, + byteorder: c_int, + ) -> *mut PyObject; + + // skipped _PyUnicode_EncodeUTF32 + + pub fn PyUnicode_EncodeUTF16( + data: *const Py_UNICODE, + length: Py_ssize_t, + errors: *const c_char, + byteorder: c_int, + ) -> *mut PyObject; + + // skipped _PyUnicode_EncodeUTF16 + // skipped _PyUnicode_DecodeUnicodeEscape + + pub fn PyUnicode_EncodeUnicodeEscape( + data: *const Py_UNICODE, + length: Py_ssize_t, + ) -> *mut PyObject; + + pub fn PyUnicode_EncodeRawUnicodeEscape( + data: *const Py_UNICODE, + length: Py_ssize_t, + ) -> *mut PyObject; + + // skipped _PyUnicode_AsLatin1String + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")] + pub fn PyUnicode_EncodeLatin1( + data: *const Py_UNICODE, + length: Py_ssize_t, + errors: *const c_char, + ) -> *mut PyObject; + + // skipped _PyUnicode_AsASCIIString + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")] + pub fn PyUnicode_EncodeASCII( + data: *const Py_UNICODE, + length: Py_ssize_t, + errors: *const c_char, + ) -> *mut PyObject; + + pub fn PyUnicode_EncodeCharmap( + data: *const Py_UNICODE, + length: Py_ssize_t, + mapping: *mut PyObject, + errors: *const c_char, + ) -> *mut PyObject; + + // skipped _PyUnicode_EncodeCharmap + + pub fn PyUnicode_TranslateCharmap( + data: *const Py_UNICODE, + length: Py_ssize_t, + table: *mut PyObject, + errors: *const c_char, + ) -> *mut PyObject; + + // skipped PyUnicode_EncodeMBCS + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")] + pub fn PyUnicode_EncodeDecimal( + s: *mut Py_UNICODE, + length: Py_ssize_t, + output: *mut c_char, + errors: *const c_char, + ) -> c_int; + + #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")] + pub fn PyUnicode_TransformDecimalToASCII( + s: *mut Py_UNICODE, + length: Py_ssize_t, + ) -> *mut PyObject; + + // skipped 
_PyUnicode_TransformDecimalAndSpaceToASCII +} + +// skipped _PyUnicode_JoinArray +// skipped _PyUnicode_EqualToASCIIId +// skipped _PyUnicode_EqualToASCIIString +// skipped _PyUnicode_XStrip +// skipped _PyUnicode_InsertThousandsGrouping + +// skipped _Py_ascii_whitespace + +// skipped _PyUnicode_IsLowercase +// skipped _PyUnicode_IsUppercase +// skipped _PyUnicode_IsTitlecase +// skipped _PyUnicode_IsXidStart +// skipped _PyUnicode_IsXidContinue +// skipped _PyUnicode_IsWhitespace +// skipped _PyUnicode_IsLinebreak +// skipped _PyUnicode_ToLowercase +// skipped _PyUnicode_ToUppercase +// skipped _PyUnicode_ToTitlecase +// skipped _PyUnicode_ToLowerFull +// skipped _PyUnicode_ToTitleFull +// skipped _PyUnicode_ToUpperFull +// skipped _PyUnicode_ToFoldedFull +// skipped _PyUnicode_IsCaseIgnorable +// skipped _PyUnicode_IsCased +// skipped _PyUnicode_ToDecimalDigit +// skipped _PyUnicode_ToDigit +// skipped _PyUnicode_ToNumeric +// skipped _PyUnicode_IsDecimalDigit +// skipped _PyUnicode_IsDigit +// skipped _PyUnicode_IsNumeric +// skipped _PyUnicode_IsPrintable +// skipped _PyUnicode_IsAlpha +// skipped Py_UNICODE_strlen +// skipped Py_UNICODE_strcpy +// skipped Py_UNICODE_strcat +// skipped Py_UNICODE_strncpy +// skipped Py_UNICODE_strcmp +// skipped Py_UNICODE_strncmp +// skipped Py_UNICODE_strchr +// skipped Py_UNICODE_strrchr +// skipped _PyUnicode_FormatLong +// skipped PyUnicode_AsUnicodeCopy +// skipped _PyUnicode_FromId +// skipped _PyUnicode_EQ +// skipped _PyUnicode_ScanIdentifier + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::PyString; + use crate::{AsPyPointer, Python}; + + #[test] + fn ascii_object_bitfield() { + let ob_base: PyObject = unsafe { std::mem::zeroed() }; + + let mut o = PyASCIIObject { + ob_base, + length: 0, + hash: 0, + state: 0, + wstr: std::ptr::null_mut() as *mut wchar_t, + }; + + assert_eq!(o.interned(), 0); + assert_eq!(o.kind(), 0); + assert_eq!(o.compact(), 0); + assert_eq!(o.ascii(), 0); + assert_eq!(o.ready(), 
0); + + for i in 0..4 { + o.state = i; + assert_eq!(o.interned(), i); + } + + for i in 0..8 { + o.state = i << 2; + assert_eq!(o.kind(), i); + } + + o.state = 1 << 5; + assert_eq!(o.compact(), 1); + + o.state = 1 << 6; + assert_eq!(o.ascii(), 1); + + o.state = 1 << 7; + assert_eq!(o.ready(), 1); + } + + #[test] + #[cfg_attr(Py_3_10, allow(deprecated))] + fn ascii() { + Python::with_gil(|py| { + // This test relies on implementation details of PyString. + let s = PyString::new(py, "hello, world"); + let ptr = s.as_ptr(); + + unsafe { + let ascii_ptr = ptr as *mut PyASCIIObject; + let ascii = ascii_ptr.as_ref().unwrap(); + + assert_eq!(ascii.interned(), 0); + assert_eq!(ascii.kind(), PyUnicode_1BYTE_KIND); + assert_eq!(ascii.compact(), 1); + assert_eq!(ascii.ascii(), 1); + assert_eq!(ascii.ready(), 1); + + assert_eq!(PyUnicode_IS_ASCII(ptr), 1); + assert_eq!(PyUnicode_IS_COMPACT(ptr), 1); + assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 1); + + assert!(!PyUnicode_1BYTE_DATA(ptr).is_null()); + // 2 and 4 byte macros return nonsense for this string instance. + assert_eq!(PyUnicode_KIND(ptr), PyUnicode_1BYTE_KIND); + + assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null()); + // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings. + assert!(!PyUnicode_DATA(ptr).is_null()); + + assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as _); + assert_eq!(PyUnicode_IS_READY(ptr), 1); + + // This has potential to mutate object. But it should be a no-op since + // we're already ready. 
+ assert_eq!(PyUnicode_READY(ptr), 0); + } + }) + } + + #[test] + #[cfg_attr(Py_3_10, allow(deprecated))] + fn ucs4() { + Python::with_gil(|py| { + let s = "哈哈🐈"; + let py_string = PyString::new(py, s); + let ptr = py_string.as_ptr(); + + unsafe { + let ascii_ptr = ptr as *mut PyASCIIObject; + let ascii = ascii_ptr.as_ref().unwrap(); + + assert_eq!(ascii.interned(), 0); + assert_eq!(ascii.kind(), PyUnicode_4BYTE_KIND); + assert_eq!(ascii.compact(), 1); + assert_eq!(ascii.ascii(), 0); + assert_eq!(ascii.ready(), 1); + + assert_eq!(PyUnicode_IS_ASCII(ptr), 0); + assert_eq!(PyUnicode_IS_COMPACT(ptr), 1); + assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 0); + + assert!(!PyUnicode_4BYTE_DATA(ptr).is_null()); + assert_eq!(PyUnicode_KIND(ptr), PyUnicode_4BYTE_KIND); + + assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null()); + // _PyUnicode_NONCOMPACT_DATA isn't valid for compact strings. + assert!(!PyUnicode_DATA(ptr).is_null()); + + assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _); + assert_eq!(PyUnicode_IS_READY(ptr), 1); + + // This has potential to mutate object. But it should be a no-op since + // we're already ready. 
+ assert_eq!(PyUnicode_READY(ptr), 0); + } + }) + } +} diff --git a/src/ffi/unicodeobject.rs b/src/ffi/unicodeobject.rs index de51d816012..9f215cf8299 100644 --- a/src/ffi/unicodeobject.rs +++ b/src/ffi/unicodeobject.rs @@ -40,40 +40,11 @@ pub unsafe fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int { pub const Py_UNICODE_REPLACEMENT_CHARACTER: Py_UCS4 = 0xFFFD; extern "C" { - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject; - - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_CopyCharacters( - to: *mut PyObject, - to_start: Py_ssize_t, - from: *mut PyObject, - from_start: Py_ssize_t, - how_many: Py_ssize_t, - ) -> Py_ssize_t; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_Fill( - unicode: *mut PyObject, - start: Py_ssize_t, - length: Py_ssize_t, - fill_char: Py_UCS4, - ) -> Py_ssize_t; - #[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))] - #[deprecated] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")] - pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromStringAndSize")] pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut PyObject; pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_FromKindAndData( - kind: c_int, - buffer: *const c_void, - size: Py_ssize_t, - ) -> *mut PyObject; - pub fn PyUnicode_Substring( str: *mut PyObject, start: Py_ssize_t, @@ -86,17 +57,6 @@ extern "C" { copy_null: c_int, ) -> *mut Py_UCS4; pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4; - #[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))] - #[deprecated] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")] - pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE; - #[cfg(all(not(Py_LIMITED_API), not(Py_3_12)))] - #[deprecated] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")] - pub fn 
PyUnicode_AsUnicodeAndSize( - unicode: *mut PyObject, - size: *mut Py_ssize_t, - ) -> *mut Py_UNICODE; #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetLength")] pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t; #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetSize")] @@ -143,20 +103,6 @@ extern "C" { #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromOrdinal")] pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject; pub fn PyUnicode_ClearFreeList() -> c_int; - #[cfg(any(not(Py_LIMITED_API), Py_3_10))] - #[cfg(Py_3_7)] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")] - pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char; - #[cfg(not(Py_3_7))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")] - pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char; - #[cfg(not(Py_LIMITED_API))] - #[cfg(Py_3_7)] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")] - pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char; - #[cfg(not(Py_3_7))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")] - pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char; #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetDefaultEncoding")] pub fn PyUnicode_GetDefaultEncoding() -> *const c_char; #[cfg_attr(PyPy, link_name = "PyPyUnicode_Decode")] @@ -176,13 +122,6 @@ extern "C" { encoding: *const c_char, errors: *const c_char, ) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_Encode( - s: *const Py_UNICODE, - size: Py_ssize_t, - encoding: *const c_char, - errors: *const c_char, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedObject")] pub fn PyUnicode_AsEncodedObject( unicode: *mut PyObject, @@ -212,14 +151,6 @@ extern "C" { errors: *const c_char, consumed: *mut Py_ssize_t, ) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeUTF7( - data: *const Py_UNICODE, - length: Py_ssize_t, - base64SetO: c_int, - 
base64WhiteSpace: c_int, - errors: *const c_char, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF8")] pub fn PyUnicode_DecodeUTF8( string: *const c_char, @@ -234,13 +165,6 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8String")] pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")] - pub fn PyUnicode_EncodeUTF8( - data: *const Py_UNICODE, - length: Py_ssize_t, - errors: *const c_char, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF32")] pub fn PyUnicode_DecodeUTF32( string: *const c_char, @@ -257,13 +181,6 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF32String")] pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeUTF32( - data: *const Py_UNICODE, - length: Py_ssize_t, - errors: *const c_char, - byteorder: c_int, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF16")] pub fn PyUnicode_DecodeUTF16( string: *const c_char, @@ -280,13 +197,6 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF16String")] pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeUTF16( - data: *const Py_UNICODE, - length: Py_ssize_t, - errors: *const c_char, - byteorder: c_int, - ) -> *mut PyObject; pub fn PyUnicode_DecodeUnicodeEscape( string: *const c_char, length: Py_ssize_t, @@ -294,22 +204,12 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeEscapeString")] pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeUnicodeEscape( - data: *const Py_UNICODE, - length: Py_ssize_t, - ) -> *mut PyObject; pub fn PyUnicode_DecodeRawUnicodeEscape( 
string: *const c_char, length: Py_ssize_t, errors: *const c_char, ) -> *mut PyObject; pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeRawUnicodeEscape( - data: *const Py_UNICODE, - length: Py_ssize_t, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeLatin1")] pub fn PyUnicode_DecodeLatin1( string: *const c_char, @@ -318,13 +218,6 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsLatin1String")] pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")] - pub fn PyUnicode_EncodeLatin1( - data: *const Py_UNICODE, - length: Py_ssize_t, - errors: *const c_char, - ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeASCII")] pub fn PyUnicode_DecodeASCII( string: *const c_char, @@ -333,13 +226,6 @@ extern "C" { ) -> *mut PyObject; #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsASCIIString")] pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")] - pub fn PyUnicode_EncodeASCII( - data: *const Py_UNICODE, - length: Py_ssize_t, - errors: *const c_char, - ) -> *mut PyObject; pub fn PyUnicode_DecodeCharmap( string: *const c_char, length: Py_ssize_t, @@ -350,35 +236,6 @@ extern "C" { unicode: *mut PyObject, mapping: *mut PyObject, ) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_EncodeCharmap( - data: *const Py_UNICODE, - length: Py_ssize_t, - mapping: *mut PyObject, - errors: *const c_char, - ) -> *mut PyObject; - #[cfg(not(Py_LIMITED_API))] - pub fn PyUnicode_TranslateCharmap( - data: *const Py_UNICODE, - length: Py_ssize_t, - table: *mut PyObject, - errors: *const c_char, - ) -> *mut PyObject; - - #[cfg(not(Py_LIMITED_API))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")] - pub fn 
PyUnicode_EncodeDecimal( - s: *mut Py_UNICODE, - length: Py_ssize_t, - output: *mut c_char, - errors: *const c_char, - ) -> c_int; - #[cfg(not(Py_LIMITED_API))] - #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")] - pub fn PyUnicode_TransformDecimalToASCII( - s: *mut Py_UNICODE, - length: Py_ssize_t, - ) -> *mut PyObject; pub fn PyUnicode_DecodeLocaleAndSize( str: *const c_char, len: Py_ssize_t, diff --git a/src/types/string.rs b/src/types/string.rs index 5cf39a5094b..f382e33264c 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -49,8 +49,8 @@ impl PyString { pub fn to_str(&self) -> PyResult<&str> { let utf8_slice = { cfg_if::cfg_if! { - if #[cfg(any(not(Py_LIMITED_API), Py_3_10))] { - // PyUnicode_AsUTF8AndSize only available on limited API from Python 3.10 and up. + if #[cfg(not(Py_LIMITED_API))] { + // PyUnicode_AsUTF8AndSize is not available when building against the limited API. let mut size: ffi::Py_ssize_t = 0; let data = unsafe { ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) }; if data.is_null() {