Skip to content

Commit

Permalink
python3.12 unicode compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jul 3, 2022
1 parent fa88026 commit 3d290cb
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 131 deletions.
2 changes: 1 addition & 1 deletion script/pysort
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import orjson
os.sched_setaffinity(os.getpid(), {0, 1})


dirname = os.path.join(os.path.dirname(__file__), "data")
dirname = os.path.join(os.path.dirname(__file__), "..", "data")


def read_fixture_obj(filename):
Expand Down
59 changes: 28 additions & 31 deletions src/deserialize/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,46 +35,43 @@ pub fn read_input_to_buf(
ptr: *mut pyo3_ffi::PyObject,
) -> Result<&'static [u8], DeserializeError<'static>> {
let obj_type_ptr = ob_type!(ptr);
let contents: &[u8];
if is_type!(obj_type_ptr, STR_TYPE) {
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = read_utf8_from_str(ptr, &mut str_size);
if unlikely!(uni.is_null()) {
let buffer: *const u8;
let length: usize;
if is_type!(obj_type_ptr, BYTES_TYPE) {
buffer = unsafe { PyBytes_AS_STRING(ptr) as *const u8 };
length = unsafe { PyBytes_GET_SIZE(ptr) as usize };
} else if is_type!(obj_type_ptr, STR_TYPE) {
let uni = unicode_to_str(ptr);
if unlikely!(uni.is_none()) {
return Err(DeserializeError::new(Cow::Borrowed(INVALID_STR), 0, 0, ""));
}
contents = unsafe { std::slice::from_raw_parts(uni, str_size as usize) };
} else {
let buffer: *const u8;
let length: usize;
if is_type!(obj_type_ptr, BYTES_TYPE) {
buffer = unsafe { PyBytes_AS_STRING(ptr) as *const u8 };
length = unsafe { PyBytes_GET_SIZE(ptr) as usize };
} else if is_type!(obj_type_ptr, MEMORYVIEW_TYPE) {
let membuf = unsafe { PyMemoryView_GET_BUFFER(ptr) };
if unsafe { pyo3_ffi::PyBuffer_IsContiguous(membuf, b'C' as c_char) == 0 } {
return Err(DeserializeError::new(
Cow::Borrowed("Input type memoryview must be a C contiguous buffer"),
0,
0,
"",
));
}
buffer = unsafe { (*membuf).buf as *const u8 };
length = unsafe { (*membuf).len as usize };
} else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) {
buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8;
length = ffi!(PyByteArray_Size(ptr)) as usize;
} else {
let as_str = uni.unwrap();
buffer = as_str.as_ptr();
length = as_str.len();
} else if is_type!(obj_type_ptr, MEMORYVIEW_TYPE) {
let membuf = unsafe { PyMemoryView_GET_BUFFER(ptr) };
if unsafe { pyo3_ffi::PyBuffer_IsContiguous(membuf, b'C' as c_char) == 0 } {
return Err(DeserializeError::new(
Cow::Borrowed("Input must be bytes, bytearray, memoryview, or str"),
Cow::Borrowed("Input type memoryview must be a C contiguous buffer"),
0,
0,
"",
));
}
contents = unsafe { std::slice::from_raw_parts(buffer, length) };
buffer = unsafe { (*membuf).buf as *const u8 };
length = unsafe { (*membuf).len as usize };
} else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) {
buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8;
length = ffi!(PyByteArray_Size(ptr)) as usize;
} else {
return Err(DeserializeError::new(
Cow::Borrowed("Input must be bytes, bytearray, memoryview, or str"),
0,
0,
"",
));
}
Ok(contents)
Ok(unsafe { std::slice::from_raw_parts(buffer, length) })
}

pub fn read_buf_to_str(contents: &[u8]) -> Result<&str, DeserializeError> {
Expand Down
45 changes: 19 additions & 26 deletions src/serialize/dataclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,9 @@ impl Serialize for DataclassFastSerializer {
}
let mut map = serializer.serialize_map(None).unwrap();
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=len - 1 {
for _ in 0..=len.saturating_sub(1) {
unsafe {
pyo3_ffi::_PyDict_Next(
self.dict,
Expand All @@ -63,7 +62,7 @@ impl Serialize for DataclassFastSerializer {
std::ptr::null_mut(),
)
};
let value = PyObjectSerializer::new(
let pyvalue = PyObjectSerializer::new(
value,
self.opts,
self.default_calls,
Expand All @@ -73,19 +72,16 @@ impl Serialize for DataclassFastSerializer {
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
{
let data = read_utf8_from_str(key, &mut str_size);
if unlikely!(data.is_null()) {
err!(SerializeError::InvalidStr)
}
let key_as_str = str_from_slice!(data, str_size);
if unlikely!(key_as_str.as_bytes()[0] == b'_') {
continue;
}
map.serialize_key(key_as_str).unwrap();
let data = unicode_to_str(key);
if unlikely!(data.is_none()) {
err!(SerializeError::InvalidStr)
}

map.serialize_value(&value)?;
let key_as_str = data.unwrap();
if unlikely!(key_as_str.as_bytes()[0] == b'_') {
continue;
}
map.serialize_key(key_as_str).unwrap();
map.serialize_value(&pyvalue)?;
}
map.end()
}
Expand Down Expand Up @@ -131,7 +127,6 @@ impl Serialize for DataclassFallbackSerializer {
}
let mut map = serializer.serialize_map(None).unwrap();
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut attr: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut field: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=len - 1 {
Expand All @@ -149,21 +144,19 @@ impl Serialize for DataclassFallbackSerializer {
if unsafe { field_type != FIELD_TYPE.as_ptr() } {
continue;
}
{
let data = read_utf8_from_str(attr, &mut str_size);
if unlikely!(data.is_null()) {
err!(SerializeError::InvalidStr);
}
let key_as_str = str_from_slice!(data, str_size);
if key_as_str.as_bytes()[0] == b'_' {
continue;
}
map.serialize_key(key_as_str).unwrap();
let data = unicode_to_str(attr);
if unlikely!(data.is_none()) {
err!(SerializeError::InvalidStr);
}
let key_as_str = data.unwrap();
if key_as_str.as_bytes()[0] == b'_' {
continue;
}

let value = ffi!(PyObject_GetAttr(self.ptr, attr));
ffi!(Py_DECREF(value));

map.serialize_key(key_as_str).unwrap();
map.serialize_value(&PyObjectSerializer::new(
value,
self.opts,
Expand Down
55 changes: 22 additions & 33 deletions src/serialize/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ impl Serialize for Dict {
{
let mut map = serializer.serialize_map(None).unwrap();
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=unsafe { PyDict_GET_SIZE(self.ptr) as usize } - 1 {
Expand All @@ -63,25 +62,22 @@ impl Serialize for Dict {
std::ptr::null_mut(),
)
};
let value = PyObjectSerializer::new(
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
let key_as_str = unicode_to_str(key);
if unlikely!(key_as_str.is_none()) {
err!(SerializeError::InvalidStr)
}
let pyvalue = PyObjectSerializer::new(
value,
self.opts,
self.default_calls,
self.recursion + 1,
self.default,
);
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
{
let data = read_utf8_from_str(key, &mut str_size);
if unlikely!(data.is_null()) {
err!(SerializeError::InvalidStr)
}
map.serialize_key(str_from_slice!(data, str_size)).unwrap();
}

map.serialize_value(&value)?;
map.serialize_key(key_as_str.unwrap()).unwrap();
map.serialize_value(&pyvalue)?;
}
map.end()
}
Expand Down Expand Up @@ -123,7 +119,6 @@ impl Serialize for DictSortedKey {
let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=len - 1 {
Expand All @@ -139,11 +134,11 @@ impl Serialize for DictSortedKey {
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
let data = read_utf8_from_str(key, &mut str_size);
if unlikely!(data.is_null()) {
let data = unicode_to_str(key);
if unlikely!(data.is_none()) {
err!(SerializeError::InvalidStr)
}
items.push((str_from_slice!(data, str_size), value));
items.push((data.unwrap(), value));
}

items.sort_unstable_by(|a, b| a.0.cmp(b.0));
Expand Down Expand Up @@ -263,21 +258,19 @@ impl DictNonStrKey {
}
ObType::Str => {
// because of ObType::Enum
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = read_utf8_from_str(key, &mut str_size);
if unlikely!(uni.is_null()) {
let uni = unicode_to_str(key);
if unlikely!(uni.is_none()) {
Err(SerializeError::InvalidStr)
} else {
Ok(InlinableString::from(str_from_slice!(uni, str_size)))
Ok(InlinableString::from(uni.unwrap()))
}
}
ObType::StrSubclass => {
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = ffi!(PyUnicode_AsUTF8AndSize(key, &mut str_size)) as *const u8;
if unlikely!(uni.is_null()) {
let uni = unicode_to_str_via_ffi(key);
if unlikely!(uni.is_none()) {
Err(SerializeError::InvalidStr)
} else {
Ok(InlinableString::from(str_from_slice!(uni, str_size)))
Ok(InlinableString::from(uni.unwrap()))
}
}
ObType::Tuple
Expand All @@ -301,7 +294,6 @@ impl Serialize for DictNonStrKey {
let mut items: SmallVec<[(InlinableString, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let opts = self.opts & NOT_PASSTHROUGH;
Expand All @@ -316,14 +308,11 @@ impl Serialize for DictNonStrKey {
)
};
if is_type!(ob_type!(key), STR_TYPE) {
let data = read_utf8_from_str(key, &mut str_size);
if unlikely!(data.is_null()) {
let data = unicode_to_str(key);
if unlikely!(data.is_none()) {
err!(SerializeError::InvalidStr)
}
items.push((
InlinableString::from(str_from_slice!(data, str_size)),
value,
));
items.push((InlinableString::from(data.unwrap()), value));
} else {
match self.pyobject_to_string(key, opts) {
Ok(key_as_str) => items.push((key_as_str, value)),
Expand Down
8 changes: 3 additions & 5 deletions src/serialize/numpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -722,15 +722,13 @@ impl NumpyDatetimeUnit {
let el0 = ffi!(PyList_GET_ITEM(descr, 0));
ffi!(Py_DECREF(descr));
let descr_str = ffi!(PyTuple_GET_ITEM(el0, 1));
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = crate::unicode::read_utf8_from_str(descr_str, &mut str_size);
if str_size < 5 {
let uni = crate::unicode::unicode_to_str(descr_str).unwrap();
if uni.len() < 5 {
return Self::NaT;
}
let fmt = str_from_slice!(uni, str_size);
// unit descriptions are found at
// https://github.com/numpy/numpy/blob/b235f9e701e14ed6f6f6dcba885f7986a833743f/numpy/core/src/multiarray/datetime.c#L79-L96.
match &fmt[4..fmt.len() - 1] {
match &uni[4..uni.len() - 1] {
"Y" => Self::Years,
"M" => Self::Months,
"W" => Self::Weeks,
Expand Down
14 changes: 6 additions & 8 deletions src/serialize/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@ impl Serialize for StrSerializer {
where
S: Serializer,
{
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = read_utf8_from_str(self.ptr, &mut str_size);
if unlikely!(uni.is_null()) {
let uni = unicode_to_str(self.ptr);
if unlikely!(uni.is_none()) {
err!(SerializeError::InvalidStr)
}
serializer.serialize_str(str_from_slice!(uni, str_size))
serializer.serialize_str(uni.unwrap())
}
}

Expand All @@ -47,11 +46,10 @@ impl Serialize for StrSubclassSerializer {
where
S: Serializer,
{
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let uni = ffi!(PyUnicode_AsUTF8AndSize(self.ptr, &mut str_size)) as *const u8;
if unlikely!(uni.is_null()) {
let uni = unicode_to_str_via_ffi(self.ptr);
if unlikely!(uni.is_none()) {
err!(SerializeError::InvalidStr)
}
serializer.serialize_str(str_from_slice!(uni, str_size))
serializer.serialize_str(uni.unwrap())
}
}
Loading

0 comments on commit 3d290cb

Please sign in to comment.