Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for alternate encodings of bytes types #656

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ matrix:
- cargo test --features arbitrary_precision
- cargo test --features raw_value
- cargo test --features unbounded_depth
- cargo test --features bytes_mode
- cargo test --features base64

- rust: stable
- rust: beta
Expand Down
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ serde = { version = "1.0.100", default-features = false }
indexmap = { version = "1.2", optional = true }
itoa = { version = "0.4.3", default-features = false }
ryu = "1.0"
b64-ct = { version = "0.1", default-features = false, optional = true }

[dev-dependencies]
automod = "0.1"
Expand Down Expand Up @@ -76,3 +77,9 @@ raw_value = []
# overflow the stack after deserialization has completed, including, but not
# limited to, Display and Debug and Drop impls.
unbounded_depth = []

# Support alternate encoding modes for bytes. Available on Rust 1.40+
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bytes_mode = []

# Support the Base64-encoding alternate bytes encoding mode
base64 = ["bytes_mode", "b64-ct"]
138 changes: 138 additions & 0 deletions src/base64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
//! Convenience functions for the base64 alternate byte encoding mode.
use crate::de::Deserializer;
use crate::error::Result;
use crate::io;
use crate::read::{self, Read};
use crate::ser::{CompactFormatter, PrettyFormatter, SerializerBuilder};
use crate::value;
use crate::BytesMode;
use serde::de;
use serde::ser::Serialize;

/// Shared implementation behind the `from_*` functions in this module.
///
/// Builds a `Deserializer` over `read` configured with `BytesMode::Base64`,
/// deserializes a single `T` from it, and then calls `end()` so that input
/// which has not been fully consumed is reported as an error.
fn from_trait<'de, R, T>(read: R) -> Result<T>
where
    R: Read<'de>,
    T: de::Deserialize<'de>,
{
    let mut deserializer = Deserializer::with_bytes_mode(read, BytesMode::Base64);
    let parsed = tri!(<T as de::Deserialize<'de>>::deserialize(&mut deserializer));

    // The value alone is not enough: the whole stream must have been consumed.
    tri!(deserializer.end());
    Ok(parsed)
}

/// Like `from_reader`, except the deserializer is configured with
/// `BytesMode::Base64`.
///
/// The reader is wrapped in `read::IoRead`, and `T` must be
/// `DeserializeOwned`.
#[cfg(feature = "std")]
pub fn from_reader<R, T>(rdr: R) -> Result<T>
where
    R: io::Read,
    T: de::DeserializeOwned,
{
    let source = read::IoRead::new(rdr);
    from_trait(source)
}

/// Like `from_slice`, except the deserializer is configured with
/// `BytesMode::Base64`.
///
/// The input bytes are wrapped in `read::SliceRead`; `T` is bounded by the
/// lifetime of `v`.
pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<T>
where
    T: de::Deserialize<'a>,
{
    let source = read::SliceRead::new(v);
    from_trait(source)
}

/// Like `from_str`, except the deserializer is configured with
/// `BytesMode::Base64`.
///
/// The input string is wrapped in `read::StrRead`; `T` is bounded by the
/// lifetime of `s`.
pub fn from_str<'a, T>(s: &'a str) -> Result<T>
where
    T: de::Deserialize<'a>,
{
    let source = read::StrRead::new(s);
    from_trait(source)
}

/// Like `to_writer`, except the serializer is configured with
/// `BytesMode::Base64`.
///
/// Output is produced with `CompactFormatter` and written to `writer`.
#[cfg(feature = "std")]
#[inline]
pub fn to_writer<W, T>(writer: W, value: &T) -> Result<()>
where
    W: io::Write,
    T: ?Sized + Serialize,
{
    let builder = SerializerBuilder::with_formatter(writer, CompactFormatter);
    let mut serializer = builder.bytes_mode(BytesMode::Base64).build();
    tri!(value.serialize(&mut serializer));
    Ok(())
}

/// Like `to_writer_pretty`, except the serializer is configured with
/// `BytesMode::Base64`.
///
/// Output is produced with `PrettyFormatter::new()` and written to `writer`.
#[cfg(feature = "std")]
#[inline]
pub fn to_writer_pretty<W, T>(writer: W, value: &T) -> Result<()>
where
    W: io::Write,
    T: ?Sized + Serialize,
{
    let builder = SerializerBuilder::with_formatter(writer, PrettyFormatter::new());
    let mut serializer = builder.bytes_mode(BytesMode::Base64).build();
    tri!(value.serialize(&mut serializer));
    Ok(())
}

/// Like `to_vec`, except it uses BytesMode::Base64.
#[inline]
pub fn to_vec<T>(value: &T) -> Result<Vec<u8>>
where
T: ?Sized + Serialize,
{
let mut writer = Vec::with_capacity(128);
tri!(to_writer(&mut writer, value));
Ok(writer)
}

/// Like `to_vec_pretty`, except it uses BytesMode::Base64.
#[inline]
pub fn to_vec_pretty<T>(value: &T) -> Result<Vec<u8>>
where
T: ?Sized + Serialize,
{
let mut writer = Vec::with_capacity(128);
tri!(to_writer_pretty(&mut writer, value));
Ok(writer)
}

/// Like `to_string`, except the serializer is configured with
/// `BytesMode::Base64`.
///
/// Serializes through `to_vec` and converts the resulting bytes into a
/// `String` without re-validating them.
#[inline]
pub fn to_string<T>(value: &T) -> Result<String>
where
    T: ?Sized + Serialize,
{
    let bytes = tri!(to_vec(value));
    // SAFETY: We do not emit invalid UTF-8, so the bytes returned by `to_vec`
    // can be used as a `String` without validation.
    Ok(unsafe { String::from_utf8_unchecked(bytes) })
}

/// Like `to_string_pretty`, except the serializer is configured with
/// `BytesMode::Base64`.
///
/// Serializes through `to_vec_pretty` and converts the resulting bytes into a
/// `String` without re-validating them.
#[inline]
pub fn to_string_pretty<T>(value: &T) -> Result<String>
where
    T: ?Sized + Serialize,
{
    let bytes = tri!(to_vec_pretty(value));
    // SAFETY: We do not emit invalid UTF-8, so the bytes returned by
    // `to_vec_pretty` can be used as a `String` without validation.
    Ok(unsafe { String::from_utf8_unchecked(bytes) })
}

/// Like `to_value`, except the value serializer is configured with
/// `BytesMode::Base64`.
///
/// Takes `value` by value and serializes it into a `value::Value`.
pub fn to_value<T>(value: T) -> Result<value::Value>
where
    T: Serialize,
{
    let serializer = value::Serializer::with_bytes_mode(BytesMode::Base64);
    value.serialize(serializer)
}
108 changes: 34 additions & 74 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ use crate::lib::str::FromStr;
use crate::lib::*;
use crate::number::Number;
use crate::read::{self, Fused, Reference};
use crate::BytesMode;
use serde::de::{self, Expected, Unexpected};
use serde::{forward_to_deserialize_any, serde_if_integer128};

#[cfg(feature = "base64")]
use b64_ct::FromBase64;

#[cfg(feature = "arbitrary_precision")]
use crate::number::NumberDeserializer;

Expand All @@ -25,6 +29,7 @@ pub struct Deserializer<R> {
remaining_depth: u8,
#[cfg(feature = "unbounded_depth")]
disable_recursion_limit: bool,
bytes_mode: BytesMode,
}

impl<'de, R> Deserializer<R>
Expand All @@ -46,6 +51,7 @@ where
read: read,
scratch: Vec::new(),
remaining_depth: 128,
bytes_mode: BytesMode::default(),
}
}

Expand All @@ -56,9 +62,19 @@ where
scratch: Vec::new(),
remaining_depth: 128,
disable_recursion_limit: false,
bytes_mode: BytesMode::default(),
}
}
}

/// Create a JSON deserializer with a specified encoding mode for bytes.
///
/// All other settings are the ones `Deserializer::new` produces for `read`;
/// only the `bytes_mode` field is replaced with the given value.
#[cfg(feature = "bytes_mode")]
pub fn with_bytes_mode(read: R, bytes_mode: BytesMode) -> Self {
    let mut deserializer = Self::new(read);
    deserializer.bytes_mode = bytes_mode;
    deserializer
}
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -1332,77 +1348,7 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
self.deserialize_str(visitor)
}

/// Parses a JSON string as bytes. Note that this function does not check
/// whether the bytes represent a valid UTF-8 string.
///
/// The relevant part of the JSON specification is Section 8.2 of [RFC
/// 7159]:
///
/// > When all the strings represented in a JSON text are composed entirely
/// > of Unicode characters (however escaped), then that JSON text is
/// > interoperable in the sense that all software implementations that
/// > parse it will agree on the contents of names and of string values in
/// > objects and arrays.
/// >
/// > However, the ABNF in this specification allows member names and string
/// > values to contain bit sequences that cannot encode Unicode characters;
/// > for example, "\uDEAD" (a single unpaired UTF-16 surrogate). Instances
/// > of this have been observed, for example, when a library truncates a
/// > UTF-16 string without checking whether the truncation split a
/// > surrogate pair. The behavior of software that receives JSON texts
/// > containing such values is unpredictable; for example, implementations
/// > might return different values for the length of a string value or even
/// > suffer fatal runtime exceptions.
///
/// [RFC 7159]: https://tools.ietf.org/html/rfc7159
///
/// The behavior of serde_json is specified to fail on non-UTF-8 strings
/// when deserializing into Rust UTF-8 string types such as String, and
/// succeed with non-UTF-8 bytes when deserializing using this method.
///
/// Escape sequences are processed as usual, and for `\uXXXX` escapes it is
/// still checked if the hex number represents a valid Unicode code point.
///
/// # Examples
///
/// You can use this to parse JSON strings containing invalid UTF-8 bytes.
///
/// ```
/// use serde_bytes::ByteBuf;
///
/// fn look_at_bytes() -> Result<(), serde_json::Error> {
/// let json_data = b"\"some bytes: \xe5\x00\xe5\"";
/// let bytes: ByteBuf = serde_json::from_slice(json_data)?;
///
/// assert_eq!(b'\xe5', bytes[12]);
/// assert_eq!(b'\0', bytes[13]);
/// assert_eq!(b'\xe5', bytes[14]);
///
/// Ok(())
/// }
/// #
/// # look_at_bytes().unwrap();
/// ```
///
/// Backslash escape sequences like `\n` are still interpreted and required
/// to be valid, and `\u` escape sequences are required to represent valid
/// Unicode code points.
///
/// ```
/// use serde_bytes::ByteBuf;
///
/// fn look_at_bytes() {
/// let json_data = b"\"invalid unicode surrogate: \\uD801\"";
/// let parsed: Result<ByteBuf, _> = serde_json::from_slice(json_data);
///
/// assert!(parsed.is_err());
///
/// let expected_msg = "unexpected end of hex escape at line 1 column 35";
/// assert_eq!(expected_msg, parsed.unwrap_err().to_string());
/// }
/// #
/// # look_at_bytes();
/// ```
/// Deserialize bytes according to the deserializer's byte mode.
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
Expand All @@ -1418,9 +1364,23 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
b'"' => {
self.eat_char();
self.scratch.clear();
match tri!(self.read.parse_str_raw(&mut self.scratch)) {
Reference::Borrowed(b) => visitor.visit_borrowed_bytes(b),
Reference::Copied(b) => visitor.visit_bytes(b),
match self.bytes_mode {
BytesMode::IntegerArray => {
match tri!(self.read.parse_str_raw(&mut self.scratch)) {
Reference::Borrowed(b) => visitor.visit_borrowed_bytes(b),
Reference::Copied(b) => visitor.visit_bytes(b),
}
}
#[cfg(feature = "base64")]
BytesMode::Base64 => {
let string = self.read.parse_str(&mut self.scratch)?;
visitor.visit_bytes(&string.from_base64().map_err(|_| {
de::Error::invalid_value(
de::Unexpected::Str(&string),
&"base64 encoded string",
)
})?)
}
}
}
b'[' => self.deserialize_seq(visitor),
Expand Down
Loading