serde-rs · jethrogb · Aug 2, 2017 · Apr 23, 2020 · jethrogb · Apr 23, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -9,6 +9,8 @@ matrix:
         - cargo test --features arbitrary_precision
         - cargo test --features raw_value
         - cargo test --features unbounded_depth
+        - cargo test --features bytes_mode
+        - cargo test --features base64
 
     - rust: stable
     - rust: beta

diff --git a/Cargo.toml b/Cargo.toml
@@ -21,6 +21,7 @@ serde = { version = "1.0.100", default-features = false }
 indexmap = { version = "1.2", optional = true }
 itoa = { version = "0.4.3", default-features = false }
 ryu = "1.0"
+b64-ct = { version = "0.1", default-features = false, optional = true }
 
 [dev-dependencies]
 automod = "0.1"
@@ -76,3 +77,9 @@ raw_value = []
 # overflow the stack after deserialization has completed, including, but not
 # limited to, Display and Debug and Drop impls.
 unbounded_depth = []
+
+# Support alternate encoding modes for bytes. Available on Rust 1.40+
+bytes_mode = []
+
+# Support Base64 as an alternate encoding mode for bytes (implies `bytes_mode`)
+base64 = ["bytes_mode", "b64-ct"]
diff --git a/src/base64.rs b/src/base64.rs
@@ -0,0 +1,155 @@
+//! Convenience functions for the base64 alternate byte encoding mode.
+//!
+//! Every function here mirrors the crate-level function of the same name, but
+//! runs the serializer or deserializer with `BytesMode::Base64`.
+
+use crate::de::Deserializer;
+use crate::error::Result;
+use crate::io;
+use crate::lib::*;
+use crate::read::{self, Read};
+use crate::ser::{CompactFormatter, Formatter, PrettyFormatter, SerializerBuilder};
+use crate::value;
+use crate::BytesMode;
+use serde::de;
+use serde::ser::Serialize;
+
+/// Deserialize a `T` from `read` in `BytesMode::Base64`, then check via
+/// `end()` that the whole stream has been consumed.
+fn from_trait<'de, R, T>(read: R) -> Result<T>
+where
+    R: Read<'de>,
+    T: de::Deserialize<'de>,
+{
+    let mut de = Deserializer::with_bytes_mode(read, BytesMode::Base64);
+    let value = tri!(de::Deserialize::deserialize(&mut de));
+
+    // Make sure the whole stream has been consumed.
+    tri!(de.end());
+    Ok(value)
+}
+
+/// Serialize `value` into `writer` with the given `formatter` in
+/// `BytesMode::Base64`.
+///
+/// Deliberately not gated on the `std` feature: the `to_vec*` and `to_string*`
+/// functions below are not gated either, so they must not depend on an item
+/// that only exists with `std` enabled.
+fn to_writer_with<W, F, T>(writer: W, formatter: F, value: &T) -> Result<()>
+where
+    W: io::Write,
+    F: Formatter,
+    T: ?Sized + Serialize,
+{
+    let mut ser = SerializerBuilder::with_formatter(writer, formatter)
+        .bytes_mode(BytesMode::Base64)
+        .build();
+    tri!(value.serialize(&mut ser));
+    Ok(())
+}
+
+/// Like `from_reader`, except it uses `BytesMode::Base64`.
+#[cfg(feature = "std")]
+pub fn from_reader<R, T>(rdr: R) -> Result<T>
+where
+    R: crate::io::Read,
+    T: de::DeserializeOwned,
+{
+    from_trait(read::IoRead::new(rdr))
+}
+
+/// Like `from_slice`, except it uses `BytesMode::Base64`.
+pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<T>
+where
+    T: de::Deserialize<'a>,
+{
+    from_trait(read::SliceRead::new(v))
+}
+
+/// Like `from_str`, except it uses `BytesMode::Base64`.
+pub fn from_str<'a, T>(s: &'a str) -> Result<T>
+where
+    T: de::Deserialize<'a>,
+{
+    from_trait(read::StrRead::new(s))
+}
+
+/// Like `to_writer`, except it uses `BytesMode::Base64`.
+#[cfg(feature = "std")]
+#[inline]
+pub fn to_writer<W, T>(writer: W, value: &T) -> Result<()>
+where
+    W: io::Write,
+    T: ?Sized + Serialize,
+{
+    to_writer_with(writer, CompactFormatter, value)
+}
+
+/// Like `to_writer_pretty`, except it uses `BytesMode::Base64`.
+#[cfg(feature = "std")]
+#[inline]
+pub fn to_writer_pretty<W, T>(writer: W, value: &T) -> Result<()>
+where
+    W: io::Write,
+    T: ?Sized + Serialize,
+{
+    to_writer_with(writer, PrettyFormatter::new(), value)
+}
+
+/// Like `to_vec`, except it uses `BytesMode::Base64`.
+#[inline]
+pub fn to_vec<T>(value: &T) -> Result<Vec<u8>>
+where
+    T: ?Sized + Serialize,
+{
+    let mut writer = Vec::with_capacity(128);
+    tri!(to_writer_with(&mut writer, CompactFormatter, value));
+    Ok(writer)
+}
+
+/// Like `to_vec_pretty`, except it uses `BytesMode::Base64`.
+#[inline]
+pub fn to_vec_pretty<T>(value: &T) -> Result<Vec<u8>>
+where
+    T: ?Sized + Serialize,
+{
+    let mut writer = Vec::with_capacity(128);
+    tri!(to_writer_with(&mut writer, PrettyFormatter::new(), value));
+    Ok(writer)
+}
+
+/// Like `to_string`, except it uses `BytesMode::Base64`.
+#[inline]
+pub fn to_string<T>(value: &T) -> Result<String>
+where
+    T: ?Sized + Serialize,
+{
+    let vec = tri!(to_vec(value));
+    let string = unsafe {
+        // SAFETY: we do not emit invalid UTF-8.
+        String::from_utf8_unchecked(vec)
+    };
+    Ok(string)
+}
+
+/// Like `to_string_pretty`, except it uses `BytesMode::Base64`.
+#[inline]
+pub fn to_string_pretty<T>(value: &T) -> Result<String>
+where
+    T: ?Sized + Serialize,
+{
+    let vec = tri!(to_vec_pretty(value));
+    let string = unsafe {
+        // SAFETY: we do not emit invalid UTF-8.
+        String::from_utf8_unchecked(vec)
+    };
+    Ok(string)
+}
+
+/// Like `to_value`, except it uses `BytesMode::Base64`.
+pub fn to_value<T>(value: T) -> Result<value::Value>
+where
+    T: Serialize,
+{
+    value.serialize(value::Serializer::with_bytes_mode(BytesMode::Base64))
+}
diff --git a/src/de.rs b/src/de.rs
@@ -5,9 +5,13 @@ use crate::lib::str::FromStr;
 use crate::lib::*;
 use crate::number::Number;
 use crate::read::{self, Fused, Reference};
+use crate::BytesMode;
 use serde::de::{self, Expected, Unexpected};
 use serde::{forward_to_deserialize_any, serde_if_integer128};
 
+#[cfg(feature = "base64")]
+use b64_ct::FromBase64;
+
 #[cfg(feature = "arbitrary_precision")]
 use crate::number::NumberDeserializer;
 
@@ -25,6 +29,7 @@ pub struct Deserializer<R> {
     remaining_depth: u8,
     #[cfg(feature = "unbounded_depth")]
     disable_recursion_limit: bool,
+    bytes_mode: BytesMode,
 }
 
 impl<'de, R> Deserializer<R>
@@ -46,6 +51,7 @@ where
                 read: read,
                 scratch: Vec::new(),
                 remaining_depth: 128,
+                bytes_mode: BytesMode::default(),
             }
         }
 
@@ -56,9 +62,19 @@ where
                 scratch: Vec::new(),
                 remaining_depth: 128,
                 disable_recursion_limit: false,
+                bytes_mode: BytesMode::default(),
             }
         }
     }
+
+    /// Build a JSON deserializer over `read` that handles bytes using `bytes_mode`.
+    #[cfg(feature = "bytes_mode")]
+    pub fn with_bytes_mode(read: R, bytes_mode: BytesMode) -> Self {
+        // Start from the defaults chosen by `new`, then override only the mode.
+        let mut deserializer = Self::new(read);
+        deserializer.bytes_mode = bytes_mode;
+        deserializer
+    }
 }
 
 #[cfg(feature = "std")]
@@ -1332,77 +1348,7 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
         self.deserialize_str(visitor)
     }
 
-    /// Parses a JSON string as bytes. Note that this function does not check
-    /// whether the bytes represent a valid UTF-8 string.
-    ///
-    /// The relevant part of the JSON specification is Section 8.2 of [RFC
-    /// 7159]:
-    ///
-    /// > When all the strings represented in a JSON text are composed entirely
-    /// > of Unicode characters (however escaped), then that JSON text is
-    /// > interoperable in the sense that all software implementations that
-    /// > parse it will agree on the contents of names and of string values in
-    /// > objects and arrays.
-    /// >
-    /// > However, the ABNF in this specification allows member names and string
-    /// > values to contain bit sequences that cannot encode Unicode characters;
-    /// > for example, "\uDEAD" (a single unpaired UTF-16 surrogate). Instances
-    /// > of this have been observed, for example, when a library truncates a
-    /// > UTF-16 string without checking whether the truncation split a
-    /// > surrogate pair.  The behavior of software that receives JSON texts
-    /// > containing such values is unpredictable; for example, implementations
-    /// > might return different values for the length of a string value or even
-    /// > suffer fatal runtime exceptions.
-    ///
-    /// [RFC 7159]: https://tools.ietf.org/html/rfc7159
-    ///
-    /// The behavior of serde_json is specified to fail on non-UTF-8 strings
-    /// when deserializing into Rust UTF-8 string types such as String, and
-    /// succeed with non-UTF-8 bytes when deserializing using this method.
-    ///
-    /// Escape sequences are processed as usual, and for `\uXXXX` escapes it is
-    /// still checked if the hex number represents a valid Unicode code point.
-    ///
-    /// # Examples
-    ///
-    /// You can use this to parse JSON strings containing invalid UTF-8 bytes.
-    ///
-    /// ```
-    /// use serde_bytes::ByteBuf;
-    ///
-    /// fn look_at_bytes() -> Result<(), serde_json::Error> {
-    ///     let json_data = b"\"some bytes: \xe5\x00\xe5\"";
-    ///     let bytes: ByteBuf = serde_json::from_slice(json_data)?;
-    ///
-    ///     assert_eq!(b'\xe5', bytes[12]);
-    ///     assert_eq!(b'\0', bytes[13]);
-    ///     assert_eq!(b'\xe5', bytes[14]);
-    ///
-    ///     Ok(())
-    /// }
-    /// #
-    /// # look_at_bytes().unwrap();
-    /// ```
-    ///
-    /// Backslash escape sequences like `\n` are still interpreted and required
-    /// to be valid, and `\u` escape sequences are required to represent valid
-    /// Unicode code points.
-    ///
-    /// ```
-    /// use serde_bytes::ByteBuf;
-    ///
-    /// fn look_at_bytes() {
-    ///     let json_data = b"\"invalid unicode surrogate: \\uD801\"";
-    ///     let parsed: Result<ByteBuf, _> = serde_json::from_slice(json_data);
-    ///
-    ///     assert!(parsed.is_err());
-    ///
-    ///     let expected_msg = "unexpected end of hex escape at line 1 column 35";
-    ///     assert_eq!(expected_msg, parsed.unwrap_err().to_string());
-    /// }
-    /// #
-    /// # look_at_bytes();
-    /// ```
+    /// Deserialize bytes per the deserializer's `BytesMode`: a JSON string is read as raw bytes, or Base64-decoded in `Base64` mode.
     fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
     where
         V: de::Visitor<'de>,
@@ -1418,9 +1364,23 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
             b'"' => {
                 self.eat_char();
                 self.scratch.clear();
-                match tri!(self.read.parse_str_raw(&mut self.scratch)) {
-                    Reference::Borrowed(b) => visitor.visit_borrowed_bytes(b),
-                    Reference::Copied(b) => visitor.visit_bytes(b),
+                match self.bytes_mode {
+                    BytesMode::IntegerArray => {
+                        match tri!(self.read.parse_str_raw(&mut self.scratch)) {
+                            Reference::Borrowed(b) => visitor.visit_borrowed_bytes(b),
+                            Reference::Copied(b) => visitor.visit_bytes(b),
+                        }
+                    }
+                    #[cfg(feature = "base64")]
+                    BytesMode::Base64 => {
+                        let string = self.read.parse_str(&mut self.scratch)?;
+                        visitor.visit_bytes(&string.from_base64().map_err(|_| {
+                            de::Error::invalid_value(
+                                de::Unexpected::Str(&string),
+                                &"base64 encoded string",
+                            )
+                        })?)
+                    }
                 }
             }
             b'[' => self.deserialize_seq(visitor),