Skip to content

Commit

Permalink
Merge pull request #811 from RedPhoenixQ/error-narrowing
Browse files Browse the repository at this point in the history
Make Errors more "narrow"
  • Loading branch information
Mingun authored Oct 12, 2024
2 parents 8cb76cb + 00a0962 commit 6eea6bb
Show file tree
Hide file tree
Showing 11 changed files with 302 additions and 256 deletions.
5 changes: 5 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@
- [#227]: Split `SeError` from `DeError` in the `serialize` feature.
Serialize functions and methods now return `SeError`.
- [#810]: Return `std::io::Error` from `Writer` methods.
- [#811]: Split `NamespaceError` and `EncodingError` from `Error`.
- [#811]: Renamed `Error::EscapeError` to `Error::Escape` to match other variants.
- [#811]: Narrow down error return type from `Error` where only one variant is ever returned:
attribute related methods on `BytesStart` and `BytesDecl` returns `AttrError`

[#227]: https://github.com/tafia/quick-xml/issues/227
[#810]: https://github.com/tafia/quick-xml/pull/810
[#811]: https://github.com/tafia/quick-xml/pull/811


## 0.36.2 -- 2024-09-20
Expand Down
5 changes: 4 additions & 1 deletion examples/read_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ impl Translation {
})
} else {
dbg!("Expected Event::Start for Text, got: {:?}", &event);
let name_string = reader.decoder().decode(name.as_ref())?;
let name_string = reader
.decoder()
.decode(name.as_ref())
.map_err(quick_xml::Error::Encoding)?;
Err(AppError::NoText(name_string.into()))
}
} else {
Expand Down
71 changes: 60 additions & 11 deletions src/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
//! A module for wrappers that encode / decode data.
use std::borrow::Cow;
use std::str::Utf8Error;

#[cfg(feature = "encoding")]
use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};

#[cfg(feature = "encoding")]
use crate::Error;
use crate::Result;

/// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8.
/// See <https://unicode.org/faq/utf_bom.html#bom1>
pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
Expand All @@ -21,6 +18,48 @@ pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
#[cfg(feature = "encoding")]
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];

/// An error when decoding or encoding
///
/// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`]
///
/// [`encoding`]: ../index.html#encoding
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum EncodingError {
/// Input was not valid UTF-8
Utf8(Utf8Error),
/// Input did not adhere to the given encoding
#[cfg(feature = "encoding")]
Other(&'static Encoding),
}

impl From<Utf8Error> for EncodingError {
#[inline]
fn from(e: Utf8Error) -> Self {
Self::Utf8(e)
}
}

impl std::error::Error for EncodingError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::Utf8(e) => Some(e),
#[cfg(feature = "encoding")]
Self::Other(_) => None,
}
}
}

impl std::fmt::Display for EncodingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e),
#[cfg(feature = "encoding")]
Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()),
}
}
}

/// Decoder of byte slices into strings.
///
/// If feature [`encoding`] is enabled, this encoding taken from the `"encoding"`
Expand Down Expand Up @@ -79,7 +118,7 @@ impl Decoder {
///
/// ----
/// Returns an error in case of malformed sequences in the `bytes`.
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>, EncodingError> {
#[cfg(not(feature = "encoding"))]
let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));

Expand All @@ -90,7 +129,7 @@ impl Decoder {
}

/// Like [`decode`][Self::decode] but using a pre-allocated buffer.
pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<()> {
pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> {
#[cfg(not(feature = "encoding"))]
buf.push_str(std::str::from_utf8(bytes)?);

Expand All @@ -101,7 +140,10 @@ impl Decoder {
}

/// Decodes the `Cow` buffer, preserves the lifetime
pub(crate) fn decode_cow<'b>(&self, bytes: &Cow<'b, [u8]>) -> Result<Cow<'b, str>> {
pub(crate) fn decode_cow<'b>(
&self,
bytes: &Cow<'b, [u8]>,
) -> Result<Cow<'b, str>, EncodingError> {
match bytes {
Cow::Borrowed(bytes) => self.decode(bytes),
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
Expand All @@ -114,15 +156,22 @@ impl Decoder {
///
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
#[cfg(feature = "encoding")]
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
pub fn decode<'b>(
bytes: &'b [u8],
encoding: &'static Encoding,
) -> Result<Cow<'b, str>, EncodingError> {
encoding
.decode_without_bom_handling_and_without_replacement(bytes)
.ok_or(Error::NonDecodable(None))
.ok_or(EncodingError::Other(encoding))
}

/// Like [`decode`] but using a pre-allocated buffer.
#[cfg(feature = "encoding")]
pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String) -> Result<()> {
pub fn decode_into(
bytes: &[u8],
encoding: &'static Encoding,
buf: &mut String,
) -> Result<(), EncodingError> {
if encoding == UTF_8 {
buf.push_str(std::str::from_utf8(bytes)?);
return Ok(());
Expand All @@ -142,7 +191,7 @@ pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String)
debug_assert_eq!(read, bytes.len());
Ok(())
}
DecoderResult::Malformed(_, _) => Err(Error::NonDecodable(None)),
DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)),
// SAFETY: We allocate enough space above
DecoderResult::OutputFull => unreachable!(),
}
Expand Down
Loading

0 comments on commit 6eea6bb

Please sign in to comment.