diff --git a/Changelog.md b/Changelog.md index f43f525e..0da34663 100644 --- a/Changelog.md +++ b/Changelog.md @@ -22,9 +22,14 @@ - [#227]: Split `SeError` from `DeError` in the `serialize` feature. Serialize functions and methods now return `SeError`. - [#810]: Return `std::io::Error` from `Writer` methods. +- [#811]: Split `NamespaceError` and `EncodingError` from `Error`. +- [#811]: Renamed `Error::EscapeError` to `Error::Escape` to match other variants. +- [#811]: Narrow down error return type from `Error` where only one variant is ever returned: + attribute related methods on `BytesStart` and `BytesDecl` returns `AttrError` [#227]: https://github.com/tafia/quick-xml/issues/227 [#810]: https://github.com/tafia/quick-xml/pull/810 +[#811]: https://github.com/tafia/quick-xml/pull/811 ## 0.36.2 -- 2024-09-20 diff --git a/examples/read_nodes.rs b/examples/read_nodes.rs index 6c32f486..50a5f90d 100644 --- a/examples/read_nodes.rs +++ b/examples/read_nodes.rs @@ -90,7 +90,10 @@ impl Translation { }) } else { dbg!("Expected Event::Start for Text, got: {:?}", &event); - let name_string = reader.decoder().decode(name.as_ref())?; + let name_string = reader + .decoder() + .decode(name.as_ref()) + .map_err(quick_xml::Error::Encoding)?; Err(AppError::NoText(name_string.into())) } } else { diff --git a/src/encoding.rs b/src/encoding.rs index 120d793b..44f6c82d 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -1,14 +1,11 @@ //! A module for wrappers that encode / decode data. use std::borrow::Cow; +use std::str::Utf8Error; #[cfg(feature = "encoding")] use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8}; -#[cfg(feature = "encoding")] -use crate::Error; -use crate::Result; - /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8. /// See pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; @@ -21,6 +18,48 @@ pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE]; #[cfg(feature = "encoding")] pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF]; +/// An error when decoding or encoding +/// +/// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`] +/// +/// [`encoding`]: ../index.html#encoding +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum EncodingError { + /// Input was not valid UTF-8 + Utf8(Utf8Error), + /// Input did not adhere to the given encoding + #[cfg(feature = "encoding")] + Other(&'static Encoding), +} + +impl From for EncodingError { + #[inline] + fn from(e: Utf8Error) -> Self { + Self::Utf8(e) + } +} + +impl std::error::Error for EncodingError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Utf8(e) => Some(e), + #[cfg(feature = "encoding")] + Self::Other(_) => None, + } + } +} + +impl std::fmt::Display for EncodingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e), + #[cfg(feature = "encoding")] + Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()), + } + } +} + /// Decoder of byte slices into strings. /// /// If feature [`encoding`] is enabled, this encoding taken from the `"encoding"` @@ -79,7 +118,7 @@ impl Decoder { /// /// ---- /// Returns an error in case of malformed sequences in the `bytes`. - pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result> { + pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result, EncodingError> { #[cfg(not(feature = "encoding"))] let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?)); @@ -90,7 +129,7 @@ impl Decoder { } /// Like [`decode`][Self::decode] but using a pre-allocated buffer. - pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<()> { + pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> { #[cfg(not(feature = "encoding"))] buf.push_str(std::str::from_utf8(bytes)?); @@ -101,7 +140,10 @@ impl Decoder { } /// Decodes the `Cow` buffer, preserves the lifetime - pub(crate) fn decode_cow<'b>(&self, bytes: &Cow<'b, [u8]>) -> Result> { + pub(crate) fn decode_cow<'b>( + &self, + bytes: &Cow<'b, [u8]>, + ) -> Result, EncodingError> { match bytes { Cow::Borrowed(bytes) => self.decode(bytes), // Convert to owned, because otherwise Cow will be bound with wrong lifetime @@ -114,15 +156,22 @@ impl Decoder { /// /// Returns an error in case of malformed or non-representable sequences in the `bytes`. #[cfg(feature = "encoding")] -pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result> { +pub fn decode<'b>( + bytes: &'b [u8], + encoding: &'static Encoding, +) -> Result, EncodingError> { encoding .decode_without_bom_handling_and_without_replacement(bytes) - .ok_or(Error::NonDecodable(None)) + .ok_or(EncodingError::Other(encoding)) } /// Like [`decode`] but using a pre-allocated buffer. #[cfg(feature = "encoding")] -pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String) -> Result<()> { +pub fn decode_into( + bytes: &[u8], + encoding: &'static Encoding, + buf: &mut String, +) -> Result<(), EncodingError> { if encoding == UTF_8 { buf.push_str(std::str::from_utf8(bytes)?); return Ok(()); @@ -142,7 +191,7 @@ pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String) debug_assert_eq!(read, bytes.len()); Ok(()) } - DecoderResult::Malformed(_, _) => Err(Error::NonDecodable(None)), + DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)), // SAFETY: We allocate enough space above DecoderResult::OutputFull => unreachable!(), } diff --git a/src/errors.rs b/src/errors.rs index 43a00959..6b661020 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,14 +1,11 @@ //! Error management module -use crate::encoding::Decoder; +use crate::encoding::{Decoder, EncodingError}; use crate::escape::EscapeError; use crate::events::attributes::AttrError; -use crate::name::QName; -use crate::utils::write_byte_string; +use crate::name::{NamespaceError, QName}; use std::fmt; use std::io::Error as IoError; -use std::str::Utf8Error; -use std::string::FromUtf8Error; use std::sync::Arc; /// An error returned if parsed document does not correspond to the XML grammar, @@ -123,15 +120,14 @@ impl fmt::Display for IllFormedError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::MissingDeclVersion(None) => { - write!(f, "an XML declaration does not contain `version` attribute") + f.write_str("an XML declaration does not contain `version` attribute") } Self::MissingDeclVersion(Some(attr)) => { write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr) } - Self::MissingDoctypeName => write!( - f, - "`` declaration does not contain a name of a document type" - ), + Self::MissingDoctypeName => { + f.write_str("`` declaration does not contain a name of a document type") + } Self::MissingEndTag(tag) => write!( f, "start tag not closed: `` not found before end of input", @@ -146,7 +142,7 @@ impl fmt::Display for IllFormedError { expected, found, ), Self::DoubleHyphenInComment => { - write!(f, "forbidden string `--` was found in a comment") + f.write_str("forbidden string `--` was found in a comment") } } } @@ -167,33 +163,14 @@ pub enum Error { Syntax(SyntaxError), /// The document is not [well-formed](https://www.w3.org/TR/xml11/#dt-wellformed). IllFormed(IllFormedError), - /// Input decoding error. If [`encoding`] feature is disabled, contains `None`, - /// otherwise contains the UTF-8 decoding error - /// - /// [`encoding`]: index.html#encoding - NonDecodable(Option), /// Attribute parsing error InvalidAttr(AttrError), + /// Encoding error + Encoding(EncodingError), /// Escape error - EscapeError(EscapeError), - /// Specified namespace prefix is unknown, cannot resolve namespace for it - UnknownPrefix(Vec), - /// Error for when a reserved namespace is set incorrectly. - /// - /// This error returned in following cases: - /// - the XML document attempts to bind `xml` prefix to something other than - /// `http://www.w3.org/XML/1998/namespace` - /// - the XML document attempts to bind `xmlns` prefix - /// - the XML document attempts to bind some prefix (except `xml`) to - /// `http://www.w3.org/XML/1998/namespace` - /// - the XML document attempts to bind some prefix to - /// `http://www.w3.org/2000/xmlns/` - InvalidPrefixBind { - /// The prefix that is tried to be bound - prefix: Vec, - /// Namespace to which prefix tried to be bound - namespace: Vec, - }, + Escape(EscapeError), + /// Parsed XML has some namespace-related problems + Namespace(NamespaceError), } impl Error { @@ -209,7 +186,7 @@ impl From for Error { /// Creates a new `Error::Io` from the given error #[inline] fn from(error: IoError) -> Error { - Error::Io(Arc::new(error)) + Self::Io(Arc::new(error)) } } @@ -229,19 +206,11 @@ impl From for Error { } } -impl From for Error { - /// Creates a new `Error::NonDecodable` from the given error +impl From for Error { + /// Creates a new `Error::EncodingError` from the given error #[inline] - fn from(error: Utf8Error) -> Error { - Error::NonDecodable(Some(error)) - } -} - -impl From for Error { - /// Creates a new `Error::Utf8` from the given error - #[inline] - fn from(error: FromUtf8Error) -> Error { - error.utf8_error().into() + fn from(error: EncodingError) -> Error { + Self::Encoding(error) } } @@ -249,14 +218,21 @@ impl From for Error { /// Creates a new `Error::EscapeError` from the given error #[inline] fn from(error: EscapeError) -> Error { - Error::EscapeError(error) + Self::Escape(error) } } impl From for Error { #[inline] fn from(error: AttrError) -> Self { - Error::InvalidAttr(error) + Self::InvalidAttr(error) + } +} + +impl From for Error { + #[inline] + fn from(error: NamespaceError) -> Self { + Self::Namespace(error) } } @@ -266,25 +242,13 @@ pub type Result = std::result::Result; impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Error::Io(e) => write!(f, "I/O error: {}", e), - Error::Syntax(e) => write!(f, "syntax error: {}", e), - Error::IllFormed(e) => write!(f, "ill-formed document: {}", e), - Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"), - Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e), - Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e), - Error::EscapeError(e) => write!(f, "{}", e), - Error::UnknownPrefix(prefix) => { - f.write_str("Unknown namespace prefix '")?; - write_byte_string(f, prefix)?; - f.write_str("'") - } - Error::InvalidPrefixBind { prefix, namespace } => { - f.write_str("The namespace prefix '")?; - write_byte_string(f, prefix)?; - f.write_str("' cannot be bound to '")?; - write_byte_string(f, namespace)?; - f.write_str("'") - } + Self::Io(e) => write!(f, "I/O error: {}", e), + Self::Syntax(e) => write!(f, "syntax error: {}", e), + Self::IllFormed(e) => write!(f, "ill-formed document: {}", e), + Self::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e), + Self::Encoding(e) => e.fmt(f), + Self::Escape(e) => e.fmt(f), + Self::Namespace(e) => e.fmt(f), } } } @@ -292,13 +256,13 @@ impl fmt::Display for Error { impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - Error::Io(e) => Some(e), - Error::Syntax(e) => Some(e), - Error::IllFormed(e) => Some(e), - Error::NonDecodable(Some(e)) => Some(e), - Error::InvalidAttr(e) => Some(e), - Error::EscapeError(e) => Some(e), - _ => None, + Self::Io(e) => Some(e), + Self::Syntax(e) => Some(e), + Self::IllFormed(e) => Some(e), + Self::InvalidAttr(e) => Some(e), + Self::Encoding(e) => Some(e), + Self::Escape(e) => Some(e), + Self::Namespace(e) => Some(e), } } } @@ -308,10 +272,12 @@ pub mod serialize { //! A module to handle serde (de)serialization errors use super::*; + use crate::utils::write_byte_string; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; use std::num::{ParseFloatError, ParseIntError}; + use std::str::Utf8Error; /// (De)serialization error #[derive(Clone, Debug)] @@ -354,30 +320,30 @@ pub mod serialize { impl fmt::Display for DeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - DeError::Custom(s) => write!(f, "{}", s), - DeError::InvalidXml(e) => write!(f, "{}", e), - DeError::InvalidInt(e) => write!(f, "{}", e), - DeError::InvalidFloat(e) => write!(f, "{}", e), - DeError::InvalidBoolean(v) => write!(f, "Invalid boolean value '{}'", v), - DeError::KeyNotRead => write!(f, "Invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), - DeError::UnexpectedStart(e) => { - f.write_str("Unexpected `Event::Start(")?; + Self::Custom(s) => f.write_str(s), + Self::InvalidXml(e) => e.fmt(f), + Self::InvalidInt(e) => write!(f, "invalid integral value: {}", e), + Self::InvalidFloat(e) => write!(f, "invalid floating-point value: {}", e), + Self::InvalidBoolean(v) => write!(f, "invalid boolean value '{}'", v), + Self::KeyNotRead => f.write_str("invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), + Self::UnexpectedStart(e) => { + f.write_str("unexpected `Event::Start(")?; write_byte_string(f, e)?; f.write_str(")`") } - DeError::UnexpectedEof => write!(f, "Unexpected `Event::Eof`"), + Self::UnexpectedEof => f.write_str("unexpected `Event::Eof`"), #[cfg(feature = "overlapped-lists")] - DeError::TooManyEvents(s) => write!(f, "Deserializer buffers {} events, limit exceeded", s), + Self::TooManyEvents(s) => write!(f, "deserializer buffered {} events, limit exceeded", s), } } } - impl ::std::error::Error for DeError { + impl std::error::Error for DeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - DeError::InvalidXml(e) => Some(e), - DeError::InvalidInt(e) => Some(e), - DeError::InvalidFloat(e) => Some(e), + Self::InvalidXml(e) => Some(e), + Self::InvalidInt(e) => Some(e), + Self::InvalidFloat(e) => Some(e), _ => None, } } @@ -385,7 +351,7 @@ pub mod serialize { impl serde::de::Error for DeError { fn custom(msg: T) -> Self { - DeError::Custom(msg.to_string()) + Self::Custom(msg.to_string()) } } @@ -403,16 +369,9 @@ pub mod serialize { } } - impl From for DeError { - #[inline] - fn from(e: Utf8Error) -> Self { - Self::InvalidXml(e.into()) - } - } - - impl From for DeError { + impl From for DeError { #[inline] - fn from(e: FromUtf8Error) -> Self { + fn from(e: EncodingError) -> Self { Self::InvalidXml(e.into()) } } @@ -465,11 +424,11 @@ pub mod serialize { impl fmt::Display for SeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - SeError::Custom(s) => write!(f, "{}", s), - SeError::Io(e) => write!(f, "I/O error: {}", e), - SeError::Fmt(e) => write!(f, "formatting error: {}", e), - SeError::Unsupported(s) => write!(f, "unsupported value: {}", s), - SeError::NonEncodable(e) => write!(f, "malformed UTF-8: {}", e), + Self::Custom(s) => f.write_str(s), + Self::Io(e) => write!(f, "I/O error: {}", e), + Self::Fmt(e) => write!(f, "formatting error: {}", e), + Self::Unsupported(s) => write!(f, "unsupported value: {}", s), + Self::NonEncodable(e) => write!(f, "malformed UTF-8: {}", e), } } } @@ -477,7 +436,7 @@ pub mod serialize { impl ::std::error::Error for SeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - SeError::Io(e) => Some(e), + Self::Io(e) => Some(e), _ => None, } } @@ -485,7 +444,7 @@ pub mod serialize { impl serde::ser::Error for SeError { fn custom(msg: T) -> Self { - SeError::Custom(msg.to_string()) + Self::Custom(msg.to_string()) } } diff --git a/src/escape.rs b/src/escape.rs index 76c7f7e6..bd5cfbe3 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -55,15 +55,15 @@ pub enum EscapeError { impl std::fmt::Display for EscapeError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - EscapeError::UnrecognizedEntity(rge, res) => { + Self::UnrecognizedEntity(rge, res) => { write!(f, "at {:?}: unrecognized entity `{}`", rge, res) } - EscapeError::UnterminatedEntity(e) => write!( + Self::UnterminatedEntity(e) => write!( f, "Error while escaping character at range {:?}: Cannot find ';' after '&'", e ), - EscapeError::InvalidCharRef(e) => { + Self::InvalidCharRef(e) => { write!(f, "invalid character reference: {}", e) } } diff --git a/src/events/mod.rs b/src/events/mod.rs index c9acb5b7..15e3b0b1 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -45,8 +45,8 @@ use std::mem::replace; use std::ops::Deref; use std::str::from_utf8; -use crate::encoding::Decoder; -use crate::errors::{Error, IllFormedError, Result}; +use crate::encoding::{Decoder, EncodingError}; +use crate::errors::{Error, IllFormedError}; use crate::escape::{ escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with, }; @@ -54,7 +54,7 @@ use crate::name::{LocalName, QName}; #[cfg(feature = "serialize")] use crate::utils::CowRef; use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string}; -use attributes::{Attribute, Attributes}; +use attributes::{AttrError, Attribute, Attributes}; /// Opening tag data (`Event::Start`), with optional attributes: ``. /// @@ -297,7 +297,7 @@ impl<'a> BytesStart<'a> { pub fn try_get_attribute + Sized>( &'a self, attr_name: N, - ) -> Result>> { + ) -> Result>, AttrError> { for a in self.attributes().with_checks(false) { let a = a?; if a.key.as_ref() == attr_name.as_ref() { @@ -583,7 +583,7 @@ impl<'a> BytesText<'a> { /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. - pub fn unescape(&self) -> Result> { + pub fn unescape(&self) -> Result, Error> { self.unescape_with(resolve_predefined_entity) } @@ -594,7 +594,7 @@ impl<'a> BytesText<'a> { pub fn unescape_with<'entity>( &self, resolve_entity: impl FnMut(&str) -> Option<&'entity str>, - ) -> Result> { + ) -> Result, Error> { let decoded = self.decoder.decode_cow(&self.content)?; match unescape_with(&decoded, resolve_entity)? { @@ -743,7 +743,7 @@ impl<'a> BytesCData<'a> { /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` - pub fn escape(self) -> Result> { + pub fn escape(self) -> Result, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( match escape(&decoded) { @@ -768,7 +768,7 @@ impl<'a> BytesCData<'a> { /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` - pub fn partial_escape(self) -> Result> { + pub fn partial_escape(self) -> Result, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( match partial_escape(&decoded) { @@ -792,7 +792,7 @@ impl<'a> BytesCData<'a> { /// | `&` | `&` /// /// [specification]: https://www.w3.org/TR/xml11/#syntax - pub fn minimal_escape(self) -> Result> { + pub fn minimal_escape(self) -> Result, EncodingError> { let decoded = self.decode()?; Ok(BytesText::wrap( match minimal_escape(&decoded) { @@ -805,8 +805,8 @@ impl<'a> BytesCData<'a> { } /// Gets content of this text buffer in the specified encoding - pub(crate) fn decode(&self) -> Result> { - self.decoder.decode_cow(&self.content) + pub(crate) fn decode(&self) -> Result, EncodingError> { + Ok(self.decoder.decode_cow(&self.content)?) } } @@ -1136,13 +1136,15 @@ impl<'a> BytesDecl<'a> { /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn version(&self) -> Result> { + pub fn version(&self) -> Result, Error> { // The version *must* be the first thing in the declaration. match self.content.attributes().with_checks(false).next() { Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value), // first attribute was not "version" Some(Ok(a)) => { - let found = from_utf8(a.key.as_ref())?.to_string(); + let found = from_utf8(a.key.as_ref()) + .map_err(|_| IllFormedError::MissingDeclVersion(None))? + .to_string(); Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some( found, )))) @@ -1189,7 +1191,7 @@ impl<'a> BytesDecl<'a> { /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn encoding(&self) -> Option>> { + pub fn encoding(&self) -> Option, AttrError>> { self.content .try_get_attribute("encoding") .map(|a| a.map(|a| a.value)) @@ -1231,7 +1233,7 @@ impl<'a> BytesDecl<'a> { /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl - pub fn standalone(&self) -> Option>> { + pub fn standalone(&self) -> Option, AttrError>> { self.content .try_get_attribute("standalone") .map(|a| a.map(|a| a.value)) diff --git a/src/name.rs b/src/name.rs index 719436c1..b7a8de16 100644 --- a/src/name.rs +++ b/src/name.rs @@ -3,13 +3,80 @@ //! //! [spec]: https://www.w3.org/TR/xml-names11 -use crate::errors::{Error, Result}; use crate::events::attributes::Attribute; use crate::events::BytesStart; use crate::utils::write_byte_string; use memchr::memchr; use std::fmt::{self, Debug, Formatter}; +/// Some namespace was invalid +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NamespaceError { + /// Specified namespace prefix is unknown, cannot resolve namespace for it + UnknownPrefix(Vec), + /// Attempts to bind the `xml` prefix to something other than `http://www.w3.org/XML/1998/namespace`. + /// + /// `xml` prefix can be bound only to `http://www.w3.org/XML/1998/namespace`. + /// + /// Contains the namespace to which `xml` tried to be bound. + InvalidXmlPrefixBind(Vec), + /// Attempts to bind the `xmlns` prefix. + /// + /// `xmlns` prefix is always bound to `http://www.w3.org/2000/xmlns/` and cannot be bound + /// to any other namespace or even to `http://www.w3.org/2000/xmlns/`. + /// + /// Contains the namespace to which `xmlns` tried to be bound. + InvalidXmlnsPrefixBind(Vec), + /// Attempts to bind some prefix (except `xml`) to `http://www.w3.org/XML/1998/namespace`. + /// + /// Only `xml` prefix can be bound to `http://www.w3.org/XML/1998/namespace`. + /// + /// Contains the prefix that is tried to be bound. + InvalidPrefixForXml(Vec), + /// Attempts to bind some prefix to `http://www.w3.org/2000/xmlns/`. + /// + /// `http://www.w3.org/2000/xmlns/` cannot be bound to any prefix, even to `xmlns`. + /// + /// Contains the prefix that is tried to be bound. + InvalidPrefixForXmlns(Vec), +} + +impl fmt::Display for NamespaceError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::UnknownPrefix(prefix) => { + f.write_str("unknown namespace prefix '")?; + write_byte_string(f, prefix)?; + f.write_str("'") + } + Self::InvalidXmlPrefixBind(namespace) => { + f.write_str("the namespace prefix 'xml' cannot be bound to '")?; + write_byte_string(f, namespace)?; + f.write_str("'") + } + Self::InvalidXmlnsPrefixBind(namespace) => { + f.write_str("the namespace prefix 'xmlns' cannot be bound to '")?; + write_byte_string(f, namespace)?; + f.write_str("'") + } + Self::InvalidPrefixForXml(prefix) => { + f.write_str("the namespace prefix '")?; + write_byte_string(f, prefix)?; + f.write_str("' cannot be bound to 'http://www.w3.org/XML/1998/namespace'") + } + Self::InvalidPrefixForXmlns(prefix) => { + f.write_str("the namespace prefix '")?; + write_byte_string(f, prefix)?; + f.write_str("' cannot be bound to 'http://www.w3.org/2000/xmlns/'") + } + } + } +} + +impl std::error::Error for NamespaceError {} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// A [qualified name] of an element or an attribute, including an optional /// namespace [prefix](Prefix) and a [local name](LocalName). /// @@ -307,17 +374,17 @@ impl<'ns> Debug for ResolveResult<'ns> { } impl<'ns> TryFrom> for Option> { - type Error = Error; + type Error = NamespaceError; /// Try to convert this result to an optional namespace and returns - /// [`Error::UnknownPrefix`] if this result represents unknown prefix - fn try_from(result: ResolveResult<'ns>) -> Result { + /// [`NamespaceError::UnknownPrefix`] if this result represents unknown prefix + fn try_from(result: ResolveResult<'ns>) -> Result { use ResolveResult::*; match result { Unbound => Ok(None), Bound(ns) => Ok(Some(ns)), - Unknown(p) => Err(Error::UnknownPrefix(p)), + Unknown(p) => Err(NamespaceError::UnknownPrefix(p)), } } } @@ -456,7 +523,7 @@ impl NamespaceResolver { /// the specified start element. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl - pub fn push(&mut self, start: &BytesStart) -> Result<()> { + pub fn push(&mut self, start: &BytesStart) -> Result<(), NamespaceError> { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' @@ -477,30 +544,23 @@ impl NamespaceResolver { Some(PrefixDeclaration::Named(b"xml")) => { if Namespace(&v) != RESERVED_NAMESPACE_XML.1 { // error, `xml` prefix explicitly set to different value - return Err(Error::InvalidPrefixBind { - prefix: b"xml".to_vec(), - namespace: v.to_vec(), - }); + return Err(NamespaceError::InvalidXmlPrefixBind(v.to_vec())); } // don't add another NamespaceEntry for the `xml` namespace prefix } Some(PrefixDeclaration::Named(b"xmlns")) => { // error, `xmlns` prefix explicitly set - return Err(Error::InvalidPrefixBind { - prefix: b"xmlns".to_vec(), - namespace: v.to_vec(), - }); + return Err(NamespaceError::InvalidXmlnsPrefixBind(v.to_vec())); } Some(PrefixDeclaration::Named(prefix)) => { let ns = Namespace(&v); - if ns == RESERVED_NAMESPACE_XML.1 || ns == RESERVED_NAMESPACE_XMLNS.1 { + if ns == RESERVED_NAMESPACE_XML.1 { // error, non-`xml` prefix set to xml uri + return Err(NamespaceError::InvalidPrefixForXml(prefix.to_vec())); + } else if ns == RESERVED_NAMESPACE_XMLNS.1 { // error, non-`xmlns` prefix set to xmlns uri - return Err(Error::InvalidPrefixBind { - prefix: prefix.to_vec(), - namespace: v.to_vec(), - }); + return Err(NamespaceError::InvalidPrefixForXmlns(prefix.to_vec())); } let start = self.buffer.len(); @@ -980,19 +1040,15 @@ mod namespaces { fn rebound_to_incorrect_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content( - " xmlns:xml='not_correct_namespace'", - 0, - )) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"xml"); - assert_eq!(namespace, b"not_correct_namespace"); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content( + " xmlns:xml='not_correct_namespace'", + 0, + )), + Err(NamespaceError::InvalidXmlPrefixBind( + b"not_correct_namespace".to_vec() + )), + ); assert_eq!(&resolver.buffer[s..], b""); } @@ -1001,16 +1057,10 @@ mod namespaces { fn unbound() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content(" xmlns:xml=''", 0)) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"xml"); - assert_eq!(namespace, b""); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content(" xmlns:xml=''", 0)), + Err(NamespaceError::InvalidXmlPrefixBind(b"".to_vec())), + ); assert_eq!(&resolver.buffer[s..], b""); } @@ -1019,19 +1069,13 @@ mod namespaces { fn other_prefix_bound_to_xml_namespace() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content( - " xmlns:not_xml='http://www.w3.org/XML/1998/namespace'", - 0, - )) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"not_xml"); - assert_eq!(namespace, b"http://www.w3.org/XML/1998/namespace"); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content( + " xmlns:not_xml='http://www.w3.org/XML/1998/namespace'", + 0, + )), + Err(NamespaceError::InvalidPrefixForXml(b"not_xml".to_vec())), + ); assert_eq!(&resolver.buffer[s..], b""); } } @@ -1065,19 +1109,15 @@ mod namespaces { fn rebound_to_correct_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content( - " xmlns:xmlns='http://www.w3.org/2000/xmlns/'", - 0, - )) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"xmlns"); - assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content( + " xmlns:xmlns='http://www.w3.org/2000/xmlns/'", + 0, + )), + Err(NamespaceError::InvalidXmlnsPrefixBind( + b"http://www.w3.org/2000/xmlns/".to_vec() + )), + ); assert_eq!(&resolver.buffer[s..], b""); } @@ -1086,19 +1126,15 @@ mod namespaces { fn rebound_to_incorrect_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content( - " xmlns:xmlns='not_correct_namespace'", - 0, - )) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"xmlns"); - assert_eq!(namespace, b"not_correct_namespace"); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content( + " xmlns:xmlns='not_correct_namespace'", + 0, + )), + Err(NamespaceError::InvalidXmlnsPrefixBind( + b"not_correct_namespace".to_vec() + )), + ); assert_eq!(&resolver.buffer[s..], b""); } @@ -1107,16 +1143,10 @@ mod namespaces { fn unbound() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content(" xmlns:xmlns=''", 0)) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"xmlns"); - assert_eq!(namespace, b""); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content(" xmlns:xmlns=''", 0)), + Err(NamespaceError::InvalidXmlnsPrefixBind(b"".to_vec())), + ); assert_eq!(&resolver.buffer[s..], b""); } @@ -1125,19 +1155,13 @@ mod namespaces { fn other_prefix_bound_to_xmlns_namespace() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - match resolver.push(&BytesStart::from_content( - " xmlns:not_xmlns='http://www.w3.org/2000/xmlns/'", - 0, - )) { - Err(Error::InvalidPrefixBind { prefix, namespace }) => { - assert_eq!(prefix, b"not_xmlns"); - assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); - } - x => panic!( - "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", - x - ), - } + assert_eq!( + resolver.push(&BytesStart::from_content( + " xmlns:not_xmlns='http://www.w3.org/2000/xmlns/'", + 0, + )), + Err(NamespaceError::InvalidPrefixForXmlns(b"not_xmlns".to_vec())), + ); assert_eq!(&resolver.buffer[s..], b""); } } diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 6446ed8a..0136a55e 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -192,7 +192,7 @@ macro_rules! impl_buffered_source { } *position += read; - Err(bang_type.to_err()) + Err(bang_type.to_err().into()) } #[inline] diff --git a/src/reader/mod.rs b/src/reader/mod.rs index fb63b2da..a05e5bc5 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -6,7 +6,7 @@ use std::io; use std::ops::Range; use crate::encoding::Decoder; -use crate::errors::{Error, Result, SyntaxError}; +use crate::errors::{Error, SyntaxError}; use crate::events::Event; use crate::parser::{ElementParser, Parser, PiParser}; use crate::reader::state::ReaderState; @@ -894,7 +894,7 @@ impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader. - fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result> + fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result, Error> where R: XmlSource<'i, B>, { @@ -903,7 +903,7 @@ impl Reader { /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. - fn read_until_close<'i, B>(&mut self, buf: B) -> Result> + fn read_until_close<'i, B>(&mut self, buf: B) -> Result, Error> where R: XmlSource<'i, B>, { @@ -979,7 +979,7 @@ trait XmlSource<'r, B> { /// reader which provides bytes fed into the parser. /// /// [events]: crate::events::Event - fn read_with

(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8]> + fn read_with

(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error> where P: Parser; @@ -998,7 +998,11 @@ trait XmlSource<'r, B> { /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event - fn read_bang_element(&mut self, buf: B, position: &mut u64) -> Result<(BangType, &'r [u8])>; + fn read_bang_element( + &mut self, + buf: B, + position: &mut u64, + ) -> Result<(BangType, &'r [u8]), Error>; /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. @@ -1024,12 +1028,12 @@ enum BangType { } impl BangType { #[inline(always)] - const fn new(byte: Option) -> Result { + const fn new(byte: Option) -> Result { Ok(match byte { Some(b'[') => Self::CData, Some(b'-') => Self::Comment, Some(b'D') | Some(b'd') => Self::DocType(0), - _ => return Err(Error::Syntax(SyntaxError::InvalidBangMarkup)), + _ => return Err(SyntaxError::InvalidBangMarkup), }) } @@ -1101,11 +1105,11 @@ impl BangType { None } #[inline] - const fn to_err(&self) -> Error { + const fn to_err(&self) -> SyntaxError { match self { - Self::CData => Error::Syntax(SyntaxError::UnclosedCData), - Self::Comment => Error::Syntax(SyntaxError::UnclosedComment), - Self::DocType(_) => Error::Syntax(SyntaxError::UnclosedDoctype), + Self::CData => SyntaxError::UnclosedCData, + Self::Comment => SyntaxError::UnclosedComment, + Self::DocType(_) => SyntaxError::UnclosedDoctype, } } } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index e1082cac..08287592 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -233,7 +233,7 @@ impl<'a> Reader<&'a [u8]> { let len = span.end - span.start; // SAFETY: `span` can only contain indexes up to usize::MAX because it // was created from offsets from a single &[u8] slice - self.decoder().decode(&buffer[0..len as usize]) + Ok(self.decoder().decode(&buffer[0..len as usize])?) } } @@ -316,7 +316,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { } *position += self.len() as u64; - Err(bang_type.to_err()) + Err(bang_type.to_err().into()) } #[inline] diff --git a/src/reader/state.rs b/src/reader/state.rs index f906c5b6..565008ab 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -165,7 +165,7 @@ impl ReaderState { // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`. // ^------- We report error at that position, so we need to subtract 2 and buf len self.last_error_offset = self.offset - len as u64 - 2; - Err(bang_type.to_err()) + Err(bang_type.to_err().into()) } } }