Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect end position in read_to_end and read_text #773

Merged
merged 5 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@

### Bug Fixes

- [#773]: Fixed reporting incorrect end position in `Reader::read_to_end` family
of methods and trimming of the trailing spaces in `Reader::read_text` when
`trim_text_start` is set and the last event is not a `Text` event.

### Misc Changes

[#772]: https://github.com/tafia/quick-xml/pull/772
[#773]: https://github.com/tafia/quick-xml/pull/773


## 0.34.0 -- 2024-06-25
Expand Down
28 changes: 26 additions & 2 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,28 +394,52 @@ macro_rules! read_until_close {
/// Generalization of `read_to_end` method for buffered and borrowed readers
macro_rules! read_to_end {
(
// $self: &mut Reader
$self:expr, $end:expr, $buf:expr,
$read_event:ident,
// Code block that performs clearing of internal buffer after read of each event
$clear:block
$(, $await:ident)?
) => {{
// Because we take position after the event before the End event,
// it is important that this position indicates beginning of the End event.
// If between last event and the End event would be only spaces, then we
// take position before the spaces, but spaces would be skipped without
// generating event if `trim_text_start` is set to `true`. To prevent that
// we temporary disable start text trimming.
//
// We also cannot take position after getting End event, because if
// `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
// we do not known the real size of the End event that it is occupies in
// the source and cannot correct the position after the End event.
// So, we in any case should tweak parser configuration.
let config = $self.config_mut();
let trim = config.trim_text_start;
config.trim_text_start = false;

let start = $self.buffer_position();
let mut depth = 0;
loop {
$clear
let end = $self.buffer_position();
match $self.$read_event($buf) $(.$await)? {
Err(e) => return Err(e),
Err(e) => {
$self.config_mut().trim_text_start = trim;
return Err(e);
}

Ok(Event::Start(e)) if e.name() == $end => depth += 1,
Ok(Event::End(e)) if e.name() == $end => {
if depth == 0 {
$self.config_mut().trim_text_start = trim;
break start..end;
}
depth -= 1;
}
Ok(Event::Eof) => return Err(Error::missed_end($end, $self.decoder())),
Ok(Event::Eof) => {
$self.config_mut().trim_text_start = trim;
return Err(Error::missed_end($end, $self.decoder()));
}
_ => (),
}
}
Expand Down
51 changes: 49 additions & 2 deletions tests/async-tokio.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::iter;

use pretty_assertions::assert_eq;
use quick_xml::events::Event::*;
use quick_xml::events::{BytesStart, Event::*};
use quick_xml::name::QName;
use quick_xml::reader::Reader;

// Import `small_buffers_tests!`
#[macro_use]
mod reader;
mod helpers;

small_buffers_tests!(
#[tokio::test]
Expand Down Expand Up @@ -40,6 +40,53 @@ async fn test_sample() {
assert_eq!((count, reads), (1247, 5245));
}

/// This tests checks that read_to_end() correctly returns span even when
/// text is trimmed from both sides
mod read_to_end {
use super::*;
use pretty_assertions::assert_eq;

#[tokio::test]
async fn text() {
let mut r = Reader::from_str("<tag> text </tag>");
// ^0 ^5 ^11
r.config_mut().trim_text(true);

let mut buf = Vec::new();
assert_eq!(
r.read_event_into_async(&mut buf).await.unwrap(),
Start(BytesStart::new("tag"))
);
assert_eq!(
r.read_to_end_into_async(QName(b"tag"), &mut buf)
.await
.unwrap(),
5..11
);
assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof);
}

#[tokio::test]
async fn tag() {
let mut r = Reader::from_str("<tag> <nested/> </tag>");
// ^0 ^5 ^16
r.config_mut().trim_text(true);

let mut buf = Vec::new();
assert_eq!(
r.read_event_into_async(&mut buf).await.unwrap(),
Start(BytesStart::new("tag"))
);
assert_eq!(
r.read_to_end_into_async(QName(b"tag"), &mut buf)
.await
.unwrap(),
5..16
);
assert_eq!(r.read_event_into_async(&mut buf).await.unwrap(), Eof);
}
}

/// Regression test for https://github.com/tafia/quick-xml/issues/751
///
/// Actually, that error was not found in async reader, but we would to test it as well.
Expand Down
169 changes: 148 additions & 21 deletions tests/helpers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,151 @@
//! Utility functions for integration tests

use quick_xml::de::Deserializer;
use quick_xml::DeError;
use serde::Deserialize;

/// Deserialize an instance of type T from a string of XML text.
/// If deserialization was succeeded checks that all XML events was consumed
pub fn from_str<'de, T>(source: &'de str) -> Result<T, DeError>
where
T: Deserialize<'de>,
{
// Log XML that we try to deserialize to see it in the failed tests output
dbg!(source);
let mut de = Deserializer::from_str(source);
let result = T::deserialize(&mut de);

// If type was deserialized, the whole XML document should be consumed
if let Ok(_) = result {
assert!(de.is_empty(), "the whole XML document should be consumed");
}

result
/// Tests for https://github.com/tafia/quick-xml/issues/469
/// Exported to reuse in `async-tokio` tests.
#[macro_export]
macro_rules! small_buffers_tests {
(
#[$test:meta]
$read_event:ident: $BufReader:ty
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use quick_xml::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
use quick_xml::reader::Reader;
use pretty_assertions::assert_eq;

#[$test]
$($async)? fn decl() {
let xml = "<?xml ?>";
// ^^^^^^^ data that fit into buffer
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn pi() {
let xml = "<?pi?>";
// ^^^^^ data that fit into buffer
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesPI::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn empty() {
let xml = "<empty/>";
// ^^^^^^^ data that fit into buffer
let size = xml.match_indices("/>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("empty"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn cdata1() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("]]>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn cdata2() {
let xml = "<![CDATA[cdata]]>";
// ^^^^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("]]>").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn comment1() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("-->").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}

#[$test]
$($async)? fn comment2() {
let xml = "<!--comment-->";
// ^^^^^^^^^^^^^ data that fit into buffer
let size = xml.match_indices("-->").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}
Loading