diff --git a/src/parser.rs b/src/parser.rs
index 995ba0d..81375a2 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -58,6 +58,7 @@ enum SpecificError {
ExpectedWhitespace,
ExpectedDocumentTypeName,
+ ExpectedIntSubset,
ExpectedSystemLiteral,
ExpectedClosingQuote(&'static str),
@@ -151,6 +152,7 @@ impl error::Error for SpecificError {
ExpectedYesNo => "expected yes or no",
ExpectedWhitespace => "expected whitespace",
ExpectedDocumentTypeName => "expected document type name",
+ ExpectedIntSubset => "expected int subset",
ExpectedSystemLiteral => "expected system literal",
ExpectedClosingQuote(_) => "expected closing quote",
ExpectedOpeningQuote(_) => "expected opening quote",
@@ -262,6 +264,7 @@ trait PrivateXmlParseExt<'a> {
fn consume_hex_chars(&self) -> XmlProgress<'a, &'a str>;
fn consume_char_data(&self) -> XmlProgress<'a, &'a str>;
fn consume_cdata(&self) -> XmlProgress<'a, &'a str>;
+ fn consume_int_subset(&self) -> XmlProgress<'a, &'a str>; // Consumes the text of a DOCTYPE internal subset, up to the offset found by end_of_int_subset; fails with ExpectedIntSubset.
fn consume_comment(&self) -> XmlProgress<'a, &'a str>;
fn consume_pi_value(&self) -> XmlProgress<'a, &'a str>;
fn consume_start_tag(&self) -> XmlProgress<'a, &'a str>;
@@ -289,6 +292,10 @@ impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> {
self.consume_to(self.s.end_of_cdata()).map_err(|_| SpecificError::ExpectedCData)
}
+    /// Consumes input up to the offset reported by `end_of_int_subset`.
+    ///
+    /// Any failure coming out of `consume_to` is reported as
+    /// `SpecificError::ExpectedIntSubset`.
+    fn consume_int_subset(&self) -> XmlProgress<'a, &'a str> {
+        let subset_end = self.s.end_of_int_subset();
+        let consumed = self.consume_to(subset_end);
+        consumed.map_err(|_| SpecificError::ExpectedIntSubset)
+    }
+
/// Consumes input up to the offset reported by `end_of_comment`; any failure
/// coming out of `consume_to` is reported as `SpecificError::ExpectedCommentBody`.
fn consume_comment(&self) -> XmlProgress<'a, &'a str> {
self.consume_to(self.s.end_of_comment()).map_err(|_| SpecificError::ExpectedCommentBody)
}
@@ -481,19 +488,38 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>)
let (xml, _) = try_parse!(xml.expect_literal("SYSTEM"));
let (xml, _) = try_parse!(xml.expect_space());
let (xml, external_id) = try_parse!(
- parse_quoted_value(pm, xml, |_, xml, _| xml.consume_name().map_err(|_| SpecificError::ExpectedSystemLiteral))
- );
+ parse_quoted_value(pm, xml, |_, xml, quote|
+ xml.consume_attribute_value(quote).map_err(|_| SpecificError::ExpectedSystemLiteral)
+ )
+ );
success(external_id, xml)
}
-/* without the optional intSubset */
+/// Parses the bracketed internal subset of a document type declaration.
+///
+/// Expects a literal `[`, skips optional whitespace, consumes the subset
+/// text via `consume_int_subset`, then expects the closing `]`, with
+/// optional whitespace accepted both before and after it. On success the
+/// consumed subset text is returned together with the point that follows
+/// the trailing whitespace.
+///
+/// `_pm` is not used by this parser.
+fn parse_int_subset<'a>(_pm: &mut XmlMaster<'a>, xml: StringPoint<'a>)
+    -> XmlProgress<'a, &'a str>
+{
+    let (after_open, _) = try_parse!(xml.expect_literal("["));
+    let (body_start, _) = after_open.consume_space().optional(after_open);
+
+    // `consume_int_subset` already reports `ExpectedIntSubset` on failure,
+    // so the error needs no further mapping here.
+    let (body_end, subset) = try_parse!(body_start.consume_int_subset());
+
+    let (at_close, _) = body_end.consume_space().optional(body_end);
+    let (after_close, _) = try_parse!(at_close.expect_literal("]"));
+    let (rest, _) = after_close.consume_space().optional(after_close);
+
+    success(subset, rest)
+}
+
fn parse_document_type_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> {
let (xml, _) = try_parse!(xml.expect_literal(""));
success(Token::DocumentTypeDeclaration, xml)
@@ -1310,8 +1336,107 @@ mod test {
}
#[test]
- fn a_prolog_with_a_document_type_declaration() {
- let package = quick_parse("");
+ fn a_prolog_with_a_doc_type_declaration_external_id() { // Going by the test name: a DOCTYPE with an external ID in the prolog must not prevent the root element from being parsed.
+ let package = quick_parse(r#"
+
+ "#);
+ let doc = package.as_document(); // NOTE(review): the XML literal above shows no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "hello"); // The element returned by the `top` helper must be named "hello".
+ }
+
+ #[test] // Going by the test name: a DOCTYPE with an internal subset in the prolog must not prevent the root element from being parsed.
+ fn a_prolog_with_a_doc_type_declaration_int_subset() { // Parses the literal below and asserts that the element returned by the `top` helper is named "note". NOTE(review): the literal shows almost no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let package = quick_parse(r#"
+
+
+
+
+
+ ]>
+
+ Tove
+ Jani
+ Reminder
+ Don't forget me this weekend
+
+ "#);
+ let doc = package.as_document();
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "note");
+ }
+
+ #[test] // Same shape as the internal-subset test, but the literal puts whitespace between the closing `]` and the `>` that follows it.
+ fn a_prolog_with_a_doc_type_declaration_int_subset_trailing_ws() { // Parses the literal below and asserts that the element returned by the `top` helper is named "note". NOTE(review): the literal shows almost no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let package = quick_parse(r#"
+
+
+
+
+
+ ]
+
+ >
+
+ Tove
+ Jani
+ Reminder
+ Don't forget me this weekend
+
+ "#);
+ let doc = package.as_document();
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "note");
+ }
+
+ #[test] // Going by the test name: a DOCTYPE that carries no definitions must not prevent the root element from being parsed.
+ fn a_prolog_with_a_doc_type_declaration_zero_def() { // Parses the literal below and asserts that the element returned by the `top` helper is named "hello". NOTE(review): the literal shows no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let package = quick_parse("
+
+ ");
+ let doc = package.as_document();
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "hello");
+ }
+
+ #[test] // Going by the test name: the no-definitions DOCTYPE case again, this time with trailing whitespace inside the declaration.
+ fn a_prolog_with_a_doc_type_declaration_zero_def_trailing_ws() { // Parses the literal below and asserts that the element returned by the `top` helper is named "hello". NOTE(review): the literal shows no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let package = quick_parse("
+
+ ");
+ let doc = package.as_document();
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "hello");
+ }
+
+ #[test] // Going by the test name: a DOCTYPE that has both an external ID and an internal subset must not prevent the root element from being parsed.
+ fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id() { // Parses the literal below and asserts that the element returned by the `top` helper is named "hello". NOTE(review): the literal shows almost no markup in this view; presumably stripped in extraction, confirm against the repository.
+ let package = quick_parse(r#"
+
+ ]>
+ "#);
+ let doc = package.as_document();
+ let top = top(&doc);
+
+ assert_qname_eq!(top.name(), "hello");
+ }
+
+ #[test]
+ fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id_trailing_ws() {
+ let package = quick_parse(r#"
+
+ ] >
+ "#);
let doc = package.as_document();
let top = top(&doc);
diff --git a/src/str.rs b/src/str.rs
index cd6fa71..a62031a 100644
--- a/src/str.rs
+++ b/src/str.rs
@@ -49,6 +49,8 @@ pub trait XmlStr {
/// Find the end of the starting tag
fn end_of_start_tag(&self) -> Option;
fn end_of_encoding(&self) -> Option;
+    /// Find the end of the internal subset of a document type declaration.
+    /// The returned offset does not include the terminating `]`.
+    fn end_of_int_subset(&self) -> Option<usize>;
}
impl<'a> XmlStr for &'a str {
@@ -143,6 +145,8 @@ impl<'a> XmlStr for &'a str {
/// Finds the end of an encoding name by delegating to `end_of_start_rest`,
/// passing `is_encoding_start_char` and `is_encoding_rest_char` as its two
/// predicates (presumably for the first character and the ones after it).
// NOTE(review): the return type reads as a bare `Option` here; its type
// argument was presumably lost in extraction. Confirm against the repository.
fn end_of_encoding(&self) -> Option {
self.end_of_start_rest(|c| c.is_encoding_start_char(), |c| c.is_encoding_rest_char())
}
+
+    /// Finds the offset of the `]` that closes an internal subset; the
+    /// offset does not include the `]` itself.
+    ///
+    /// A `]` inside a quoted literal, a comment (`<!-- ... -->`) or a
+    /// processing instruction (`<? ... ?>`) is skipped instead of being
+    /// mistaken for the end of the subset. An opener that has no matching
+    /// terminator is treated as ordinary text, so such input still ends at
+    /// the next `]`. Returns `None` when no terminating `]` is found.
+    fn end_of_int_subset(&self) -> Option<usize> {
+        let text: &str = *self;
+        let mut offset = 0;
+
+        while let Some(c) = text[offset..].chars().next() {
+            let rest = &text[offset..];
+
+            // Byte length of a complete quoted / comment / PI span that
+            // starts at `offset`, when there is one.
+            let span = match c {
+                ']' => return Some(offset),
+                '"' | '\'' => rest[1..].find(c).map(|end| end + 2),
+                '<' if rest.starts_with("<!--") => rest[4..].find("-->").map(|end| end + 7),
+                '<' if rest.starts_with("<?") => rest[2..].find("?>").map(|end| end + 4),
+                _ => None,
+            };
+
+            // Either jump over the whole span or step over one character.
+            offset += span.unwrap_or(c.len_utf8());
+        }
+
+        None
+    }
}
/// Predicates used when parsing an characters in an XML document.
@@ -297,4 +301,9 @@ mod test {
fn end_of_char_data_includes_multiple_right_squares() {
assert_eq!("hello]]world".end_of_char_data(), Some("hello]]world".len()));
}
+
+    /// The reported offset stops just before the `]`.
+    #[test]
+    fn end_of_int_subset_excludes_right_square() {
+        let text = "hello]>world";
+        let expected = Some("hello".len());
+
+        assert_eq!(text.end_of_int_subset(), expected);
+    }
}