diff --git a/src/parser.rs b/src/parser.rs index 995ba0d..81375a2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -58,6 +58,7 @@ enum SpecificError { ExpectedWhitespace, ExpectedDocumentTypeName, + ExpectedIntSubset, ExpectedSystemLiteral, ExpectedClosingQuote(&'static str), @@ -151,6 +152,7 @@ impl error::Error for SpecificError { ExpectedYesNo => "expected yes or no", ExpectedWhitespace => "expected whitespace", ExpectedDocumentTypeName => "expected document type name", + ExpectedIntSubset => "expected int subset", ExpectedSystemLiteral => "expected system literal", ExpectedClosingQuote(_) => "expected closing quote", ExpectedOpeningQuote(_) => "expected opening quote", @@ -262,6 +264,7 @@ trait PrivateXmlParseExt<'a> { fn consume_hex_chars(&self) -> XmlProgress<'a, &'a str>; fn consume_char_data(&self) -> XmlProgress<'a, &'a str>; fn consume_cdata(&self) -> XmlProgress<'a, &'a str>; + fn consume_int_subset(&self) -> XmlProgress<'a, &'a str>; fn consume_comment(&self) -> XmlProgress<'a, &'a str>; fn consume_pi_value(&self) -> XmlProgress<'a, &'a str>; fn consume_start_tag(&self) -> XmlProgress<'a, &'a str>; @@ -289,6 +292,10 @@ impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> { self.consume_to(self.s.end_of_cdata()).map_err(|_| SpecificError::ExpectedCData) } + fn consume_int_subset(&self) -> XmlProgress<'a, &'a str> { + self.consume_to(self.s.end_of_int_subset()).map_err(|_| SpecificError::ExpectedIntSubset) + } + fn consume_comment(&self) -> XmlProgress<'a, &'a str> { self.consume_to(self.s.end_of_comment()).map_err(|_| SpecificError::ExpectedCommentBody) } @@ -481,19 +488,38 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) let (xml, _) = try_parse!(xml.expect_literal("SYSTEM")); let (xml, _) = try_parse!(xml.expect_space()); let (xml, external_id) = try_parse!( - parse_quoted_value(pm, xml, |_, xml, _| xml.consume_name().map_err(|_| SpecificError::ExpectedSystemLiteral)) - ); + parse_quoted_value(pm, xml, |_, xml, quote| + xml.consume_attribute_value(quote).map_err(|_| SpecificError::ExpectedSystemLiteral) + ) + ); success(external_id, xml) } -/* without the optional intSubset */ +fn parse_int_subset<'a>(_pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) + -> XmlProgress<'a, &'a str> +{ + let (xml, _) = try_parse!(xml.expect_literal("[")); + let (xml, _) = xml.consume_space().optional(xml); + let (xml, elements) = try_parse!( + xml.consume_int_subset().map_err(|_| SpecificError::ExpectedIntSubset) + ); + let (xml, _) = xml.consume_space().optional(xml); + let (xml, _) = try_parse!(xml.expect_literal("]")); + let (xml, _) = xml.consume_space().optional(xml); + + success(elements, xml) +} + fn parse_document_type_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> { let (xml, _) = try_parse!(xml.expect_literal("")); success(Token::DocumentTypeDeclaration, xml) @@ -1310,8 +1336,107 @@ mod test { } #[test] - fn a_prolog_with_a_document_type_declaration() { - let package = quick_parse(""); + fn a_prolog_with_a_doc_type_declaration_external_id() { + let package = quick_parse(r#" + + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_int_subset() { + let package = quick_parse(r#" + + + + + + ]> + + Tove + Jani + Reminder + Don't forget me this weekend + + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "note"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_int_subset_trailing_ws() { + let package = quick_parse(r#" + + + + + + ] + + > + + Tove + Jani + Reminder + Don't forget me this weekend + + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "note"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_zero_def() { + let package = quick_parse(" + + "); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_zero_def_trailing_ws() { + let package = quick_parse(" + + "); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id() { + let package = quick_parse(r#" + + ]> + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id_trailing_ws() { + let package = quick_parse(r#" + + ] > + "#); let doc = package.as_document(); let top = top(&doc); diff --git a/src/str.rs b/src/str.rs index cd6fa71..a62031a 100644 --- a/src/str.rs +++ b/src/str.rs @@ -49,6 +49,8 @@ pub trait XmlStr { /// Find the end of the starting tag fn end_of_start_tag(&self) -> Option; fn end_of_encoding(&self) -> Option; + /// Find the end of the internal doc type declaration, not including the ] + fn end_of_int_subset(&self) -> Option; } impl<'a> XmlStr for &'a str { @@ -143,6 +145,8 @@ impl<'a> XmlStr for &'a str { fn end_of_encoding(&self) -> Option { self.end_of_start_rest(|c| c.is_encoding_start_char(), |c| c.is_encoding_rest_char()) } + + fn end_of_int_subset(&self) -> Option { self.find("]") } } /// Predicates used when parsing an characters in an XML document. @@ -297,4 +301,9 @@ mod test { fn end_of_char_data_includes_multiple_right_squares() { assert_eq!("hello]]world".end_of_char_data(), Some("hello]]world".len())); } + + #[test] + fn end_of_int_subset_excludes_right_square() { + assert_eq!("hello]>world".end_of_int_subset(), Some("hello".len())) + } }