diff --git a/Changelog.md b/Changelog.md index 33e5e96b..b25b1c52 100644 --- a/Changelog.md +++ b/Changelog.md @@ -18,9 +18,12 @@ ### Bug Fixes +- [#604]: Avoid crashing on wrong comments like `` when using `read_event_into*` functions. + ### Misc Changes +[#604]: https://github.com/tafia/quick-xml/issue/604 [#609]: https://github.com/tafia/quick-xml/pull/609 [#615]: https://github.com/tafia/quick-xml/pull/615 diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index ee58aed8..c3cec060 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -117,7 +117,9 @@ macro_rules! impl_buffered_source { // somewhere sane rather than at the EOF Ok(n) if n.is_empty() => return Err(bang_type.to_err()), Ok(available) => { - if let Some((consumed, used)) = bang_type.parse(buf, available) { + // We only parse from start because we don't want to consider + // whatever is in the buffer before the bang element + if let Some((consumed, used)) = bang_type.parse(&buf[start..], available) { buf.extend_from_slice(consumed); self $(.$reader)? .consume(used); diff --git a/tests/issues.rs b/tests/issues.rs index a19355ba..90efc732 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -4,7 +4,7 @@ use std::sync::mpsc; -use quick_xml::events::{BytesStart, Event}; +use quick_xml::events::{BytesDecl, BytesStart, BytesText, Event}; use quick_xml::name::QName; use quick_xml::reader::Reader; use quick_xml::Error; @@ -98,10 +98,90 @@ mod issue514 { assert_eq!(found, "other-tag"); } x => panic!( - r#"Expected `Err(EndEventMismatch("some-tag", "other-tag")))`, but found {:?}"#, + r#"Expected `Err(EndEventMismatch("some-tag", "other-tag"))`, but found {:?}"#, x ), } assert_eq!(reader.read_event().unwrap(), Event::Eof); } } + +/// Regression test for https://github.com/tafia/quick-xml/issues/604 +mod issue604 { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn short() { + let data = b""; + let mut reader = Reader::from_reader(data.as_slice()); + let mut buf = Vec::new(); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(BytesDecl::new("1.0", None, None)) + ); + match reader.read_event_into(&mut buf) { + Err(Error::UnexpectedEof(reason)) => assert_eq!(reason, "Comment"), + x => panic!( + r#"Expected `Err(UnexpectedEof("Comment"))`, but found {:?}"#, + x + ), + } + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + + #[test] + fn long() { + let data = b""; + let mut reader = Reader::from_reader(data.as_slice()); + let mut buf = Vec::new(); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(BytesDecl::new("1.0", None, None)) + ); + match reader.read_event_into(&mut buf) { + Err(Error::UnexpectedEof(reason)) => assert_eq!(reason, "Comment"), + x => panic!( + r#"Expected `Err(UnexpectedEof("Comment"))`, but found {:?}"#, + x + ), + } + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + + /// According to the grammar, `>` is allowed just in start of comment. + /// See https://www.w3.org/TR/xml11/#sec-comments + #[test] + fn short_valid() { + let data = b"-->"; + let mut reader = Reader::from_reader(data.as_slice()); + let mut buf = Vec::new(); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(BytesDecl::new("1.0", None, None)) + ); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Comment(BytesText::from_escaped(">")) + ); + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + + /// According to the grammar, `->` is allowed just in start of comment. + /// See https://www.w3.org/TR/xml11/#sec-comments + #[test] + fn long_valid() { + let data = b"-->"; + let mut reader = Reader::from_reader(data.as_slice()); + let mut buf = Vec::new(); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Decl(BytesDecl::new("1.0", None, None)) + ); + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Comment(BytesText::from_escaped("->")) + ); + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } +}