Skip to content

Commit f2bda22

Browse files
author
Orion Gonzalez
committed
feat: support parsing documents
1 parent 25017a4 commit f2bda22

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

src/lib.rs

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -366,6 +366,7 @@ pub struct Builder<'a> {
366366
strip_comments: bool,
367367
id_prefix: Option<&'a str>,
368368
generic_attribute_prefixes: Option<HashSet<&'a str>>,
369+
is_document: bool,
369370
}
370371

371372
impl<'a> Default for Builder<'a> {
@@ -486,6 +487,7 @@ impl<'a> Default for Builder<'a> {
486487
strip_comments: true,
487488
id_prefix: None,
488489
generic_attribute_prefixes: None,
490+
is_document: false,
489491
}
490492
}
491493
}
@@ -1705,6 +1707,17 @@ impl<'a> Builder<'a> {
17051707
}
17061708
}
17071709

1710+
/// Switches this builder to full-document mode: allows the `html`, `head`, `link`, `title`, `meta` and `body` tags (plus `name`/`content` on `meta` and `lang` on `html`), sets `is_document` so that `clean` parses its input as a whole document instead of a document fragment (like a div), and returns `self` for chaining.
1711+
pub fn parse_as_document(&mut self) -> &mut Self {
1712+
// TODO: expand this allow-list; only a minimal set of document-level tags and attributes is added so far (e.g. no attributes are added for `link` here).
1713+
self.add_tags(["html", "head", "link", "title", "meta", "body"])
1714+
.add_tag_attributes("meta", ["name", "content"])
1715+
.add_tag_attributes("html", ["lang"]);
1716+
1717+
self.is_document = true;
1718+
self
1719+
}
1720+
17081721
/// Sanitizes an HTML fragment in a string according to the configured options.
17091722
///
17101723
/// # Examples
@@ -1725,7 +1738,11 @@ impl<'a> Builder<'a> {
17251738
/// # }
17261739
/// # fn main() { do_main().unwrap() }
17271740
pub fn clean(&self, src: &str) -> Document {
1728-
let parser = Self::make_parser();
1741+
let parser = if self.is_document { // flag is set by `parse_as_document`
1742+
html::parse_document(RcDom::default(), html::ParseOpts::default()) // whole-document parser with default parse options
1743+
} else {
1744+
Self::make_parser() // default path; presumably the fragment parser — confirm against `make_parser`
1745+
};
17291746
let dom = parser.one(src); // feed the entire input string to the chosen parser in one shot
17301747
self.clean_dom(dom)
17311748
}
@@ -1788,7 +1805,10 @@ impl<'a> Builder<'a> {
17881805
.is_none());
17891806
}
17901807
for tag_name in &self.clean_content_tags {
1791-
assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1808+
assert!(
1809+
!self.tags.contains(tag_name),
1810+
"`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"
1811+
);
17921812
assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
17931813
}
17941814
let body = {

0 commit comments

Comments
 (0)