@@ -366,6 +366,7 @@ pub struct Builder<'a> {
366
366
strip_comments : bool ,
367
367
id_prefix : Option < & ' a str > ,
368
368
generic_attribute_prefixes : Option < HashSet < & ' a str > > ,
369
+ is_document : bool ,
369
370
}
370
371
371
372
impl < ' a > Default for Builder < ' a > {
@@ -486,6 +487,7 @@ impl<'a> Default for Builder<'a> {
486
487
strip_comments : true ,
487
488
id_prefix : None ,
488
489
generic_attribute_prefixes : None ,
490
+ is_document : false ,
489
491
}
490
492
}
491
493
}
@@ -1705,6 +1707,17 @@ impl<'a> Builder<'a> {
1705
1707
}
1706
1708
}
1707
1709
1710
+ /// Use this to parse a full document instead of a document fragment (like a div). Calls `add_tags` with `html`, `head`, `link`, `title`, `meta` and `body`, calls `add_tag_attributes` for `meta` (`name`, `content`) and `html` (`lang`), sets the `is_document` flag, and returns `&mut Self` so further builder calls can be chained.
1711
+ pub fn parse_as_document ( & mut self ) -> & mut Self {
1712
+ // TODO: expand on this -- only a minimal set of document-level tags and attributes is registered so far
1713
+ self . add_tags ( [ "html" , "head" , "link" , "title" , "meta" , "body" ] )
1714
+ . add_tag_attributes ( "meta" , [ "name" , "content" ] )
1715
+ . add_tag_attributes ( "html" , [ "lang" ] ) ;
1716
+
1717
+ self . is_document = true ; // read by `clean` to choose `html::parse_document` over `Self::make_parser()`
1718
+ self
1719
+ }
1720
+
1708
1721
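/// Sanitizes an HTML fragment in a string according to the configured options; if the builder was switched to document mode with `parse_as_document`, the input is parsed as a full HTML document instead.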
1709
1722
///
1710
1723
/// # Examples
@@ -1725,7 +1738,11 @@ impl<'a> Builder<'a> {
1725
1738
/// # }
1726
1739
/// # fn main() { do_main().unwrap() }
1727
1740
pub fn clean ( & self , src : & str ) -> Document {
1728
- let parser = Self :: make_parser ( ) ;
1741
+ let parser = if self . is_document { // flag set by `parse_as_document`
1742
+ html:: parse_document ( RcDom :: default ( ) , html:: ParseOpts :: default ( ) ) // document parser over a fresh `RcDom`, default parse options
1743
+ } else {
1744
+ Self :: make_parser ( ) // the parser that was used unconditionally before the `is_document` flag existed (presumably a fragment parser -- confirm)
1745
+ } ;
1729
1746
let dom = parser. one ( src) ; // hand the whole input string to the parser in a single call
1730
1747
self . clean_dom ( dom) // the resulting DOM is sanitized by `clean_dom`, whose result is returned
1731
1748
}
@@ -1788,7 +1805,10 @@ impl<'a> Builder<'a> {
1788
1805
. is_none( ) ) ;
1789
1806
}
1790
1807
for tag_name in & self . clean_content_tags {
1791
- assert ! ( !self . tags. contains( tag_name) , "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time" ) ;
1808
+ assert ! (
1809
+ !self . tags. contains( tag_name) ,
1810
+ "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"
1811
+ ) ;
1792
1812
assert ! ( !self . tag_attributes. contains_key( tag_name) , "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time" ) ;
1793
1813
}
1794
1814
let body = {
0 commit comments