From a9dfa1c42c89b3174629c16bd341dd2524514276 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Mon, 26 Aug 2024 13:48:45 +0100 Subject: [PATCH] Attribute buffer is no longer shared, removing one `RefCell`. --- src/base/align.rs | 2 +- src/parser/lexer/actions.rs | 12 ++++++++---- src/parser/lexer/lexeme/token_outline.rs | 6 +++--- src/parser/lexer/mod.rs | 14 +++----------- src/parser/mod.rs | 4 ++-- src/parser/tree_builder_simulator/mod.rs | 4 ++-- src/rewritable_units/tokens/attributes.rs | 11 +++++------ src/rewritable_units/tokens/capturer/to_token.rs | 3 +-- src/rewritable_units/tokens/start_tag.rs | 2 +- src/selectors_vm/attribute_matcher.rs | 7 +++---- src/transform_stream/dispatcher.rs | 16 ++++++++-------- src/transform_stream/mod.rs | 2 +- 12 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/base/align.rs b/src/base/align.rs index 696eaed1..dc5802e2 100644 --- a/src/base/align.rs +++ b/src/base/align.rs @@ -2,7 +2,7 @@ pub trait Align { fn align(&mut self, offset: usize); } -impl Align for Vec { +impl Align for &mut [T] { #[inline] fn align(&mut self, offset: usize) { for item in self.iter_mut() { diff --git a/src/parser/lexer/actions.rs b/src/parser/lexer/actions.rs index 70e0c919..d90b41c4 100644 --- a/src/parser/lexer/actions.rs +++ b/src/parser/lexer/actions.rs @@ -136,13 +136,11 @@ impl StateMachineActions for Lexer { #[inline] fn create_start_tag(&mut self, _context: &mut ParserContext, _input: &[u8]) { - self.attr_buffer.borrow_mut().clear(); - self.current_tag_token = Some(StartTag { name: Range::default(), name_hash: LocalNameHash::new(), ns: Namespace::default(), - attributes: Rc::clone(&self.attr_buffer), + attributes: Vec::new(), self_closing: false, }); } @@ -314,7 +312,13 @@ impl StateMachineActions for Lexer { #[inline] fn finish_attr(&mut self, _context: &mut ParserContext, _input: &[u8]) { if let Some(attr) = self.current_attr.take() { - self.attr_buffer.borrow_mut().push(attr); + match self.current_tag_token.as_mut() { + None => unreachable!(), + Some(EndTag { .. }) => unreachable!(), + Some(StartTag { attributes, .. }) => { + attributes.push(attr); + } + } } } diff --git a/src/parser/lexer/lexeme/token_outline.rs b/src/parser/lexer/lexeme/token_outline.rs index f1ad1765..97eead4d 100644 --- a/src/parser/lexer/lexeme/token_outline.rs +++ b/src/parser/lexer/lexeme/token_outline.rs @@ -1,6 +1,6 @@ use crate::base::{Align, Range}; use crate::html::{LocalNameHash, Namespace, TextType}; -use crate::parser::SharedAttributeBuffer; +use crate::parser::AttributeBuffer; #[derive(Debug, Default, Copy, Clone)] pub struct AttributeOutline { @@ -24,7 +24,7 @@ pub enum TagTokenOutline { name: Range, name_hash: LocalNameHash, ns: Namespace, - attributes: SharedAttributeBuffer, + attributes: AttributeBuffer, self_closing: bool, }, @@ -57,7 +57,7 @@ impl Align for TagTokenOutline { name, attributes, .. } => { name.align(offset); - attributes.borrow_mut().align(offset); + attributes.as_mut_slice().align(offset); } TagTokenOutline::EndTag { name, .. } => name.align(offset), } diff --git a/src/parser/lexer/mod.rs b/src/parser/lexer/mod.rs index 64682045..f2c8d88e 100644 --- a/src/parser/lexer/mod.rs +++ b/src/parser/lexer/mod.rs @@ -4,6 +4,7 @@ mod actions; mod conditions; mod lexeme; +pub use self::lexeme::*; use crate::base::{Align, Range}; use crate::html::{LocalNameHash, Namespace, TextType}; use crate::parser::state_machine::{ @@ -11,12 +12,6 @@ use crate::parser::state_machine::{ }; use crate::parser::{ParserContext, ParserDirective, ParsingAmbiguityError, TreeBuilderFeedback}; use crate::rewriter::RewritingError; -use std::cell::RefCell; -use std::rc::Rc; - -pub use self::lexeme::*; - -const DEFAULT_ATTR_BUFFER_CAPACITY: usize = 256; pub trait LexemeSink { fn handle_tag(&mut self, lexeme: &TagLexeme) -> Result; @@ -27,7 +22,8 @@ pub trait LexemeSink { } pub type State = fn(&mut Lexer, context: &mut ParserContext, &[u8]) -> StateResult; -pub type SharedAttributeBuffer = Rc>>; + +pub type AttributeBuffer = Vec; pub struct Lexer { next_pos: usize, @@ -42,7 +38,6 @@ pub struct Lexer { current_attr: Option, last_start_tag_name_hash: LocalNameHash, closing_quote: u8, - attr_buffer: SharedAttributeBuffer, last_text_type: TextType, feedback_directive: FeedbackDirective, } @@ -62,9 +57,6 @@ impl Lexer { current_attr: None, last_start_tag_name_hash: LocalNameHash::default(), closing_quote: b'"', - attr_buffer: Rc::new(RefCell::new(Vec::with_capacity( - DEFAULT_ATTR_BUFFER_CAPACITY, - ))), last_text_type: TextType::Data, feedback_directive: FeedbackDirective::None, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fdb3316d..058e56a5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,8 +7,8 @@ mod tree_builder_simulator; use self::lexer::Lexer; pub use self::lexer::{ - AttributeOutline, Lexeme, LexemeSink, NonTagContentLexeme, NonTagContentTokenOutline, - SharedAttributeBuffer, TagLexeme, TagTokenOutline, + AttributeBuffer, AttributeOutline, Lexeme, LexemeSink, NonTagContentLexeme, + NonTagContentTokenOutline, TagLexeme, TagTokenOutline, }; use self::state_machine::{ActionError, ParsingTermination, StateMachine}; pub use self::tag_scanner::TagHintSink; diff --git a/src/parser/tree_builder_simulator/mod.rs b/src/parser/tree_builder_simulator/mod.rs index d224aa78..24fa183d 100644 --- a/src/parser/tree_builder_simulator/mod.rs +++ b/src/parser/tree_builder_simulator/mod.rs @@ -250,7 +250,7 @@ impl TreeBuilderSimulator { // to decide on foreign context exit return request_lexeme(|this, lexeme| { expect_tag!(lexeme, StartTag { ref attributes, .. } => { - for attr in attributes.borrow().iter() { + for attr in attributes.iter() { let name = lexeme.part(attr.name); if eq_case_insensitive(&name, b"color") @@ -279,7 +279,7 @@ impl TreeBuilderSimulator { let name = lexeme.part(name); if !self_closing && eq_case_insensitive(&name, b"annotation-xml") { - for attr in attributes.borrow().iter() { + for attr in attributes.iter() { let name = lexeme.part(attr.name); let value = lexeme.part(attr.value); diff --git a/src/rewritable_units/tokens/attributes.rs b/src/rewritable_units/tokens/attributes.rs index 8ad8d11b..5dd66feb 100644 --- a/src/rewritable_units/tokens/attributes.rs +++ b/src/rewritable_units/tokens/attributes.rs @@ -1,5 +1,5 @@ use crate::base::Bytes; -use crate::parser::SharedAttributeBuffer; +use crate::parser::AttributeBuffer; use crate::rewritable_units::Serialize; use encoding_rs::Encoding; use lazycell::LazyCell; @@ -139,7 +139,7 @@ impl Debug for Attribute<'_> { pub struct Attributes<'i> { input: &'i Bytes<'i>, - attribute_buffer: SharedAttributeBuffer, + attribute_buffer: &'i AttributeBuffer, items: LazyCell>>, encoding: &'static Encoding, } @@ -147,7 +147,7 @@ pub struct Attributes<'i> { impl<'i> Attributes<'i> { pub(super) fn new( input: &'i Bytes<'i>, - attribute_buffer: SharedAttributeBuffer, + attribute_buffer: &'i AttributeBuffer, encoding: &'static Encoding, ) -> Self { Attributes { @@ -196,7 +196,6 @@ impl<'i> Attributes<'i> { fn init_items(&self) -> Vec> { self.attribute_buffer - .borrow() .iter() .map(|a| { Attribute::new( @@ -227,8 +226,8 @@ impl<'i> Attributes<'i> { } #[cfg(test)] - pub fn raw_attributes(&self) -> (&'i Bytes<'i>, SharedAttributeBuffer) { - (self.input, std::rc::Rc::clone(&self.attribute_buffer)) + pub fn raw_attributes(&self) -> (&'i Bytes<'i>, &'i AttributeBuffer) { + (self.input, self.attribute_buffer) } } diff --git a/src/rewritable_units/tokens/capturer/to_token.rs b/src/rewritable_units/tokens/capturer/to_token.rs index 70a2923e..ce98b67d 100644 --- a/src/rewritable_units/tokens/capturer/to_token.rs +++ b/src/rewritable_units/tokens/capturer/to_token.rs @@ -2,7 +2,6 @@ use super::*; use crate::html::TextType; use crate::parser::{NonTagContentLexeme, NonTagContentTokenOutline, TagLexeme, TagTokenOutline}; use encoding_rs::Encoding; -use std::rc::Rc; pub enum ToTokenResult<'i> { Token(Box>), @@ -44,7 +43,7 @@ impl ToToken for TagLexeme<'_> { StartTag::new_token( self.part(name), - Attributes::new(self.input(), Rc::clone(attributes), encoding), + Attributes::new(self.input(), attributes, encoding), ns, self_closing, self.raw(), diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index f36c0b6f..d182844c 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -167,7 +167,7 @@ impl<'i> StartTag<'i> { } #[cfg(test)] - pub fn raw_attributes(&self) -> (&'i Bytes<'i>, crate::parser::SharedAttributeBuffer) { + pub fn raw_attributes(&self) -> (&'i Bytes<'i>, &'i crate::parser::AttributeBuffer) { self.attributes.raw_attributes() } } diff --git a/src/selectors_vm/attribute_matcher.rs b/src/selectors_vm/attribute_matcher.rs index 3dd3cecc..3d2c3e82 100644 --- a/src/selectors_vm/attribute_matcher.rs +++ b/src/selectors_vm/attribute_matcher.rs @@ -1,7 +1,7 @@ use super::compiler::AttrExprOperands; use crate::base::Bytes; use crate::html::Namespace; -use crate::parser::{AttributeOutline, SharedAttributeBuffer}; +use crate::parser::{AttributeBuffer, AttributeOutline}; use encoding_rs::UTF_8; use lazy_static::lazy_static; use lazycell::LazyCell; @@ -22,7 +22,7 @@ type MemoizedAttrValue<'i> = LazyCell>>; pub struct AttributeMatcher<'i> { input: &'i Bytes<'i>, - attributes: SharedAttributeBuffer, + attributes: &'i AttributeBuffer, id: MemoizedAttrValue<'i>, class: MemoizedAttrValue<'i>, is_html_element: bool, @@ -30,7 +30,7 @@ pub struct AttributeMatcher<'i> { impl<'i> AttributeMatcher<'i> { #[inline] - pub fn new(input: &'i Bytes<'i>, attributes: SharedAttributeBuffer, ns: Namespace) -> Self { + pub fn new(input: &'i Bytes<'i>, attributes: &'i AttributeBuffer, ns: Namespace) -> Self { AttributeMatcher { input, attributes, @@ -43,7 +43,6 @@ impl<'i> AttributeMatcher<'i> { #[inline] fn find(&self, lowercased_name: &Bytes) -> Option { self.attributes - .borrow() .iter() .find(|a| { if lowercased_name.len() != a.name.end - a.name.start { diff --git a/src/transform_stream/dispatcher.rs b/src/transform_stream/dispatcher.rs index bd624719..f0d62d03 100644 --- a/src/transform_stream/dispatcher.rs +++ b/src/transform_stream/dispatcher.rs @@ -1,21 +1,19 @@ -use super::*; use crate::base::{Bytes, Range, SharedEncoding}; use crate::html::{LocalName, Namespace}; use crate::parser::{ - Lexeme, LexemeSink, NonTagContentLexeme, ParserDirective, ParserOutputSink, TagHintSink, - TagLexeme, TagTokenOutline, + AttributeBuffer, Lexeme, LexemeSink, NonTagContentLexeme, ParserDirective, ParserOutputSink, + TagHintSink, TagLexeme, TagTokenOutline, }; use crate::rewritable_units::{ DocumentEnd, Serialize, ToToken, Token, TokenCaptureFlags, TokenCapturer, TokenCapturerEvent, }; use crate::rewriter::RewritingError; -use std::rc::Rc; use TagTokenOutline::*; pub struct AuxStartTagInfo<'i> { pub input: &'i Bytes<'i>, - pub attr_buffer: SharedAttributeBuffer, + pub attr_buffer: &'i AttributeBuffer, pub self_closing: bool, } @@ -189,7 +187,7 @@ where &mut self.transform_controller, AuxStartTagInfo { input, - attr_buffer: Rc::clone($attributes), + attr_buffer: $attributes, self_closing: $self_closing, }, ) @@ -204,7 +202,9 @@ where ref attributes, self_closing, .. - } => get_flags_from_aux_info_res!(aux_info_req, attributes, self_closing), + } => { + get_flags_from_aux_info_res!(aux_info_req, &attributes, self_closing) + } _ => unreachable!("Tag should be a start tag at this point"), }, @@ -223,7 +223,7 @@ where match self.transform_controller.handle_start_tag(name, ns) { Ok(flags) => Ok(flags), Err(DispatcherError::InfoRequest(aux_info_req)) => { - get_flags_from_aux_info_res!(aux_info_req, attributes, self_closing) + get_flags_from_aux_info_res!(aux_info_req, &attributes, self_closing) } Err(DispatcherError::RewritingError(e)) => Err(e), } diff --git a/src/transform_stream/mod.rs b/src/transform_stream/mod.rs index 95bced01..7298be8c 100644 --- a/src/transform_stream/mod.rs +++ b/src/transform_stream/mod.rs @@ -3,7 +3,7 @@ mod dispatcher; use self::dispatcher::Dispatcher; use crate::base::SharedEncoding; use crate::memory::{Arena, SharedMemoryLimiter}; -use crate::parser::{Parser, ParserDirective, SharedAttributeBuffer}; +use crate::parser::{Parser, ParserDirective}; use crate::rewriter::RewritingError; pub use self::dispatcher::{