Skip to content

Commit

Permalink
Streaming content mutations
Browse files Browse the repository at this point in the history
  • Loading branch information
kornelski committed Nov 6, 2024
1 parent 8a7d8b7 commit fc117d1
Show file tree
Hide file tree
Showing 12 changed files with 387 additions and 19 deletions.
13 changes: 8 additions & 5 deletions fuzz/test_case/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use std::ffi::{CStr, CString};

use encoding_rs::*;
use lol_html::html_content::ContentType;
use lol_html::{comments, doc_comments, doc_text, element, text, HtmlRewriter, MemorySettings, Settings};
use lol_html::{comments, doc_comments, doc_text, element, streaming, text};
use lol_html::{HtmlRewriter, MemorySettings, Settings};

include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

Expand Down Expand Up @@ -111,10 +112,12 @@ fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) {
&format!("<!--[/ELEMENT('{selector}')]-->"),
ContentType::Html,
);
el.set_inner_content(
&format!("<!--Replaced ({selector}) -->"),
ContentType::Html,
);

let replaced = format!("<!--Replaced ({selector}) -->");
el.streaming_set_inner_content(streaming!(move |sink| {
sink.write_str(&replaced, ContentType::Html);
Ok(())
}));

Ok(())
}),
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub mod errors {
pub mod html_content {
pub use super::rewritable_units::{
Attribute, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag,
TextChunk, UserData,
StreamingHandler, StreamingHandlerSink, TextChunk, UserData,
};

pub use super::html::TextType;
Expand Down
3 changes: 1 addition & 2 deletions src/rewritable_units/document_end.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::text_encoder::StreamingHandlerSink;
use super::ContentType;
use super::{ContentType, StreamingHandlerSink};
use encoding_rs::Encoding;

use crate::transform_stream::OutputSink;
Expand Down
102 changes: 98 additions & 4 deletions src/rewritable_units/element.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use super::mutations::MutationsInner;
use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StringChunk};
use super::{
Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StreamingHandler,
StringChunk,
};
use crate::base::Bytes;
use crate::rewriter::{HandlerTypes, LocalHandlerTypes};
use encoding_rs::Encoding;
Expand Down Expand Up @@ -241,6 +244,19 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
.push_back((content, content_type).into());
}

/// Inserts content produced by a [`StreamingHandler`] before the element.
///
/// Subsequent calls to the method append to the previously inserted content.
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
pub fn streaming_before(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // The "before" content is stored on the start tag's mutations.
    let pending = self.start_tag.mutations.mutate();
    pending.content_before.push_back(string_writer.into());
}

/// Inserts `content` after the element.
///
/// Consequent calls to the method prepend `content` to the previously inserted content.
Expand Down Expand Up @@ -283,6 +299,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
.push_front(chunk);
}

/// Inserts content produced by a [`StreamingHandler`] after the element.
///
/// Subsequent calls to the method prepend to the previously inserted content.
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
pub fn streaming_after(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // Wrap the handler as a chunk and reuse the shared "after" insertion path.
    let chunk = string_writer.into();
    self.after_chunk(chunk);
}

/// Prepends `content` to the element's inner content, i.e. inserts content right after
/// the element's start tag.
///
Expand Down Expand Up @@ -333,6 +359,20 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Prepends content produced by a [`StreamingHandler`] to the element's inner content,
/// i.e. inserts content right after the element's start tag.
///
/// Subsequent calls to the method prepend to the previously inserted content.
/// A call to the method has no effect if the element is an [empty element].
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
pub fn streaming_prepend(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // Wrap the handler as a chunk and reuse the shared "prepend" insertion path.
    let chunk = string_writer.into();
    self.prepend_chunk(chunk);
}

/// Appends `content` to the element's inner content, i.e. inserts content right before
/// the element's end tag.
///
Expand Down Expand Up @@ -379,6 +419,19 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Appends content produced by a [`StreamingHandler`] to the element's inner content,
/// i.e. inserts content right before the element's end tag.
///
/// Subsequent calls to the method append to the previously inserted content.
/// A call to the method has no effect if the element is an [empty element].
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
pub fn streaming_append(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // Wrap the handler as a chunk and reuse the shared "append" insertion path.
    let chunk = string_writer.into();
    self.append_chunk(chunk);
}

/// Replaces inner content of the element with `content`.
///
/// Consequent calls to the method overwrite previously inserted content.
Expand Down Expand Up @@ -429,6 +482,19 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Replaces the inner content of the element with content produced by a [`StreamingHandler`].
///
/// Subsequent calls to the method overwrite previously inserted content.
/// A call to the method has no effect if the element is an [empty element].
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
///
/// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element
pub fn streaming_set_inner_content(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // Wrap the handler as a chunk and reuse the shared inner-content replacement path.
    let chunk = string_writer.into();
    self.set_inner_content_chunk(chunk);
}

/// Replaces the element and its inner content with `content`.
///
/// Consequent calls to the method overwrite previously inserted content.
Expand Down Expand Up @@ -470,6 +536,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> {
}
}

/// Replaces the element and its inner content with content produced by a
/// [`StreamingHandler`].
///
/// Subsequent calls to the method overwrite previously inserted content.
///
/// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure.
pub fn streaming_replace(&mut self, string_writer: Box<dyn StreamingHandler>) {
    // Wrap the handler as a chunk and reuse the shared replacement path.
    let chunk = string_writer.into();
    self.replace_chunk(chunk);
}

/// Removes the element and its inner content.
#[inline]
pub fn remove(&mut self) {
Expand Down Expand Up @@ -638,6 +714,7 @@ mod tests {
use crate::rewritable_units::test_utils::*;
use crate::*;
use encoding_rs::{Encoding, EUC_JP, UTF_8};
use rewritable_units::StreamingHandlerSink;

fn rewrite_element(
html: &[u8],
Expand All @@ -660,7 +737,11 @@ mod tests {
el.before("[before: should be removed]", ContentType::Text);
el.after("[after: should be removed]", ContentType::Text);
el.append("[append: should be removed]", ContentType::Text);
el.before("[before: should be removed]", ContentType::Text);
el.streaming_before(Box::new(|sink: &mut StreamingHandlerSink<'_>| {
sink.write_str("[before:", ContentType::Text);
sink.write_str(" should be removed]", ContentType::Text);
Ok(())
}));
Ok(())
}),
],
Expand Down Expand Up @@ -962,7 +1043,10 @@ mod tests {
encoded("<div><span>Hi<inner-remove-me>RemoveŴ</inner-remove-me></span></div>")
{
let output = rewrite_element(&html, enc, "span", |el| {
el.prepend("<prepended>", ContentType::Html);
el.streaming_prepend(streaming!(|s| {
s.write_str("<prepended>", ContentType::Html);
Ok(())
}));
el.append("<appended>", ContentType::Html);
el.set_inner_content("<imgŵ>", ContentType::Html);
el.set_inner_content("<imgŵ>", ContentType::Text);
Expand Down Expand Up @@ -1096,7 +1180,17 @@ mod tests {
#[test]
fn self_closing_element() {
let output = rewrite_element(b"<svg><foo/>Hi</foo></svg>", UTF_8, "foo", |el| {
el.after("<!--after-->", ContentType::Html);
el.after("->", ContentType::Html);
el.streaming_after(streaming!(|sink| {
sink.write_str("er-", ContentType::Html);
Ok(())
}));
el.after("t", ContentType::Html);
el.streaming_after(streaming!(|sink| {
sink.write_str("af", ContentType::Html);
Ok(())
}));
el.after("<!--", ContentType::Html);
el.set_tag_name("bar").unwrap();
});

Expand Down
3 changes: 2 additions & 1 deletion src/rewritable_units/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ use std::any::Any;

pub use self::document_end::*;
pub use self::element::*;
pub use self::mutations::ContentType;
pub use self::mutations::{ContentType, StreamingHandler};
pub(crate) use self::mutations::{Mutations, StringChunk};
pub use self::text_encoder::StreamingHandlerSink;
pub use self::tokens::*;

/// Data that can be attached to a rewritable unit by a user and shared between content handler
Expand Down
53 changes: 52 additions & 1 deletion src/rewritable_units/mutations.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::text_encoder::StreamingHandlerSink;
use super::StreamingHandlerSink;
use std::error::Error as StdError;
use std::panic::{RefUnwindSafe, UnwindSafe};

type BoxResult = Result<(), Box<dyn StdError + Send + Sync>>;

Expand Down Expand Up @@ -92,6 +93,7 @@ impl From<(&str, ContentType)> for StringChunk {

/// A piece of content that is written to a [`StreamingHandlerSink`].
pub(crate) enum StringChunk {
    /// Content already held in memory, written with `sink.write_str()` using the
    /// accompanying [`ContentType`].
    Buffer(Box<str>, ContentType),
    /// Content produced by a [`StreamingHandler`], whose `write_all()` is called with
    /// the sink when the chunk is written.
    Stream(Box<dyn StreamingHandler>),
}

#[derive(Default)]
Expand Down Expand Up @@ -126,8 +128,57 @@ impl DynamicString {
StringChunk::Buffer(content, content_type) => {
sink.write_str(&content, content_type);
}
StringChunk::Stream(handler) => {
handler.write_all(sink)?;
}
};
}
Ok(())
}
}

/// A callback used to write content asynchronously.
///
/// Closures of type `FnOnce(&mut StreamingHandlerSink<'_>) -> Result<(), Box<dyn Error + Send + Sync>>`
/// that are `Send + 'static` implement this trait.
pub trait StreamingHandler: Send {
    /// This method is called only once, and is expected to write content
    /// by calling the [`sink.write_str()`](StreamingHandlerSink::write_str) one or more times.
    ///
    /// Multiple calls to `sink.write_str()` append more content to the output.
    ///
    /// See [`StreamingHandlerSink`].
    ///
    /// # Errors
    ///
    /// An error returned from this method is propagated to the caller that is
    /// writing the content out.
    fn write_all(self: Box<Self>, sink: &mut StreamingHandlerSink<'_>) -> BoxResult;

    // Safety: due to lack of Sync, this trait must not have `&self` methods.
    // `StringChunk` stores a `Box<dyn StreamingHandler>` and is unsafely marked `Sync`;
    // that is only justified while every method here takes exclusive ownership of the handler.
}

/// Avoid requiring `StreamingHandler` to be `Sync`.
///
/// SAFETY: `StreamingHandler` only has a method taking exclusive ownership
/// (`self: Box<Self>`), so there's no sharing possible through a `&StringChunk`.
unsafe impl Sync for StringChunk {}
// `dyn StreamingHandler` does not name `RefUnwindSafe`/`UnwindSafe` in its bounds, so these
// auto traits are not derived for `StringChunk` and are asserted manually here.
// NOTE(review): this trusts that a handler is never observed in a broken state after a
// panic — confirm against how callers use `catch_unwind`, if at all.
impl RefUnwindSafe for StringChunk {}
impl UnwindSafe for StringChunk {}

/// Boxes a suitable closure so it can be passed wherever a
/// `Box<dyn StreamingHandler>` is expected.
impl<F: FnOnce(&mut StreamingHandlerSink<'_>) -> BoxResult + Send + 'static> From<F>
    for Box<dyn StreamingHandler>
{
    #[inline]
    fn from(callback: F) -> Self {
        // The unsized coercion to the trait object happens on return.
        let boxed: Box<F> = Box::new(callback);
        boxed
    }
}

/// Lets any `Send + 'static` closure taking a sink and returning `BoxResult`
/// act as a [`StreamingHandler`].
impl<F: FnOnce(&mut StreamingHandlerSink<'_>) -> BoxResult + Send + 'static> StreamingHandler
    for F
{
    #[inline]
    fn write_all(self: Box<F>, sink: &mut StreamingHandlerSink<'_>) -> BoxResult {
        // Move the closure out of its box, then consume it with a single call.
        let callback = *self;
        callback(sink)
    }
}

impl From<Box<dyn StreamingHandler>> for StringChunk {
#[inline]
fn from(writer: Box<dyn StreamingHandler>) -> Self {
Self::Stream(writer)
}
}
28 changes: 28 additions & 0 deletions src/rewritable_units/text_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,31 @@ impl TextEncoder {
}
}
}

/// Feeds progressively longer strings through a `StreamingHandlerSink` using ISO-8859-2
/// and checks every emitted byte, plus the total number of bytes written.
#[test]
fn long_text() {
    // Strings are written until their UTF-8 length exceeds this many bytes.
    const LIMIT: usize = 1 << 17;

    let mut written = 0;
    let mut expected = 0;
    let mut handler = |ch: &[u8]| {
        let all_match = ch.iter().all(|&byte| {
            written += 1;
            // Odd positions hold `ą` (byte 177 in ISO-8859-2); even positions hold
            // the ASCII digits cycling 0..=9.
            let want = if written % 2 == 1 {
                177
            } else {
                b'0' + ((written / 2 - 1) % 10) as u8
            };
            byte == want
        });
        assert!(all_match, "@{written} {ch:?}");
    };
    let mut sink = StreamingHandlerSink::new(encoding_rs::ISO_8859_2, &mut handler);

    let mut text = "ą0ą1ą2ą3ą4ą5ą6ą7ą8ą9".repeat(128);
    loop {
        if text.len() > LIMIT {
            break;
        }
        // Double the text before every write.
        text = text.repeat(2);
        // Each char is expected to come out as exactly one byte.
        expected += text.chars().count();
        sink.write_str(&text, ContentType::Text);
    }
    assert_eq!(expected, written);
}
Loading

0 comments on commit fc117d1

Please sign in to comment.