From 7eff22493286fc38d79d87e2cdd9d12895a28f38 Mon Sep 17 00:00:00 2001 From: Kornel Date: Fri, 1 Nov 2024 13:55:17 +0000 Subject: [PATCH] C API for streaming content mutations --- c-api/c-tests/src/test.c | 6 +- c-api/c-tests/src/test_element_api.c | 112 ++++++++++++ c-api/cbindgen.toml | 16 ++ c-api/include/lol_html.h | 244 +++++++++++++++++++++++++++ c-api/src/comment.rs | 49 ++---- c-api/src/doctype.rs | 13 +- c-api/src/document_end.rs | 12 +- c-api/src/element.rs | 136 +++------------ c-api/src/errors.rs | 9 + c-api/src/lib.rs | 128 ++++++++++++-- c-api/src/streaming.rs | 116 +++++++++++++ c-api/src/text_chunk.rs | 55 ++---- 12 files changed, 676 insertions(+), 220 deletions(-) create mode 100644 c-api/cbindgen.toml create mode 100644 c-api/src/streaming.rs diff --git a/c-api/c-tests/src/test.c b/c-api/c-tests/src/test.c index 70ce42dc..7ea22ef4 100644 --- a/c-api/c-tests/src/test.c +++ b/c-api/c-tests/src/test.c @@ -16,5 +16,9 @@ int run_tests() { subtest("Element API", element_api_test); subtest("Document end API", document_end_api_test); subtest("Memory limiting", test_memory_limiting); - return done_testing(); + int res = done_testing(); + if (res) { + fprintf(stderr, "\nSome tests have failed\n"); + } + return res; } diff --git a/c-api/c-tests/src/test_element_api.c b/c-api/c-tests/src/test_element_api.c index 70236b18..95f9a7e5 100644 --- a/c-api/c-tests/src/test_element_api.c +++ b/c-api/c-tests/src/test_element_api.c @@ -238,6 +238,117 @@ static void test_insert_content_around_element(lol_html_selector_t *selector, vo ); } +//------------------------------------------------------------------------- +EXPECT_OUTPUT( + streaming_mutations_output_sink, + "&before
Hi
&after\xf0\x9f\x98\x82", + &EXPECTED_USER_DATA, + sizeof(EXPECTED_USER_DATA) +); + +static void loltest_drop(void *user_data) { + int *drops = user_data; + (*drops)++; +} + +static int loltest_write_all_callback_before(lol_html_streaming_sink_t *sink, void *user_data) { + int *counter = user_data; + ok(*counter >= 100 && *counter <= 103); + + const char *before = "&before"; + return lol_html_streaming_sink_write_str(sink, before, strlen(before), false); +} + +static int loltest_write_all_callback_after(lol_html_streaming_sink_t *sink, void *user_data) { + int *counter = user_data; + ok(*counter >= 100 && *counter <= 103); + + const char *after = "&after"; + const char emoji[] = {0xf0,0x9f,0x98,0x82}; + return lol_html_streaming_sink_write_str(sink, after, strlen(after), false) || + lol_html_streaming_sink_write_utf8_chunk(sink, &emoji[0], 1, false) || + lol_html_streaming_sink_write_utf8_chunk(sink, &emoji[1], 1, false) || + lol_html_streaming_sink_write_utf8_chunk(sink, &emoji[2], 1, false) || + lol_html_streaming_sink_write_utf8_chunk(sink, &emoji[3], 1, false); +} + +static int loltest_write_all_callback_prepend(lol_html_streaming_sink_t *sink, void *user_data) { + int *counter = user_data; + ok(*counter >= 100 && *counter <= 103); + + const char *prepend1 = ""; + return lol_html_streaming_sink_write_utf8_chunk(sink, prepend1, strlen(prepend1), true) || + lol_html_streaming_sink_write_utf8_chunk(sink, prepend2, strlen(prepend2), true); +} + +static int loltest_write_all_callback_append(lol_html_streaming_sink_t *sink, void *user_data) { + int *counter = user_data; + ok(*counter >= 100 && *counter <= 103); + + const char *append = ""; + return lol_html_streaming_sink_write_str(sink, append, strlen(append), true); +} + +static lol_html_rewriter_directive_t streaming_mutations_around_element( + lol_html_element_t *element, + void *user_data +) { + note("Stream before/prepend"); + ok(!lol_html_element_streaming_before(element, &(lol_html_streaming_handler_t){ + .write_all_callback = loltest_write_all_callback_before, + .user_data = user_data, + .drop_callback = loltest_drop, + })); + ok(!lol_html_element_streaming_prepend(element, &(lol_html_streaming_handler_t){ + .write_all_callback = loltest_write_all_callback_prepend, + .user_data = user_data, + // tests null drop callback + })); + note("Stream after/append"); + ok(!lol_html_element_streaming_append(element, &(lol_html_streaming_handler_t){ + .write_all_callback = loltest_write_all_callback_append, + .user_data = user_data, + .drop_callback = loltest_drop, + })); + ok(!lol_html_element_streaming_after(element, &(lol_html_streaming_handler_t){ + .write_all_callback = loltest_write_all_callback_after, + .user_data = user_data, + .drop_callback = loltest_drop, + })); + + return LOL_HTML_CONTINUE; +} + +static void test_streaming_mutations_around_element(lol_html_selector_t *selector, void *user_data) { + UNUSED(user_data); + lol_html_rewriter_builder_t *builder = lol_html_rewriter_builder_new(); + + int drop_count = 100; + + int err = lol_html_rewriter_builder_add_element_content_handlers( + builder, + selector, + &streaming_mutations_around_element, + &drop_count, + NULL, + NULL, + NULL, + NULL + ); + + ok(!err); + + run_rewriter( + builder, + "
Hi
", + streaming_mutations_output_sink, + user_data + ); + + ok(drop_count == 103); // one has no drop callback on purpose +} + //------------------------------------------------------------------------- EXPECT_OUTPUT( set_element_inner_content_output_sink, @@ -706,6 +817,7 @@ void element_api_test() { test_iterate_attributes(selector, &user_data); test_get_and_modify_attributes(selector, &user_data); test_insert_content_around_element(selector, &user_data); + test_streaming_mutations_around_element(selector, &user_data); lol_html_selector_free(selector); } diff --git a/c-api/cbindgen.toml b/c-api/cbindgen.toml new file mode 100644 index 00000000..bfa2aae8 --- /dev/null +++ b/c-api/cbindgen.toml @@ -0,0 +1,16 @@ +# To generate a header: +# +# cargo expand > tmp.rs +# cbindgen tmp.rs + +language = "C" +tab_width = 4 +documentation = true +documentation_style = "c99" +documentation_length = "full" + +[export] +prefix = "lol_html_" + +[export.mangle] +rename_types = "SnakeCase" diff --git a/c-api/include/lol_html.h b/c-api/include/lol_html.h index 54e5e5c1..27e37931 100644 --- a/c-api/include/lol_html.h +++ b/c-api/include/lol_html.h @@ -30,6 +30,7 @@ typedef struct lol_html_Element lol_html_element_t; typedef struct lol_html_AttributesIterator lol_html_attributes_iterator_t; typedef struct lol_html_Attribute lol_html_attribute_t; typedef struct lol_html_Selector lol_html_selector_t; +typedef struct lol_html_CStreamingHandlerSink lol_html_streaming_sink_t; // Library-allocated UTF8 string fat pointer. // @@ -116,6 +117,30 @@ typedef lol_html_rewriter_directive_t (*lol_html_end_tag_handler_t)( void *user_data ); +// For use with streaming content handlers. +// +// Safety: the user data and the callbacks must be safe to use from a different thread (e.g. can't rely on thread-local storage). +// It doesn't have to be `Sync`, it will be used only by one thread at a time. +// +// Handler functions copy this struct. It can (and should) be created on the stack. +typedef struct lol_html_CStreamingHandler { + // Anything you like + void *user_data; + // Called when the handler is supposed to produce its output. Return `0` for success. + // The `sink` argument is guaranteed non-`NULL`. It is valid only for the duration of this call, and can only be used on the same thread. + // The sink is for [`lol_html_streaming_sink_write_str`] and [`lol_html_streaming_sink_write_utf8_chunk`]. + // `user_data` comes from this struct. + // + // `write_all_callback` must not be `NULL`. + int (*write_all_callback)(lol_html_streaming_sink_t *sink, void *user_data); + // Called exactly once, after the last use of this handler. + // It may be `NULL`. + // `user_data` comes from this struct. + void (*drop_callback)(void *user_data); + // *Always* initialize to `NULL`. + void *reserved; +} lol_html_streaming_handler_t; + // Selector //--------------------------------------------------------------------- @@ -792,6 +817,225 @@ int lol_html_doc_end_append( bool is_html ); + + +//[`Element::streaming_prepend`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_prepend(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`Element::streaming_append`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_append(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`Element::streaming_before`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_before(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`Element::streaming_after`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_after(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`Element::streaming_set_inner_content`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_set_inner_content(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`Element::streaming_replace`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`element` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_element_streaming_replace(lol_html_element_t *element, + lol_html_streaming_handler_t *streaming_writer); + +//[`EndTag::streaming_before`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`end_tag` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_end_tag_streaming_before(lol_html_end_tag_t *end_tag, + lol_html_streaming_handler_t *streaming_writer); + +//[`EndTag::streaming_after`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`end_tag` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_end_tag_streaming_after(lol_html_end_tag_t *end_tag, + lol_html_streaming_handler_t *streaming_writer); + +//[`EndTag::streaming_replace`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`end_tag` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_end_tag_streaming_replace(lol_html_end_tag_t *end_tag, + lol_html_streaming_handler_t *streaming_writer); + + +//[`TextChunk::streaming_before`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`text_chunk` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_text_chunk_streaming_before(lol_html_text_chunk_t *text_chunk, + lol_html_streaming_handler_t *streaming_writer); + +//[`TextChunk::streaming_after`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`text_chunk` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_text_chunk_streaming_after(lol_html_text_chunk_t *text_chunk, + lol_html_streaming_handler_t *streaming_writer); + +//[`TextChunk::streaming_replace`] +// +// The [`CStreamingHandler`] contains callbacks that will be called +// when the content needs to be written. +// +// `streaming_writer` is copied immediately, and doesn't have a stable address. +// `streaming_writer` may be used from another thread (`Send`), but it's only going +// to be used by one thread at a time (`!Sync`). +// +//`text_chunk` +// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. +// +// Returns 0 on success. +int lol_html_text_chunk_streaming_replace(lol_html_text_chunk_t *text_chunk, + lol_html_streaming_handler_t *streaming_writer); + +// Write another piece of UTF-8 data to the output. Returns `0` on success, and `-1` if it wasn't valid UTF-8. +// All pointers must be non-NULL. +int lol_html_streaming_sink_write_str(lol_html_streaming_sink_t *sink, + const char *string_utf8, + size_t string_utf8_len, + bool is_html); + +// [`StreamingHandlerSink::write_utf8_chunk`] +// +// Writes as much of the given UTF-8 fragment as possible, converting the encoding and HTML-escaping if `is_html` is `false`. +// +// The `bytes_utf8` doesn't need to be a complete UTF-8 string, as long as consecutive calls to this function create a valid UTF-8 string. +// Any incomplete UTF-8 sequence at the end of the content is buffered and flushed as soon as it's completed. +// +// Other functions like [`lol_html_streaming_sink_write_str`] should not be called after a +// `lol_html_streaming_sink_write_utf8_chunk` call with an incomplete UTF-8 sequence. +// +// Returns `0` on success, and `-1` if it wasn't valid UTF-8. +// All pointers must be non-`NULL`. +int lol_html_streaming_sink_write_utf8_chunk(lol_html_streaming_sink_t *sink, + const char *bytes_utf8, + size_t bytes_utf8_len, + bool is_html); + #if defined(__cplusplus) } // extern C #endif diff --git a/c-api/src/comment.rs b/c-api/src/comment.rs index 81c4e056..25707318 100644 --- a/c-api/src/comment.rs +++ b/c-api/src/comment.rs @@ -19,45 +19,16 @@ pub extern "C" fn lol_html_comment_text_set( 0 } -#[no_mangle] -pub extern "C" fn lol_html_comment_before( - comment: *mut Comment, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { comment.before(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_comment_after( - comment: *mut Comment, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { comment.after(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_comment_replace( - comment: *mut Comment, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { comment.replace(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_comment_remove(comment: *mut Comment) { - to_ref_mut!(comment).remove(); -} - -#[no_mangle] -pub extern "C" fn lol_html_comment_is_removed(comment: *const Comment) -> bool { - to_ref!(comment).removed() -} +impl_content_mutation_handlers! { comment: Comment [ + lol_html_comment_before => before, + lol_html_comment_after => after, + lol_html_comment_replace => replace, + @VOID lol_html_comment_remove => remove, + @BOOL lol_html_comment_is_removed => removed, + @STREAM lol_html_comment_streaming_before => streaming_before, + @STREAM lol_html_comment_streaming_after => streaming_after, + @STREAM lol_html_comment_streaming_replace => streaming_replace, +] } #[no_mangle] pub extern "C" fn lol_html_comment_user_data_set(comment: *mut Comment, user_data: *mut c_void) { diff --git a/c-api/src/doctype.rs b/c-api/src/doctype.rs index cfa30e4c..3afcf38e 100644 --- a/c-api/src/doctype.rs +++ b/c-api/src/doctype.rs @@ -25,12 +25,7 @@ pub extern "C" fn lol_html_doctype_user_data_get(doctype: *const Doctype) -> *mu get_user_data!(doctype) } -#[no_mangle] -pub extern "C" fn lol_html_doctype_remove(doctype: *mut Doctype) { - to_ref_mut!(doctype).remove(); -} - -#[no_mangle] -pub extern "C" fn lol_html_doctype_is_removed(doctype: *const Doctype) -> bool { - to_ref!(doctype).removed() -} +impl_content_mutation_handlers! { doctype: Doctype [ + @VOID lol_html_doctype_remove => remove, + @BOOL lol_html_doctype_is_removed => removed, +] } diff --git a/c-api/src/document_end.rs b/c-api/src/document_end.rs index 6bd9eee5..566d7f2e 100644 --- a/c-api/src/document_end.rs +++ b/c-api/src/document_end.rs @@ -1,11 +1,5 @@ use super::*; -#[no_mangle] -pub extern "C" fn lol_html_doc_end_append( - document_end: *mut DocumentEnd, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { document_end.append(content, content_len, is_html) } -} +impl_content_mutation_handlers! { doc_end: DocumentEnd [ + lol_html_doc_end_append => append, +] } diff --git a/c-api/src/element.rs b/c-api/src/element.rs index 897b70a6..28f1f20b 100644 --- a/c-api/src/element.rs +++ b/c-api/src/element.rs @@ -29,16 +29,6 @@ pub extern "C" fn lol_html_element_tag_name_set( 0 } -#[no_mangle] -pub extern "C" fn lol_html_element_is_self_closing(element: *mut Element) -> bool { - to_ref!(element).is_self_closing() -} - -#[no_mangle] -pub extern "C" fn lol_html_element_can_have_content(element: *mut Element) -> bool { - to_ref!(element).can_have_content() -} - #[no_mangle] pub extern "C" fn lol_html_element_namespace_uri_get(element: *mut Element) -> *const c_char { let element = to_ref!(element); @@ -157,80 +147,25 @@ pub extern "C" fn lol_html_element_remove_attribute( 0 } -#[no_mangle] -pub extern "C" fn lol_html_element_before( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.before(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_prepend( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.prepend(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_append( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.append(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_after( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.after(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_set_inner_content( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.set_inner_content(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_replace( - element: *mut Element, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { element.replace(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_element_remove(element: *mut Element) { - to_ref_mut!(element).remove(); -} - -#[no_mangle] -pub extern "C" fn lol_html_element_remove_and_keep_content(element: *mut Element) { - to_ref_mut!(element).remove_and_keep_content(); -} - -#[no_mangle] -pub extern "C" fn lol_html_element_is_removed(element: *mut Element) -> bool { - to_ref_mut!(element).removed() -} +impl_content_mutation_handlers! { element: Element [ + lol_html_element_prepend => prepend, + lol_html_element_append => append, + lol_html_element_before => before, + lol_html_element_after => after, + lol_html_element_set_inner_content => set_inner_content, + lol_html_element_replace => replace, + @VOID lol_html_element_remove => remove, + @VOID lol_html_element_remove_and_keep_content => remove_and_keep_content, + @BOOL lol_html_element_is_removed => removed, + @BOOL lol_html_element_is_self_closing => is_self_closing, + @BOOL lol_html_element_can_have_content => can_have_content, + @STREAM lol_html_element_streaming_prepend => streaming_prepend, + @STREAM lol_html_element_streaming_append => streaming_append, + @STREAM lol_html_element_streaming_before => streaming_before, + @STREAM lol_html_element_streaming_after => streaming_after, + @STREAM lol_html_element_streaming_set_inner_content => streaming_set_inner_content, + @STREAM lol_html_element_streaming_replace => streaming_replace, +] } #[no_mangle] pub extern "C" fn lol_html_element_user_data_set(element: *mut Element, user_data: *mut c_void) { @@ -274,30 +209,15 @@ pub extern "C" fn lol_html_element_clear_end_tag_handlers(element: *mut Element) } } -#[no_mangle] -pub extern "C" fn lol_html_end_tag_before( - end_tag: *mut EndTag, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { end_tag.before(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_end_tag_after( - end_tag: *mut EndTag, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { end_tag.after(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_end_tag_remove(end_tag: *mut EndTag) { - to_ref_mut!(end_tag).remove(); -} +impl_content_mutation_handlers! { end_tag: EndTag [ + lol_html_end_tag_before => before, + lol_html_end_tag_after => after, + lol_html_end_tag_replace => replace, + @VOID lol_html_end_tag_remove => remove, + @STREAM lol_html_end_tag_streaming_before => streaming_before, + @STREAM lol_html_end_tag_streaming_after => streaming_after, + @STREAM lol_html_end_tag_streaming_replace => streaming_replace, +] } #[no_mangle] pub extern "C" fn lol_html_end_tag_name_get(end_tag: *mut EndTag) -> Str { diff --git a/c-api/src/errors.rs b/c-api/src/errors.rs index 9e7b4f87..27413e8c 100644 --- a/c-api/src/errors.rs +++ b/c-api/src/errors.rs @@ -11,3 +11,12 @@ pub extern "C" fn lol_html_take_last_error() -> Str { Str::from_opt(err.map(|e| e.to_string())) } + +#[derive(Error, Debug, Eq, PartialEq, Copy, Clone)] +pub enum CStreamingHandlerError { + #[error("Not all fields of the struct were initialized")] + Uninitialized, + + #[error("write_all_callback reported error: {0}")] + HandlerError(c_int), +} diff --git a/c-api/src/lib.rs b/c-api/src/lib.rs index b41d4662..04523d7d 100644 --- a/c-api/src/lib.rs +++ b/c-api/src/lib.rs @@ -1,3 +1,4 @@ +pub use crate::streaming::CStreamingHandler; use libc::{c_char, c_int, c_void, size_t}; use lol_html::html_content::*; use lol_html::*; @@ -81,6 +82,94 @@ macro_rules! unwrap_or_ret_null { }; } +macro_rules! impl_content_mutation_handlers { + ($name:ident: $typ:ty [ $($(@$kind:ident)? $fn_name:ident => $method:ident),+$(,)? ]) => { + $( + // stable Rust can't concatenate idents, so fn_name must be written out manually, + // but it is possible to compare concatenated strings. + #[cfg(debug_assertions)] + const _: () = { + let expected_fn_name_prefix = concat!("lol_html_", stringify!($name), "_").as_bytes(); + let fn_name = stringify!($fn_name).as_bytes(); + // removed vs is_removed prevents exact comparison + assert!(fn_name.len() >= expected_fn_name_prefix.len() + (stringify!($method).len()), stringify!($fn_name)); + let mut i = 0; + while i < expected_fn_name_prefix.len() { + assert!(expected_fn_name_prefix[i] == fn_name[i], stringify!($fn_name)); + i += 1; + } + }; + impl_content_mutation_handlers! { IMPL $($kind)? $name: $typ, $fn_name => $method } + )+ + }; + (IMPL $name:ident: $typ:ty, $fn_name:ident => $method:ident) => { + #[doc = concat!("[`", stringify!($typ), "::", stringify!($method), "`]")] + /// + /// The `content` must be a valid UTF-8 string. It's copied immediately. + /// If `is_html` is `true`, then the `content` will be written without HTML-escaping. + /// + #[doc = concat!("`", stringify!($name), "`")] + /// must be valid and non-`NULL`. If `content` is `NULL`, an error will be reported. + /// + /// Returns 0 on success. + #[no_mangle] + pub unsafe extern "C" fn $fn_name( + $name: *mut $typ, + content: *const c_char, + content_len: size_t, + is_html: bool, + ) -> c_int { + content_insertion_fn_body! { $name.$method(content, content_len, is_html) } + } + }; + (IMPL STREAM $name:ident: $typ:ty, $fn_name:ident => $method:ident) => { + #[doc = concat!("[`", stringify!($typ), "::", stringify!($method), "`]")] + /// + /// The [`CStreamingHandler`] contains callbacks that will be called + /// when the content needs to be written. + /// + /// `streaming_writer` is copied immediately, and doesn't have a stable address. + /// `streaming_writer` may be used from another thread (`Send`), but it's only going + /// to be used by one thread at a time (`!Sync`). + /// + #[doc = concat!("`", stringify!($name), "`")] + /// must be valid and non-`NULL`. If `streaming_writer` is `NULL`, an error will be reported. + /// + /// Returns 0 on success. + #[no_mangle] + pub unsafe extern "C" fn $fn_name( + $name: *mut $typ, + streaming_writer: *mut CStreamingHandler, + ) -> c_int { + content_insertion_fn_body! { $name.$method(streaming_writer) } + } + }; + (IMPL VOID $name:ident: $typ:ty, $fn_name:ident => $method:ident) => { + #[doc = concat!("[`", stringify!($typ), "::", stringify!($method), "`]")] + /// + #[doc = concat!("`", stringify!($name), "`")] + /// must be valid and non-`NULL`. + #[no_mangle] + pub unsafe extern "C" fn $fn_name( + $name: *mut $typ, + ) { + to_ref_mut!($name).$method(); + } + }; + (IMPL BOOL $name:ident: $typ:ty, $fn_name:ident => $method:ident) => { + #[doc = concat!("[`", stringify!($typ), "::", stringify!($method), "`]")] + /// + #[doc = concat!("`", stringify!($name), "`")] + /// must be valid and non-`NULL`. Returns `_Bool`. + #[no_mangle] + pub unsafe extern "C" fn $fn_name( + $name: *mut $typ, + ) -> bool { + to_ref_mut!($name).$method() + } + }; +} + macro_rules! content_insertion_fn_body { ($target:ident.$method:ident($content:ident, $content_len:ident, $is_html:ident)) => {{ let target = to_ref_mut!($target); @@ -97,6 +186,24 @@ macro_rules! content_insertion_fn_body { 0 }}; + ($target:ident.$method:ident($handler:expr)) => {{ + let handler_ptr: *mut CStreamingHandler = $handler; + if unsafe { handler_ptr.as_ref() }.is_none_or(|handler| !handler.reserved.is_null()) { + // we can't even safely call drop callback on this + return -1; + } + // Taking ownership of the CStreamingHandler + let handler: Box = Box::new(unsafe { handler_ptr.read() }); + if handler.write_all_callback.is_none() { + return -1; + } + if let Some(target) = unsafe { $target.as_mut() } { + target.$method(handler); + 0 + } else { + -1 + } + }}; } macro_rules! get_user_data { @@ -109,16 +216,17 @@ macro_rules! get_user_data { }; } -mod comment; -mod doctype; -mod document_end; -mod element; -mod errors; -mod rewriter; -mod rewriter_builder; -mod selector; -mod string; -mod text_chunk; +pub mod comment; +pub mod doctype; +pub mod document_end; +pub mod element; +pub mod errors; +pub mod rewriter; +pub mod rewriter_builder; +pub mod selector; +pub mod streaming; +pub mod string; +pub mod text_chunk; pub use self::string::Str; diff --git a/c-api/src/streaming.rs b/c-api/src/streaming.rs new file mode 100644 index 00000000..426edffd --- /dev/null +++ b/c-api/src/streaming.rs @@ -0,0 +1,116 @@ +use super::*; +use crate::errors::CStreamingHandlerError; +use lol_html::html_content::StreamingHandler; +use lol_html::html_content::StreamingHandlerSink; + +/// Opaque type from C's perspective +pub type CStreamingHandlerSink<'tmp> = StreamingHandlerSink<'tmp>; + +/// Write another piece of UTF-8 data to the output. Returns `0` on success, and `-1` if it wasn't valid UTF-8. +/// All pointers must be non-NULL. +#[no_mangle] +pub unsafe extern "C" fn lol_html_streaming_sink_write_str( + sink: *mut CStreamingHandlerSink<'_>, + string_utf8: *const c_char, + string_utf8_len: size_t, + is_html: bool, +) -> c_int { + let sink = to_ref_mut!(sink); + let content = unwrap_or_ret_err_code! { to_str!(string_utf8, string_utf8_len) }; + let is_html = if is_html { + ContentType::Html + } else { + ContentType::Text + }; + + sink.write_str(content, is_html); + 0 +} + +/// [`StreamingHandlerSink::write_utf8_chunk`] +/// +/// Writes as much of the given UTF-8 fragment as possible, converting the encoding and HTML-escaping if `is_html` is `false`. +/// +/// The `bytes_utf8` doesn't need to be a complete UTF-8 string, as long as consecutive calls to this function create a valid UTF-8 string. +/// Any incomplete UTF-8 sequence at the end of the content is buffered and flushed as soon as it's completed. +/// +/// Other functions like [`lol_html_streaming_sink_write_str`] should not be called after a +/// `lol_html_streaming_sink_write_utf8_chunk` call with an incomplete UTF-8 sequence. +/// +/// Returns `0` on success, and `-1` if it wasn't valid UTF-8. +/// All pointers must be non-`NULL`. +#[no_mangle] +pub unsafe extern "C" fn lol_html_streaming_sink_write_utf8_chunk( + sink: *mut CStreamingHandlerSink<'_>, + bytes_utf8: *const c_char, + bytes_utf8_len: size_t, + is_html: bool, +) -> c_int { + let sink = to_ref_mut!(sink); + let content = to_bytes!(bytes_utf8, bytes_utf8_len); + let is_html = if is_html { + ContentType::Html + } else { + ContentType::Text + }; + + unwrap_or_ret_err_code! { sink.write_utf8_chunk(content, is_html) }; + 0 +} + +/// Safety: the user data and the callbacks must be safe to use from a different thread (e.g. can't rely on thread-local storage). +/// It doesn't have to be `Sync`, it will be used only by one thread at a time. +/// +/// Handler functions copy this struct. It can (and should) be created on the stack. +#[repr(C)] +pub struct CStreamingHandler { + /// Anything you like + pub user_data: *mut c_void, + /// Called when the handler is supposed to produce its output. Return `0` for success. + /// The `sink` argument is guaranteed non-`NULL`. It is valid only for the duration of this call, and can only be used on the same thread. + /// The sink is for [`lol_html_streaming_sink_write_str`] and [`lol_html_streaming_sink_write_utf8_chunk`]. + /// `user_data` comes from this struct. + /// `write_all_callback` must not be `NULL`. + pub write_all_callback: Option< + unsafe extern "C" fn(sink: &mut CStreamingHandlerSink<'_>, user_data: *mut c_void) -> c_int, + >, + /// Called exactly once, after the last use of this handler. + /// `user_data` comes from this struct. + /// May be `NULL`. + pub drop_callback: Option, + /// *Always* initialize to `NULL`. + pub reserved: *mut c_void, +} + +// It's up to C to obey this +unsafe impl Send for CStreamingHandler {} + +impl StreamingHandler for CStreamingHandler { + fn write_all( + self: Box, + sink: &mut StreamingHandlerSink<'_>, + ) -> Result<(), Box<(dyn std::error::Error + Send + Sync)>> { + if !self.reserved.is_null() { + return Err(CStreamingHandlerError::Uninitialized.into()); + } + let cb = self + .write_all_callback + .ok_or(CStreamingHandlerError::Uninitialized)?; + let res = unsafe { (cb)(sink, self.user_data) }; + if res == 0 { + Ok(()) + } else { + Err(CStreamingHandlerError::HandlerError(res).into()) + } + } +} + +impl Drop for CStreamingHandler { + fn drop(&mut self) { + if let Some(cb) = self.drop_callback { + unsafe { + cb(self.user_data); + } + } + } +} diff --git a/c-api/src/text_chunk.rs b/c-api/src/text_chunk.rs index 76074b3a..31cb03ae 100644 --- a/c-api/src/text_chunk.rs +++ b/c-api/src/text_chunk.rs @@ -22,50 +22,17 @@ pub extern "C" fn lol_html_text_chunk_content_get(chunk: *mut TextChunk) -> Text TextChunkContent::new(to_ref!(chunk)) } -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_is_last_in_text_node(chunk: *mut TextChunk) -> bool { - to_ref!(chunk).last_in_text_node() -} - -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_before( - chunk: *mut TextChunk, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { chunk.before(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_after( - chunk: *mut TextChunk, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { chunk.after(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_replace( - chunk: *mut TextChunk, - content: *const c_char, - content_len: size_t, - is_html: bool, -) -> c_int { - content_insertion_fn_body! { chunk.replace(content, content_len, is_html) } -} - -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_remove(chunk: *mut TextChunk) { - to_ref_mut!(chunk).remove(); -} - -#[no_mangle] -pub extern "C" fn lol_html_text_chunk_is_removed(chunk: *const TextChunk) -> bool { - to_ref!(chunk).removed() -} +impl_content_mutation_handlers! { text_chunk: TextChunk [ + lol_html_text_chunk_before => before, + lol_html_text_chunk_after => after, + lol_html_text_chunk_replace => replace, + @VOID lol_html_text_chunk_remove => remove, + @BOOL lol_html_text_chunk_is_removed => removed, + @BOOL lol_html_text_chunk_is_last_in_text_node => last_in_text_node, + @STREAM lol_html_text_chunk_streaming_before => streaming_before, + @STREAM lol_html_text_chunk_streaming_after => streaming_after, + @STREAM lol_html_text_chunk_streaming_replace => streaming_replace, +] } #[no_mangle] pub extern "C" fn lol_html_text_chunk_user_data_set(chunk: *mut TextChunk, user_data: *mut c_void) {