From 35642b069140daabcdcb0b1c5f8fb9ed300b78fd Mon Sep 17 00:00:00 2001 From: Millennium Cyborg <79787803+millenium-cyborg@users.noreply.github.com> Date: Tue, 13 Sep 2022 10:10:41 +0100 Subject: [PATCH] Use SendTexture to lower the overhead of the spout filter (#46) * Use SendTexture() to avoid some extra memory copies * Double-buffer to give good perf without a flush * Use intermediate texture to handle sRGB * Tidy Authored-by: @millenium-cyborg Closes #42 --- source/win-spout-filter.cpp | 135 +++++++++++++++++------------------- 1 file changed, 62 insertions(+), 73 deletions(-) diff --git a/source/win-spout-filter.cpp b/source/win-spout-filter.cpp index cc73217..946e1ee 100644 --- a/source/win-spout-filter.cpp +++ b/source/win-spout-filter.cpp @@ -23,10 +23,11 @@ struct win_spout_filter spoutDX* filter_sender; obs_source_t* source_context; const char* sender_name; - pthread_mutex_t mutex; uint32_t width; uint32_t height; - gs_texrender_t* texrender; + gs_texrender_t* texrender_curr; + gs_texrender_t* texrender_prev; + gs_texrender_t* texrender_intermediate; gs_stagesurf_t* stagesurface; video_t* video_output; uint8_t* video_data; @@ -80,17 +81,6 @@ void win_spout_filter_getdefaults(obs_data_t* defaults) obs_module_text("defaultfiltername")); } -void win_spout_filter_raw_video(void* data, video_data* frame) -{ - struct win_spout_filter* context = (win_spout_filter*)data; - - if (!frame|| !frame->data[0]) return; - - pthread_mutex_lock(&context->mutex); - context->filter_sender->SendImage(frame->data[0], context->width, context->height); - pthread_mutex_unlock(&context->mutex); -} - void win_spout_offscreen_render(void* data, uint32_t cx, uint32_t cy) { @@ -104,15 +94,15 @@ void win_spout_offscreen_render(void* data, uint32_t cx, uint32_t cy) uint32_t width = obs_source_get_base_width(target); uint32_t height = obs_source_get_base_height(target); - gs_texrender_reset(context->texrender); - - if (gs_texrender_begin(context->texrender, width, height)) - { + // Render 
the target to an intermediate format in sRGB-aware format + gs_texrender_reset(context->texrender_intermediate); + if (gs_texrender_begin(context->texrender_intermediate, width, height)) { struct vec4 background; vec4_zero(&background); gs_clear(GS_CLEAR_COLOR, &background, 0.0f, 0); - gs_ortho(0.0f, (float)width, 0.0f, (float)height, -100.0f, 100.0f); + gs_ortho(0.0f, (float)width, 0.0f, (float)height, -100.0f, + 100.0f); gs_blend_state_push(); gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO); @@ -120,60 +110,60 @@ void win_spout_offscreen_render(void* data, uint32_t cx, uint32_t cy) obs_source_video_render(target); gs_blend_state_pop(); - gs_texrender_end(context->texrender); + gs_texrender_end(context->texrender_intermediate); + } + + // Use the default effect to render it back into a format Spout accepts + gs_texrender_reset(context->texrender_curr); + if (gs_texrender_begin(context->texrender_curr, width, height)) + { + struct vec4 background; + vec4_zero(&background); - if (context->width != width || context->height != height) - { + gs_clear(GS_CLEAR_COLOR, &background, 0.0f, 0); + gs_ortho(0.0f, (float)width, 0.0f, (float)height, -100.0f, 100.0f); - gs_stagesurface_destroy(context->stagesurface); - context->stagesurface = gs_stagesurface_create(width, height, GS_BGRA); + gs_blend_state_push(); + gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO); - video_output_info video_out = { 0 }; - video_out.format = VIDEO_FORMAT_BGRA; - video_out.width = width; - video_out.height = height; - video_out.fps_den = context->video_info.fps_den; - video_out.fps_num = context->video_info.fps_num; - video_out.cache_size = 16; - video_out.colorspace = VIDEO_CS_DEFAULT; - video_out.range = VIDEO_RANGE_DEFAULT; - video_out.name = obs_source_get_name(context->source_context); + // To get sRGB handling, render with the default effect + gs_effect_t *effect = obs_get_base_effect(OBS_EFFECT_DEFAULT); + gs_texture_t *tex = gs_texrender_get_texture(context->texrender_intermediate); + if (tex) 
{ + const bool linear_srgb = gs_get_linear_srgb(); - video_output_close(context->video_output); + const bool previous = gs_framebuffer_srgb_enabled(); + gs_enable_framebuffer_srgb(linear_srgb); - context->width = width; - context->height = height; - video_output_open(&context->video_output, &video_out); - video_output_connect(context->video_output, nullptr, win_spout_filter_raw_video, context); + gs_eparam_t *image = + gs_effect_get_param_by_name(effect, "image"); + if (linear_srgb) + gs_effect_set_texture_srgb(image, tex); + else + gs_effect_set_texture(image, tex); + while (gs_effect_loop(effect, "Draw")) + gs_draw_sprite(tex, 0, width, height); + gs_enable_framebuffer_srgb(previous); } - struct video_frame output_frame; - if (video_output_lock_frame(context->video_output, - &output_frame, 1, obs_get_video_frame_time())) - { - if (context->video_data) { - gs_stagesurface_unmap(context->stagesurface); - context->video_data = nullptr; - } - - gs_stage_texture(context->stagesurface, - gs_texrender_get_texture(context->texrender)); - gs_stagesurface_map(context->stagesurface, - &context->video_data, &context->video_linesize); - - uint32_t linesize = output_frame.linesize[0]; - for (uint32_t i = 0; i < context->height; ++i) { - uint32_t dst_offset = linesize * i; - uint32_t src_offset = context->video_linesize * i; - memcpy(output_frame.data[0] + dst_offset, - context->video_data + src_offset, - linesize); - } - - video_output_unlock_frame(context->video_output); + gs_blend_state_pop(); + gs_texrender_end(context->texrender_curr); + + gs_texture_t *prev_tex = + gs_texrender_get_texture(context->texrender_prev); + if (prev_tex) { + context->filter_sender->SendTexture(( + ID3D11Texture2D *)gs_texture_get_obj(prev_tex)); } + + // Swap the buffers + // Double-buffering avoids the need for a flush, and also fixes + // some issues related to G-Sync. 
+ gs_texrender_t *tmp = context->texrender_curr; + context->texrender_curr = context->texrender_prev; + context->texrender_prev = tmp; } } @@ -194,7 +184,11 @@ void* win_spout_filter_create(obs_data_t* settings, obs_source_t* source) struct win_spout_filter* context = (win_spout_filter*)bzalloc(sizeof(win_spout_filter)); context->source_context = source; - context->texrender = gs_texrender_create(GS_BGRA, GS_ZS_NONE); + // Use a Spout-compatible texture format + context->texrender_curr = gs_texrender_create(GS_BGRA_UNORM, GS_ZS_NONE); + context->texrender_prev = gs_texrender_create(GS_BGRA_UNORM, GS_ZS_NONE); + context->texrender_intermediate = + gs_texrender_create(GS_BGRA, GS_ZS_NONE); context->sender_name = obs_data_get_string(settings, FILTER_PROP_NAME); context->video_data = nullptr; @@ -203,15 +197,9 @@ void* win_spout_filter_create(obs_data_t* settings, obs_source_t* source) obs_get_video_info(&context->video_info); win_spout_filter_update(context, settings); - - if (openDX11(context)) { - pthread_mutex_init_value(&context->mutex); - if (pthread_mutex_init(&context->mutex, NULL) == 0) - { - return context; - } + return context; } blog(LOG_ERROR, "Failed to create spout output!"); @@ -234,8 +222,9 @@ void win_spout_filter_destroy(void* data) video_output_close(context->video_output); gs_stagesurface_unmap(context->stagesurface); gs_stagesurface_destroy(context->stagesurface); - gs_texrender_destroy(context->texrender); - pthread_mutex_destroy(&context->mutex); + gs_texrender_destroy(context->texrender_intermediate); + gs_texrender_destroy(context->texrender_prev); + gs_texrender_destroy(context->texrender_curr); bfree(context); } } @@ -259,7 +248,7 @@ struct obs_source_info create_spout_filter_info() struct obs_source_info win_spout_filter_info = {}; win_spout_filter_info.id = "win_spout_filter"; win_spout_filter_info.type = OBS_SOURCE_TYPE_FILTER; - win_spout_filter_info.output_flags = OBS_SOURCE_VIDEO; + win_spout_filter_info.output_flags = OBS_SOURCE_VIDEO 
| OBS_SOURCE_SRGB; win_spout_filter_info.get_name = win_spout_filter_getname; win_spout_filter_info.get_properties = win_spout_filter_getproperties; win_spout_filter_info.get_defaults = win_spout_filter_getdefaults;