From 28bf94a57423caf2782c21f68dd96e4087ebdfa9 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Sat, 30 Sep 2023 00:46:49 +0200 Subject: [PATCH 01/10] Fix window positioning and maximization logic The code to restore the window position and size from saved data did not handle the case of being maximized on a non-primary monitor, nor did it adjust the window position when the stored location was completely outside of all active monitors. This change addresses both these problems and removes some vestigial code around display affinity and always-on-top logic which we don't use. --- engine/core/core_video.cpp | 4 - engine/system/sys_video.h | 5 +- engine/system/win/sys_video.cpp | 129 +++++++++++++++++++------------- 3 files changed, 76 insertions(+), 62 deletions(-) diff --git a/engine/core/core_video.cpp b/engine/core/core_video.cpp index 1100dcb..a271049 100644 --- a/engine/core/core_video.cpp +++ b/engine/core/core_video.cpp @@ -46,7 +46,6 @@ class core_video_c: public core_IVideo, public conCmdHandler_c { sys_IMain* sys; conVar_c* vid_mode; - conVar_c* vid_display; conVar_c* vid_fullscreen; conVar_c* vid_resizable; conVar_c* vid_last; @@ -69,7 +68,6 @@ core_video_c::core_video_c(sys_IMain* sysHnd) : conCmdHandler_c(sysHnd->con), sys(sysHnd) { vid_mode = sys->con->Cvar_Add("vid_mode", CV_ARCHIVE|CV_CLAMP, CFG_VID_DEFMODE, -1, VID_NUMMODES-1); - vid_display = sys->con->Cvar_Add("vid_display", CV_ARCHIVE|CV_CLAMP, CFG_VID_DEFDISPLAY, -1, 15); vid_fullscreen = sys->con->Cvar_Add("vid_fullscreen", CV_ARCHIVE, CFG_VID_DEFFULLSCREEN); vid_resizable = sys->con->Cvar_Add("vid_resizable", CV_ARCHIVE|CV_CLAMP, CFG_VID_DEFRESIZABLE, 0, 3); vid_last = sys->con->Cvar_Add("vid_last", CV_ARCHIVE, ""); @@ -100,7 +98,6 @@ void core_video_c::Apply(bool shown) } } } - set.display = vid_display->intVal; if (vid_mode->intVal >= 0) { set.mode[0] = (std::max)(vid_modeList[vid_mode->intVal][0], CFG_VID_MINWIDTH); set.mode[1] = (std::max)(vid_modeList[vid_mode->intVal][1], 
CFG_VID_MINHEIGHT); @@ -108,7 +105,6 @@ void core_video_c::Apply(bool shown) set.mode[0] = 0; set.mode[1] = 0; } - set.depth = 0; set.minSize[0] = CFG_VID_MINWIDTH; set.minSize[1] = CFG_VID_MINHEIGHT; sys->video->Apply(&set); diff --git a/engine/system/sys_video.h b/engine/system/sys_video.h index 708c50b..0d7600a 100644 --- a/engine/system/sys_video.h +++ b/engine/system/sys_video.h @@ -10,7 +10,6 @@ // Video settings flags enum vidFlags_e { - VID_TOPMOST = 0x02, VID_RESIZABLE = 0x04, VID_MAXIMIZE = 0x08, VID_USESAVED = 0x10, @@ -29,9 +28,7 @@ struct sys_vidSave_s { struct sys_vidSet_s { bool shown = false; // Show window? int flags = 0; // Flags - int display = 0; // Display number - int mode[2] = {}; // Resolution or window size - int depth = 0; // Bit depth + int mode[2] = {}; // Window size int minSize[2] = {}; // Minimum size for resizable windows sys_vidSave_s save; // Saved state }; diff --git a/engine/system/win/sys_video.cpp b/engine/system/win/sys_video.cpp index c6f4350..2d83d94 100644 --- a/engine/system/win/sys_video.cpp +++ b/engine/system/win/sys_video.cpp @@ -296,35 +296,30 @@ int sys_video_c::Apply(sys_vidSet_s* set) } priMon = 0; + struct WindowRect { + int left, top; + int right, bottom; + }; + // Determine which monitor to create window on - if (cur.display >= numMon) { - sys->con->Warning("display #%d doesn't exist (max display number is %d)", cur.display, numMon - 1); - cur.display = 0; - } - else if (cur.display < 0) { - // Use monitor containing the mouse cursor if available, otherwise primary monitor - cur.display = 0; - if (auto curPos = PlatformGetCursorPos()) { - auto [curX, curY] = *curPos; - for (int m = 0; m < numMon; ++m) { - int right = mon[m].left + mon[m].width; - int bottom = mon[m].top + mon[m].height; - if (curX >= mon[m].left && curY >= mon[m].top && curX < right && curY < bottom) { - cur.display = m; - break; - } + // Use monitor containing the mouse cursor if available, otherwise primary monitor + int display = 0; + if 
(auto curPos = PlatformGetCursorPos()) { + auto [curX, curY] = *curPos; + for (int m = 0; m < numMon; ++m) { + int right = mon[m].left + mon[m].width; + int bottom = mon[m].top + mon[m].height; + if (curX >= mon[m].left && curY >= mon[m].top && curX < right && curY < bottom) { + display = m; + break; } } } - defRes[0] = mon[cur.display].width; - defRes[1] = mon[cur.display].height; + defRes[0] = mon[display].width; + defRes[1] = mon[display].height; minSize[0] = minSize[1] = 0; - if (sys->debuggerRunning) { - // Force topmost off if debugger is attached - cur.flags &= ~VID_TOPMOST; - } if (cur.mode[0] == 0) { // Use default resolution if one isn't specified Vector2Copy(defRes, cur.mode); @@ -332,17 +327,10 @@ int sys_video_c::Apply(sys_vidSet_s* set) Vector2Copy(cur.mode, vid.size); Vector2Copy(defRes, scrSize); - struct WindowRect { - int left, top; - int right, bottom; - }; - // Get window rectangle - WindowRect wrec; + WindowRect wrec{}; + std::optional intersectedMonitor; if (cur.flags & VID_USESAVED) { - // TODO(LV): Move offscreen windows to a monitor. 
- wrec.left = cur.save.pos[0]; - wrec.top = cur.save.pos[1]; if (cur.save.maximised) { cur.flags |= VID_MAXIMIZE; } @@ -350,35 +338,61 @@ int sys_video_c::Apply(sys_vidSet_s* set) cur.mode[0] = cur.save.size[0]; cur.mode[1] = cur.save.size[1]; } + + wrec.left = cur.save.pos[0]; + wrec.top = cur.save.pos[1]; + wrec.right = wrec.left + cur.mode[0]; + wrec.bottom = wrec.top + cur.mode[1]; + + for (int m = 0; m < numMon; ++m) { + WindowRect drec{ mon[m].left, mon[m].top }; + drec.right = drec.left + mon[m].width; + drec.bottom = drec.top + mon[m].height; + + // A.lo < B.hi && A.hi > B.lo (half-open rects) + bool intersectsDisplay = drec.left < wrec.right && drec.top < wrec.bottom && drec.right > wrec.left && drec.bottom > wrec.top; + if (!intersectedMonitor && intersectsDisplay) { + intersectedMonitor = m; + break; + } + } } - else { - wrec.left = (scrSize[0] - cur.mode[0]) / 2 + mon[cur.display].left; - wrec.top = (scrSize[1] - cur.mode[1]) / 2 + mon[cur.display].top; + + if (!intersectedMonitor) { + wrec.left = (scrSize[0] - cur.mode[0]) / 2 + mon[display].left; + wrec.top = (scrSize[1] - cur.mode[1]) / 2 + mon[display].top; } vid.pos[0] = wrec.left; vid.pos[1] = wrec.top; - wrec.right = wrec.left + cur.mode[0]; - wrec.bottom = wrec.top + cur.mode[1]; - // TODO(LV): Verify that stored coordinates are aligned right. 
if (initialised) { - glfwSetWindowSize(wnd, cur.mode[0], cur.mode[1]); + if (!!glfwGetWindowAttrib(wnd, GLFW_MAXIMIZED)) { + glfwRestoreWindow(wnd); + } + glfwSetWindowPos(wnd, wrec.left, wrec.top); + glfwSetWindowSize(wnd, wrec.right - wrec.left, wrec.bottom - wrec.top); + if (cur.flags & VID_MAXIMIZE) { + glfwMaximizeWindow(wnd); + } if (cur.shown) { glfwShowWindow(wnd); sys->conWin->SetForeground(); } + else { + glfwHideWindow(wnd); + } } else { glfwWindowHint(GLFW_RESIZABLE, !!(cur.flags & VID_RESIZABLE)); - glfwWindowHint(GLFW_VISIBLE, GLFW_TRUE); - glfwWindowHint(GLFW_FLOATING, !!(cur.flags & VID_TOPMOST)); - glfwWindowHint(GLFW_MAXIMIZED, !!(cur.flags & VID_MAXIMIZE)); + glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE); // Start hidden to not flash the user with a stock window. + glfwWindowHint(GLFW_MAXIMIZED, GLFW_FALSE); // Start restored in order to position the window before maximizing. glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_ES_API); glfwWindowHint(GLFW_CONTEXT_CREATION_API, GLFW_EGL_CONTEXT_API); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0); glfwWindowHint(GLFW_DEPTH_BITS, 24); //glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE); + wnd = glfwCreateWindow(cur.mode[0], cur.mode[1], curTitle, nullptr, nullptr); if (!wnd) { char const* errDesc = "Unknown error"; @@ -386,19 +400,10 @@ int sys_video_c::Apply(sys_vidSet_s* set) sys->con->Printf("Could not create window, %s\n", errDesc); } - { - sys_programIcons_c icons; - if (icons.Size() > 0) { - glfwSetWindowIcon(wnd, (int)icons.Size(), icons.Data()); - } - } glfwMakeContextCurrent(wnd); gladLoadGLES2(glfwGetProcAddress); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - glfwSwapBuffers(wnd); - + // Set up all our window callbacks glfwSetWindowUserPointer(wnd, sys); glfwSetCursorEnterCallback(wnd, [](GLFWwindow* wnd, int entered) { auto sys = 
(sys_main_c*)glfwGetWindowUserPointer(wnd); @@ -544,11 +549,27 @@ int sys_video_c::Apply(sys_vidSet_s* set) auto sys = (sys_main_c*)glfwGetWindowUserPointer(wnd); sys->video->vid.dpiScale = xScale; }); - } - glfwSetWindowSizeLimits(wnd, cur.minSize[0], cur.minSize[1], GLFW_DONT_CARE, GLFW_DONT_CARE); + // Adjust window look and position + { + sys_programIcons_c icons; + if (icons.Size() > 0) { + glfwSetWindowIcon(wnd, (int)icons.Size(), icons.Data()); + } + } + glfwSetWindowSizeLimits(wnd, cur.minSize[0], cur.minSize[1], GLFW_DONT_CARE, GLFW_DONT_CARE); + glfwSetWindowPos(wnd, vid.pos[0], vid.pos[1]); + if (!!(cur.flags & VID_MAXIMIZE)) { + glfwMaximizeWindow(wnd); + } + glfwShowWindow(wnd); + + // Clear early to avoid flash + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glfwSwapBuffers(wnd); + } - glfwSetWindowPos(wnd, vid.pos[0], vid.pos[1]); glfwGetFramebufferSize(wnd, &vid.fbSize[0], &vid.fbSize[1]); glfwGetWindowSize(wnd, &vid.size[0], &vid.size[1]); glfwGetWindowContentScale(wnd, &sys->video->vid.dpiScale, nullptr); From e06849f26a5b8070de35cf4fac785f9d10408f73 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Mon, 18 Mar 2024 15:48:30 +0100 Subject: [PATCH 02/10] Add glm and missing includes/files to CMakeLists --- .gitmodules | 3 +++ CMakeLists.txt | 4 ++++ dep/glm | 1 + 3 files changed, 8 insertions(+) create mode 160000 dep/glm diff --git a/.gitmodules b/.gitmodules index 4040058..b8b7c9a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "dep/imgui"] path = dep/imgui url = https://github.com/ocornut/imgui.git +[submodule "dep/glm"] + path = dep/glm + url = https://github.com/g-truc/glm.git diff --git a/CMakeLists.txt b/CMakeLists.txt index a6e1e86..373faa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include(${PROJECT_SOURCE_DIR}/vcpkg/scripts/buildsystems/vcpkg.cmake) set(CMAKE_INSTALL_SYSTEM_RUNTIME_DESTINATION ".") 
include(InstallRequiredSystemLibraries) +add_subdirectory(dep/glm) set(SIMPLEGRAPHIC_SOURCES "config.h" @@ -25,6 +26,7 @@ set(SIMPLEGRAPHIC_SOURCES "dep/stb/stb_image_resize.h" "dep/stb/stb_image_write.h" "engine/common/common.cpp" + "engine/common.h" "engine/common/console.cpp" "engine/common/console.h" "engine/common/keylist.h" @@ -137,6 +139,7 @@ target_compile_definitions(imgui PUBLIC target_include_directories(imgui PUBLIC dep/imgui dep/imgui/backends + dep/imgui/misc/cpp ) target_link_libraries(imgui PUBLIC @@ -181,6 +184,7 @@ target_link_libraries(SimpleGraphic unofficial::angle::libGLESv2 fmt::fmt glfw + glm::glm imgui LuaJIT::LuaJIT re2::re2 diff --git a/dep/glm b/dep/glm new file mode 160000 index 0000000..47585fd --- /dev/null +++ b/dep/glm @@ -0,0 +1 @@ +Subproject commit 47585fde0c49fa77a2bf2fb1d2ead06999fd4b6e From a0bd7867d7d2927302567969379d08d918caa714 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Sat, 7 Oct 2023 06:29:22 +0200 Subject: [PATCH 03/10] Adopt flat layer command buffers, float geometry By preallocating a big flat command buffer for each layer that stores each draw command in sequence we can get rid of some of the pointer chasing of the pool of separate draw commands, instead reading data sequentially from the stream of draw commands. The stream can be walked from front to back by skipping ahead by the computed size of each command. While making these changes the underlying floating point type for vertex positions and texcoords was changed from doubles to floats to save significant storage space each frame. 
--- engine/render/r_font.cpp | 28 +- engine/render/r_main.cpp | 673 +++++++++++++++++++++++++-------------- engine/render/r_main.h | 45 ++- 3 files changed, 492 insertions(+), 254 deletions(-) diff --git a/engine/render/r_font.cpp b/engine/render/r_font.cpp index 83ea1b2..deb564e 100644 --- a/engine/render/r_font.cpp +++ b/engine/render/r_font.cpp @@ -17,10 +17,10 @@ // Glyph parameters struct f_glyph_s { - double tcLeft = 0.0; - double tcRight = 0.0; - double tcTop = 0.0; - double tcBottom = 0.0; + float tcLeft = 0.0; + float tcRight = 0.0; + float tcTop = 0.0; + float tcBottom = 0.0; int width = 0; int spLeft = 0; int spRight = 0; @@ -32,7 +32,7 @@ struct f_fontHeight_s { int height; int numGlyph; f_glyph_s glyphs[128]; - f_glyph_s defGlyph{0.0, 0.0, 0.0, 0.0, 0, 0, 0}; + f_glyph_s defGlyph{0.0f, 0.0f, 0.0f, 0.0f, 0, 0, 0}; f_glyph_s const& Glyph(char ch) const { if ((unsigned char)ch >= numGlyph) { @@ -84,10 +84,10 @@ r_font_c::r_font_c(r_renderer_c* renderer, const char* fontName) // Add glyph if (fh->numGlyph >= 128) continue; f_glyph_s* glyph = &fh->glyphs[fh->numGlyph++]; - glyph->tcLeft = (double)x / fh->tex->fileWidth; - glyph->tcRight = (double)(x + w) / fh->tex->fileWidth; - glyph->tcTop = (double)y / fh->tex->fileHeight; - glyph->tcBottom = (double)(y + fh->height) / fh->tex->fileHeight; + glyph->tcLeft = (float)x / fh->tex->fileWidth; + glyph->tcRight = (float)(x + w) / fh->tex->fileWidth; + glyph->tcTop = (float)y / fh->tex->fileHeight; + glyph->tcBottom = (float)(y + fh->height) / fh->tex->fileHeight; glyph->width = w; glyph->spLeft = sl; glyph->spRight = sr; @@ -240,14 +240,14 @@ void r_font_c::DrawTextLine(scp_t pos, int align, int height, col4_t col, const // Find best height to use f_fontHeight_s *fh = fontHeights[height > maxHeight? 
(numFontHeight - 1) : fontHeightMap[height]]; - double scale = (double)height / fh->height; + float scale = (float)height / fh->height; // Calculate the string position - double x = pos[X]; - double y = pos[Y]; + float x = pos[X]; + float y = pos[Y]; if (align != F_LEFT) { // Calculate the real width of the string - double width = StringWidthInternal(fh, str) * scale; + float width = StringWidthInternal(fh, str) * scale; switch (align) { case F_CENTRE: x = floor((renderer->VirtualScreenWidth() - width) / 2.0f + pos[X]); @@ -297,7 +297,7 @@ void r_font_c::DrawTextLine(scp_t pos, int align, int height, col4_t col, const auto& glyph = fh->Glyph(*str++); x+= glyph.spLeft * scale; if (glyph.width) { - double w = glyph.width * scale; + float w = glyph.width * scale; if (x + w >= 0 && x < renderer->VirtualScreenWidth()) { renderer->curLayer->Quad( glyph.tcLeft, glyph.tcTop, x, y, diff --git a/engine/render/r_main.cpp b/engine/render/r_main.cpp index aeb7dcc..1d18078 100644 --- a/engine/render/r_main.cpp +++ b/engine/render/r_main.cpp @@ -8,9 +8,11 @@ #define IMGUI_DEFINE_MATH_OPERATORS #include "r_local.h" +#include #include #include #include +#include #include #include #include @@ -20,6 +22,9 @@ #include #include +#include + +static uint64_t MurmurHash64A(void const* data, int len, uint64_t seed); // ======= // Classes @@ -128,96 +133,150 @@ Mat4 OrthoMatrix(double left, double right, double bottom, double top, double ne // ================= struct r_layerCmd_s { - enum { + enum Command { VIEWPORT, BLEND, BIND, COLOR, QUAD, } cmd; - union { - r_viewport_s viewport; - int blendMode; - r_tex_c* tex; - col4_t col; - struct { - double s[4]; - double t[4]; - double x[4]; - double y[4]; - } quad; - }; +}; + +struct r_layerCmdViewport_s { + r_layerCmd_s::Command cmd; + r_viewport_s viewport; +}; + +struct r_layerCmdBlend_s { + r_layerCmd_s::Command cmd; + int blendMode; +}; + +struct r_layerCmdBind_s { + r_layerCmd_s::Command cmd; + r_tex_c* tex; +}; + +struct r_layerCmdColor_s 
{ + r_layerCmd_s::Command cmd; + col4_t col; +}; + +struct r_layerCmdQuad_s { + r_layerCmd_s::Command cmd; + struct { + float s[4]; + float t[4]; + float x[4]; + float y[4]; + } quad; }; r_layer_c::r_layer_c(r_renderer_c* renderer, int layer, int subLayer) : renderer(renderer), layer(layer), subLayer(subLayer) { + cmdStorage.resize(1ull << 23); + cmdCursor = 0; numCmd = 0; - cmdSize = 8; - cmdList = new r_layerCmd_s * [cmdSize]; } r_layer_c::~r_layer_c() { - delete cmdList; } -r_layerCmd_s* r_layer_c::NewCommand() +static size_t CommandSize(r_layerCmd_s::Command cmd, size_t extraSize = 0) { + using Tag = r_layerCmd_s::Command; + switch (cmd) { + case Tag::VIEWPORT: return sizeof(r_layerCmdViewport_s); + case Tag::BLEND: return sizeof(r_layerCmdBlend_s); + case Tag::BIND: return sizeof(r_layerCmdBind_s); + case Tag::COLOR: return sizeof(r_layerCmdColor_s); + case Tag::QUAD: return sizeof(r_layerCmdQuad_s); + default: + abort(); + } +} + +r_layer_c::CmdHandle r_layer_c::GetFirstCommand() { - r_layerCmd_s* cmd; - if (renderer->layerCmdBinCount) { - cmd = renderer->layerCmdBin[--renderer->layerCmdBinCount]; + CmdHandle ret{}; + ret.offset = 0; + if (cmdCursor > 0) { + ret.cmd = (r_layerCmd_s*)cmdStorage.data(); } - else { - cmd = new r_layerCmd_s; + return ret; +} + +bool r_layer_c::GetNextCommand(r_layer_c::CmdHandle& handle) +{ + if (handle.cmd == nullptr) { + return false; } - if (numCmd == cmdSize) { - cmdSize <<= 1; - trealloc(cmdList, cmdSize); + handle.offset += (uint32_t)CommandSize(handle.cmd->cmd); + if (handle.offset >= cmdCursor) { + handle.cmd = nullptr; + return false; } - cmdList[numCmd++] = cmd; - return cmd; + handle.cmd = (r_layerCmd_s*)(cmdStorage.data() + handle.offset); + return true; +} + +r_layerCmd_s* r_layer_c::NewCommand(size_t size) +{ + size_t const cmdEnd = cmdCursor + size; + if (cmdEnd >= cmdStorage.size()) { + return nullptr; + } + auto *ret = (r_layerCmd_s*)(cmdStorage.data() + cmdCursor); + cmdCursor = cmdEnd; + ++numCmd; + return ret; 
} void r_layer_c::SetViewport(r_viewport_s* viewport) { - r_layerCmd_s* cmd = NewCommand(); - cmd->cmd = r_layerCmd_s::VIEWPORT; - cmd->viewport.x = viewport->x; - cmd->viewport.y = viewport->y; - cmd->viewport.width = viewport->width; - cmd->viewport.height = viewport->height; + if (auto* cmd = (r_layerCmdViewport_s*)NewCommand(CommandSize(r_layerCmd_s::VIEWPORT))) { + cmd->cmd = r_layerCmd_s::VIEWPORT; + cmd->viewport.x = viewport->x; + cmd->viewport.y = viewport->y; + cmd->viewport.width = viewport->width; + cmd->viewport.height = viewport->height; + } } void r_layer_c::SetBlendMode(int mode) { - r_layerCmd_s* cmd = NewCommand(); - cmd->cmd = r_layerCmd_s::BLEND; - cmd->blendMode = mode; + if (auto* cmd = (r_layerCmdBlend_s*)NewCommand(CommandSize(r_layerCmd_s::BLEND))) { + cmd->cmd = r_layerCmd_s::BLEND; + cmd->blendMode = mode; + } } void r_layer_c::Bind(r_tex_c* tex) { - r_layerCmd_s* cmd = NewCommand(); - cmd->cmd = r_layerCmd_s::BIND; - cmd->tex = tex; + if (auto* cmd = (r_layerCmdBind_s*)NewCommand(CommandSize(r_layerCmd_s::BIND))) { + cmd->cmd = r_layerCmd_s::BIND; + cmd->tex = tex; + } } void r_layer_c::Color(col4_t col) { - r_layerCmd_s* cmd = NewCommand(); - cmd->cmd = r_layerCmd_s::COLOR; - Vector4Copy(col, cmd->col); + if (auto* cmd = (r_layerCmdColor_s*)NewCommand(CommandSize(r_layerCmd_s::COLOR))) { + cmd->cmd = r_layerCmd_s::COLOR; + Vector4Copy(col, cmd->col); + } } -void r_layer_c::Quad(double s0, double t0, double x0, double y0, double s1, double t1, double x1, double y1, double s2, double t2, double x2, double y2, double s3, double t3, double x3, double y3) +void r_layer_c::Quad(float s0, float t0, float x0, float y0, float s1, float t1, float x1, float y1, float s2, float t2, float x2, float y2, float s3, float t3, float x3, float y3) { - r_layerCmd_s* cmd = NewCommand(); - cmd->cmd = r_layerCmd_s::QUAD; - cmd->quad.s[0] = s0; cmd->quad.s[1] = s1; cmd->quad.s[2] = s2; cmd->quad.s[3] = s3; - cmd->quad.t[0] = t0; cmd->quad.t[1] = t1; 
cmd->quad.t[2] = t2; cmd->quad.t[3] = t3; - cmd->quad.x[0] = x0; cmd->quad.x[1] = x1; cmd->quad.x[2] = x2; cmd->quad.x[3] = x3; - cmd->quad.y[0] = y0; cmd->quad.y[1] = y1; cmd->quad.y[2] = y2; cmd->quad.y[3] = y3; + if (auto* cmd = (r_layerCmdQuad_s*)NewCommand(CommandSize(r_layerCmd_s::QUAD))) { + cmd->cmd = r_layerCmd_s::QUAD; + cmd->quad.s[0] = s0; cmd->quad.s[1] = s1; cmd->quad.s[2] = s2; cmd->quad.s[3] = s3; + cmd->quad.t[0] = t0; cmd->quad.t[1] = t1; cmd->quad.t[2] = t2; cmd->quad.t[3] = t3; + cmd->quad.x[0] = x0; cmd->quad.x[1] = x1; cmd->quad.x[2] = x2; cmd->quad.x[3] = x3; + cmd->quad.y[0] = y0; cmd->quad.y[1] = y1; cmd->quad.y[2] = y2; cmd->quad.y[3] = y3; + } } // ================= @@ -229,7 +288,7 @@ struct r_aabb_s { float hi[2]; }; -r_aabb_s AabbFromCmdQuad(decltype(r_layerCmd_s::quad)& q, r_viewport_s& vp) +r_aabb_s AabbFromCmdQuad(decltype(r_layerCmdQuad_s::quad)& q, r_viewport_s& vp) { r_aabb_s r{ {+FLT_MAX, +FLT_MAX}, @@ -411,35 +470,40 @@ struct AdjacentMergeStrategy : RenderStrategy { void ProcessCommand(r_layerCmd_s* cmd) override { switch (cmd->cmd) { - case r_layerCmd_s::VIEWPORT: - nextViewport_ = cmd->viewport; + case r_layerCmd_s::VIEWPORT: { + auto* c = (r_layerCmdViewport_s*)cmd; + nextViewport_ = c->viewport; if (showStats_) { - // ImGui::Text("VIEWPORT: %dx%d @ %d,%d", cmd->viewport.width, cmd->viewport.height, cmd->viewport.x, cmd->viewport.y); + // ImGui::Text("VIEWPORT: %dx%d @ %d,%d", c->viewport.width, c->viewport.height, c->viewport.x, c->viewport.y); } - break; - case r_layerCmd_s::BLEND: - latchKey_.blendMode = cmd->blendMode; + } break; + case r_layerCmd_s::BLEND: { + auto* c = (r_layerCmdBlend_s*)cmd; + latchKey_.blendMode = c->blendMode; if (showStats_) { - // ImGui::Text("BLEND: %s", s_blendModeString.at((r_blendMode_e)cmd->blendMode)); + // ImGui::Text("BLEND: %s", s_blendModeString.at((r_blendMode_e)c->blendMode)); } - break; - case r_layerCmd_s::BIND: - nextTex_ = cmd->tex; + } break; + case r_layerCmd_s::BIND: { + auto* 
c = (r_layerCmdBind_s*)cmd; + nextTex_ = c->tex; if (showStats_) { - // ImGui::Text("TEX: %s", cmd->tex->fileName.c_str()); + // ImGui::Text("TEX: %s", c->tex->fileName.c_str()); } - break; - case r_layerCmd_s::COLOR: - std::copy_n(cmd->col, 4, tint_.data()); - break; + } break; + case r_layerCmd_s::COLOR: { + auto* c = (r_layerCmdColor_s*)cmd; + std::copy_n(c->col, 4, tint_.data()); + } break; case r_layerCmd_s::QUAD: { + auto* c = (r_layerCmdQuad_s*)cmd; if (showStats_) { // ImGui::Text("QUAD"); } // Cull the quad first before it influences any boundary cuts. if (!!renderer_->r_drawCull->intVal) { - auto a = AabbFromCmdQuad(cmd->quad, nextViewport_); + auto a = AabbFromCmdQuad(c->quad, nextViewport_); auto b = AabbFromViewport(nextViewport_); bool intersects = AabbAabbIntersects(a, b); if (!intersects) { @@ -474,10 +538,10 @@ struct AdjacentMergeStrategy : RenderStrategy { for (int v = 0; v < 4; v++) { auto& q = quad[v]; auto& vp = nextViewport_; - q.u = (float)cmd->quad.s[v]; - q.v = (float)cmd->quad.t[v]; - q.x = (float)cmd->quad.x[v]; - q.y = (float)cmd->quad.y[v]; + q.u = c->quad.s[v]; + q.v = c->quad.t[v]; + q.x = c->quad.x[v]; + q.y = c->quad.y[v]; q.r = tint_[0]; q.g = tint_[1]; q.b = tint_[2]; @@ -636,9 +700,8 @@ void r_layer_c::Render() } strat->SetShowStats(showStats); - for (int i = 0; i < numCmd; i++) { - r_layerCmd_s* cmd = cmdList[i]; - strat->ProcessCommand(cmd); + for (CmdHandle cmdH = GetFirstCommand(); cmdH.cmd != nullptr; GetNextCommand(cmdH)) { + strat->ProcessCommand(cmdH.cmd); } strat->Flush(); @@ -648,15 +711,6 @@ void r_layer_c::Render() } } - for (int i = 0; i < numCmd; i++) { - r_layerCmd_s* cmd = cmdList[i]; - if (renderer->layerCmdBinCount == renderer->layerCmdBinSize) { - renderer->layerCmdBinSize <<= 1; - trealloc(renderer->layerCmdBin, renderer->layerCmdBinSize); - } - renderer->layerCmdBin[renderer->layerCmdBinCount++] = cmd; - } - numCmd = 0; if (renderer->glPopGroupMarkerEXT) { renderer->glPopGroupMarkerEXT(); } @@ -664,14 +718,7 
@@ void r_layer_c::Render() void r_layer_c::Discard() { - for (int i = 0; i < numCmd; i++) { - r_layerCmd_s* cmd = cmdList[i]; - if (renderer->layerCmdBinCount == renderer->layerCmdBinSize) { - renderer->layerCmdBinSize <<= 1; - trealloc(renderer->layerCmdBin, renderer->layerCmdBinSize); - } - renderer->layerCmdBin[renderer->layerCmdBinCount++] = cmd; - } + cmdCursor = 0; numCmd = 0; } @@ -959,47 +1006,53 @@ void r_renderer_c::Init() takeScreenshot = R_SSNONE; // Set up DPI-scaling render target - { - glGenFramebuffers(1, &rttMain.framebuffer); - glGenTextures(1, &rttMain.colorTexture); - - auto compileShader = [](std::string_view src, GLenum type) -> GLuint { - GLuint id = glCreateShader(type); - auto sourcePtr = src.data(); - glShaderSource(id, 1, &sourcePtr, nullptr); - glCompileShader(id); - return id; - }; - - auto vsId = compileShader(s_scaleVsSource, GL_VERTEX_SHADER); - if (!GetShaderCompileSuccess(vsId)) { - auto log = GetShaderInfoLog(vsId); - sys->con->Printf("Scaling VS compile failure: %s\n", log.c_str()); - } - auto fsId = compileShader(s_scaleFsSource, GL_FRAGMENT_SHADER); - if (!GetShaderCompileSuccess(fsId)) { - auto log = GetShaderInfoLog(fsId); - sys->con->Printf("Scaling FS compile failure: %s\n", log.c_str()); + for (int i = 0; i < 2; ++i) { + auto& rtt = rttMain[i]; + if (i > 0) { + rtt = rttMain[0]; // Reuse shared parts like dimensions and program/locations. 
} + glGenFramebuffers(1, &rtt.framebuffer); + glGenTextures(1, &rtt.colorTexture); + + if (i == 0) { + auto compileShader = [](std::string_view src, GLenum type) -> GLuint { + GLuint id = glCreateShader(type); + auto sourcePtr = src.data(); + glShaderSource(id, 1, &sourcePtr, nullptr); + glCompileShader(id); + return id; + }; - GLuint prog = rttMain.blitProg = glCreateProgram(); - glAttachShader(prog, vsId); - glAttachShader(prog, fsId); - glLinkProgram(prog); - if (!GetProgramLinkSuccess(prog)) { - auto log = GetProgramInfoLog(prog); - sys->con->Printf("Scaling program link failure: %s\n", log.c_str()); - } + auto vsId = compileShader(s_scaleVsSource, GL_VERTEX_SHADER); + if (!GetShaderCompileSuccess(vsId)) { + auto log = GetShaderInfoLog(vsId); + sys->con->Printf("Scaling VS compile failure: %s\n", log.c_str()); + } + auto fsId = compileShader(s_scaleFsSource, GL_FRAGMENT_SHADER); + if (!GetShaderCompileSuccess(fsId)) { + auto log = GetShaderInfoLog(fsId); + sys->con->Printf("Scaling FS compile failure: %s\n", log.c_str()); + } + + GLuint prog = rtt.blitProg = glCreateProgram(); + glAttachShader(prog, vsId); + glAttachShader(prog, fsId); + glLinkProgram(prog); + if (!GetProgramLinkSuccess(prog)) { + auto log = GetProgramInfoLog(prog); + sys->con->Printf("Scaling program link failure: %s\n", log.c_str()); + } - GLint linked = GL_FALSE; - glGetProgramiv(prog, GL_LINK_STATUS, &linked); + GLint linked = GL_FALSE; + glGetProgramiv(prog, GL_LINK_STATUS, &linked); - glDeleteShader(vsId); - glDeleteShader(fsId); + glDeleteShader(vsId); + glDeleteShader(fsId); - rttMain.blitAttribLocPos = glGetAttribLocation(prog, "a_position"); - rttMain.blitAttibLocTC = glGetAttribLocation(prog, "a_texcoord"); - rttMain.blitSampleLocColour = glGetUniformLocation(prog, "s_tex"); + rtt.blitAttribLocPos = glGetAttribLocation(prog, "a_position"); + rtt.blitAttribLocTC = glGetAttribLocation(prog, "a_texcoord"); + rtt.blitSampleLocColour = glGetUniformLocation(prog, "s_tex"); + } } // Load 
render resources @@ -1049,9 +1102,12 @@ void r_renderer_c::Shutdown() } delete layerCmdBin; - glDeleteTextures(1, &rttMain.colorTexture); - glDeleteFramebuffers(1, &rttMain.framebuffer); - glDeleteProgram(rttMain.blitProg); + for (int i = 0; i < 2; ++i) { + auto& rtt = rttMain[i]; + glDeleteTextures(1, &rtt.colorTexture); + glDeleteFramebuffers(1, &rtt.framebuffer); + } + glDeleteProgram(rttMain[0].blitProg); // Shutdown texture manager r_ITexManager::FreeHandle(texMan); @@ -1076,27 +1132,30 @@ void r_renderer_c::BeginFrame() auto& vid = sys->video->vid; int wNew = VirtualScreenWidth(); int hNew = VirtualScreenHeight(); - if (rttMain.width != wNew || rttMain.height != hNew) { - GLint prevTex2D, prevFB; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &prevTex2D); - glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prevFB); - glBindTexture(GL_TEXTURE_2D, rttMain.colorTexture); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, wNew, hNew, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - - rttMain.width = wNew; - rttMain.height = hNew; - - glBindFramebuffer(GL_FRAMEBUFFER, rttMain.framebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rttMain.colorTexture, 0); - - glCheckFramebufferStatus(GL_FRAMEBUFFER); - - glBindFramebuffer(GL_FRAMEBUFFER, prevFB); - glBindTexture(GL_TEXTURE_2D, prevTex2D); + for (int i = 0; i < 2; ++i) { + auto& rtt = rttMain[i]; + if (rtt.width != wNew || rtt.height != hNew) { + GLint prevTex2D, prevFB; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &prevTex2D); + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &prevFB); + glBindTexture(GL_TEXTURE_2D, rtt.colorTexture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, wNew, hNew, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + rtt.width = wNew; + rtt.height = hNew; + + glBindFramebuffer(GL_FRAMEBUFFER, rtt.framebuffer); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rtt.colorTexture, 0); + + glCheckFramebufferStatus(GL_FRAMEBUFFER); + + glBindFramebuffer(GL_FRAMEBUFFER, prevFB); + glBindTexture(GL_TEXTURE_2D, prevTex2D); + } } } @@ -1105,6 +1164,8 @@ void r_renderer_c::BeginFrame() SetViewport(); SetBlendMode(RB_ALPHA); DrawColor(); + + beginFrameToc = std::chrono::steady_clock::now(); } static int layerCompFunc(const void* va, const void* vb) @@ -1141,13 +1202,40 @@ void CVarCheckbox(char const* label, conVar_c* cvar) { } } +static std::string BinaryUnitPrefix(uint64_t quantity) { + if (quantity < 1ull<<10) { + return fmt::format("{} ", quantity); + } + if (quantity < 1ull << 20) { + return fmt::format("{:0.2f} Ki", quantity / 1024.0); + } + if (quantity < 1ull << 30) { + return fmt::format("{:0.2f} Mi", quantity / 1024.0 / 1024.0); + } + if (quantity < 1ull << 40) { + return fmt::format("{:0.2f} Gi", quantity / 1024.0 / 1024.0 / 1024.0); + } + if (quantity < 1ull << 50) { + return fmt::format("{:0.2f} Ti", quantity / 1024.0 / 1024.0 / 1024.0 / 1024.0); + } + if (quantity < 1ull << 60) { + return fmt::format("{:0.2f} Ti", quantity / 1024.0 / 1024.0 / 1024.0 / 1024.0 / 1024.0); + } + return fmt::format("{:0.2f} Pi", quantity / 1024.0 / 1024.0 / 1024.0 / 1024.0 / 1024.0); +} + void r_renderer_c::EndFrame() { + std::chrono::time_point endFrameTic = std::chrono::steady_clock::now(); + frameStats.AppendDuration(&FrameStats::midFrameStepDurations, endFrameTic - beginFrameToc); + static bool showDemo = false; static bool showMetrics = false; + static bool showHash = false; + static bool showTiming = false; if 
(debugImGui) { if (ImGui::Begin("Debug Hub", &debugImGui)) { - if (ImGui::Button("Demo")) { + if (ImGui::Button("ImGui Demo")) { showDemo = true; } if (ImGui::Button("Metrics")) { @@ -1173,18 +1261,18 @@ void r_renderer_c::EndFrame() } qsort(layerSort, numLayer, sizeof(r_layer_c*), layerCompFunc); if (r_layerDebug->intVal) { - int totalCmd = 0; + size_t totalCmd = 0; for (int l = 0; l < numLayer; l++) { totalCmd += layerSort[l]->numCmd; char str[1024]; - sprintf(str, "%d (%4d,%4d) [%2d]", layerSort[l]->numCmd, layerSort[l]->layer, layerSort[l]->subLayer, l); + sprintf(str, "%zu (%4d,%4d) [%2d]", layerSort[l]->numCmd, layerSort[l]->layer, layerSort[l]->subLayer, l); float w = (float)DrawStringWidth(16, F_FIXED, str); DrawColor(0x7F000000); DrawImage(NULL, (float)VirtualScreenWidth() - w, VirtualScreenHeight() - (l + 2) * 16.0f, w, 16); DrawStringFormat(0, VirtualScreenHeight() - (l + 2) * 16.0f, F_RIGHT, 16, colorWhite, F_FIXED, str); } char str[1024]; - sprintf(str, "%d", totalCmd); + sprintf(str, "%zu", totalCmd); float w = (float)DrawStringWidth(16, F_FIXED, str); DrawColor(0xAF000000); DrawImage(NULL, (float)VirtualScreenWidth() - w, VirtualScreenHeight() - 16.0f, w, 16); @@ -1195,17 +1283,30 @@ void r_renderer_c::EndFrame() if (debugLayers) { if (ImGui::Begin("Layers", &debugLayers)) { ImGui::Text("Layers: %d", numLayer); - ImGui::Text("%d out of %d frames drawn, %d saved.", drawnFrames, totalFrames, savedFrames); + ImGui::Text("%d out of %d frames drawn.", drawnFrames, totalFrames); CVarSliderInt("Optimization", r_layerOptimize); CVarCheckbox("Elide identical frames", r_elideFrames); CVarCheckbox("Draw command culling", r_drawCull); - int totalCmd{}; - if (ImGui::BeginTable("Layer stats", 5, ImGuiTableFlags_Borders | ImGuiTableFlags_SizingFixedFit)) { + size_t totalFootprint{}, totalDenseFootprint{}; + for (int l = 0; l < numLayer; ++l) { + size_t byteAcc{}; + auto layer = layerSort[l]; + size_t const numCmd = layer->numCmd; + totalFootprint += numCmd * 
sizeof(r_layerCmdQuad_s); // legacy footprint + totalDenseFootprint += layer->cmdCursor; + } + + ImGui::Text("Total payload footprint: %sB", BinaryUnitPrefix(totalFootprint).c_str()); + ImGui::Text("Total dense footprint: %sB", BinaryUnitPrefix(totalDenseFootprint).c_str()); + + size_t totalCmd{}; + if (ImGui::BeginTable("Layer stats", 7, ImGuiTableFlags_Borders | ImGuiTableFlags_SizingFixedFit)) { ImGui::TableSetupColumn("Index"); ImGui::TableSetupColumn("Layer"); ImGui::TableSetupColumn("Sublayer"); ImGui::TableSetupColumn("Command count"); + ImGui::TableSetupColumn("Dense"); ImGui::TableSetupColumn("Debug"); ImGui::TableHeadersRow(); for (int l = 0; l < numLayer; ++l) { @@ -1223,6 +1324,8 @@ void r_renderer_c::EndFrame() ImGui::TableNextColumn(); ImGui::Text("%d", layer->numCmd); ImGui::TableNextColumn(); + ImGui::Text("%sB", BinaryUnitPrefix(layer->cmdCursor).c_str()); + ImGui::TableNextColumn(); if (ImGui::Button("Debug")) { layerBreak = { layer->layer, layer->subLayer }; } @@ -1240,60 +1343,54 @@ void r_renderer_c::EndFrame() lastFrameHash.clear(); } - std::vector commandDigest(crypto_shorthash_bytes()); + std::future>> elidedFrameHashFut; if (elideFrames) { - { - std::vector commandBytes; - commandBytes.reserve(1ull << 24); - auto hash_primitive = [&](auto& x) { - auto p = (uint8_t const*)&x; - commandBytes.insert(commandBytes.end(), p, p + sizeof(x)); - }; + elidedFrameHashFut = std::async([&]() -> std::optional> { + std::vector commandDigest; + for (auto lIdx = 0; lIdx < numLayer; ++lIdx) { auto layer = layerSort[lIdx]; - hash_primitive(layer->layer); - hash_primitive(layer->subLayer); - for (auto cIdx = 0; cIdx < layer->numCmd; ++cIdx) { - auto cmd = layer->cmdList[cIdx]; - hash_primitive(cmd->cmd); - switch (cmd->cmd) { - case r_layerCmd_s::VIEWPORT: { - hash_primitive(cmd->viewport); - } break; - case r_layerCmd_s::BLEND: { - hash_primitive(cmd->blendMode); - } break; - case r_layerCmd_s::BIND: { - // not safe around handle/ID reuse but probably good 
enough - hash_primitive(cmd->tex); - hash_primitive(cmd->tex->texId); - auto status = cmd->tex->status.load(); - hash_primitive(status); - } break; - case r_layerCmd_s::COLOR: { - for (auto comp : cmd->col) { - hash_primitive(comp); - } - } break; - case r_layerCmd_s::QUAD: { - hash_primitive(cmd->quad); - } break; - } - } + uint64_t subHash = MurmurHash64A(layer->cmdStorage.data(), (int)layer->cmdCursor, 0ull); + uint8_t const* p = (uint8_t const*)&subHash; + commandDigest.insert(commandDigest.end(), p, p + sizeof(subHash)); } - static std::vector const commandKey(crypto_shorthash_keybytes()); - crypto_shorthash((unsigned char*)commandDigest.data(), (unsigned char const*)commandBytes.data(), commandBytes.size(), (unsigned char const*)commandKey.data()); - } + + return commandDigest; + }); + } + else { + std::promise>> p; + elidedFrameHashFut = p.get_future(); + p.set_value({}); } - ++totalFrames; - if (!elideFrames || lastFrameHash != commandDigest) { - lastFrameHash = commandDigest; - ++drawnFrames; + elidedFrameHashFut.wait(); - glBindFramebuffer(GL_FRAMEBUFFER, rttMain.framebuffer); + ++totalFrames; + bool decideDraw = false; + bool elideDraw = false; + { + int drawRtt = 1 - presentRtt; + glBindFramebuffer(GL_FRAMEBUFFER, rttMain[drawRtt].framebuffer); glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - for (int l = 0; l < numLayer; l++) { + int l{}; + for (l = 0; l < numLayer; l++) { + if (!decideDraw && elidedFrameHashFut.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { + decideDraw = true; + auto commandDigest = elidedFrameHashFut.get(); + if (commandDigest) { + if (*commandDigest == lastFrameHash) { + elideDraw = true; + break; + } + else { + lastFrameHash = *commandDigest; + } + } + else { + lastFrameHash.clear(); + } + } auto& layer = layerSort[l]; if (layerBreak && layerBreak->first == layer->layer && layerBreak->second == layer->subLayer) { #ifdef _WIN32 @@ -1302,43 +1399,85 @@ void 
r_renderer_c::EndFrame() } layer->Render(); } + if (!elideDraw) { + presentRtt = drawRtt; + ++drawnFrames; + } } - else { - ++savedFrames; - for (int l = 0; l < numLayer; l++) { - layerSort[l]->Discard(); + + if (!decideDraw) { + if (auto commandDigest = elidedFrameHashFut.get()) { + lastFrameHash = *commandDigest; + } + else { + lastFrameHash.clear(); } } + + for (int l = 0; l < numLayer; ++l) { + layerSort[l]->Discard(); + } delete[] layerSort; - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + { + auto rtt = rttMain[presentRtt]; + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - float blitTriPos[] = { - -1.0f, -1.0f, // - 3.0f, -1.0f, // - -1.0f, 3.0f, // - }; - float blitTriUV[] = { - 0.0f, 0.0f, // - 2.0f, 0.0f, // - 0.0f, 2.0f, // - }; + float blitTriPos[] = { + -1.0f, -1.0f, // + 3.0f, -1.0f, // + -1.0f, 3.0f, // + }; + float blitTriUV[] = { + 0.0f, 0.0f, // + 2.0f, 0.0f, // + 0.0f, 2.0f, // + }; + + glViewport(0, 0, sys->video->vid.fbSize[0], sys->video->vid.fbSize[1]); + glUseProgram(rtt.blitProg); + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, std::data(blitTriPos)); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 0, std::data(blitTriUV)); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glBindTexture(GL_TEXTURE_2D, rtt.colorTexture); + glUniform1i(rtt.blitSampleLocColour, 0); + glDrawArrays(GL_TRIANGLES, 0, 3); + glBindTexture(GL_TEXTURE_2D, 0); + glUseProgram(0); + } - glViewport(0, 0, sys->video->vid.fbSize[0], sys->video->vid.fbSize[1]); - glUseProgram(rttMain.blitProg); - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, std::data(blitTriPos)); - glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 0, std::data(blitTriUV)); - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); - 
glBindTexture(GL_TEXTURE_2D, rttMain.colorTexture); - glUniform1i(rttMain.blitSampleLocColour, 0); - glDrawArrays(GL_TRIANGLES, 0, 3); - glBindTexture(GL_TEXTURE_2D, 0); - glUseProgram(0); + if (showHash) { + if (ImGui::Begin("Hash")) { + std::vector b64(sodium_base64_ENCODED_LEN(lastFrameHash.size(), sodium_base64_VARIANT_URLSAFE)); + sodium_bin2base64(b64.data(), b64.size(), lastFrameHash.data(), lastFrameHash.size(), sodium_base64_VARIANT_URLSAFE); + ImGui::Text("%s", b64.data()); + } + ImGui::End(); + } - glFlush(); + std::chrono::time_point endFrameToc = std::chrono::steady_clock::now(); + frameStats.AppendDuration(&FrameStats::endFrameStepDurations, endFrameToc - endFrameTic); + + if (showTiming) { + if (ImGui::Begin("Timing")) { + auto stepStatsUi = [&](std::string label, auto& seq) { + auto [I, J] = std::minmax_element(seq.begin(), seq.end()); + ImGui::LabelText(fmt::format("{} min", label).c_str(), "%2.2f ms", *I * 1'000.0f); + ImGui::LabelText(fmt::format("{} cur", label).c_str(), "%2.2f ms", seq.back() * 1'000.0f); + ImGui::LabelText(fmt::format("{} max", label).c_str(), "%2.2f ms", *J * 1'000.0f); + ImGui::PlotLines(label.c_str(), + [](void* data, int idx) -> float { auto& dq = *(std::deque*)data; return dq[idx]; }, + &seq, (int)seq.size(), 0, nullptr, 0.0f, 30.0f / 1000.0f); + }; + stepStatsUi("MidFrame", frameStats.midFrameStepDurations); + ImGui::Separator(); + stepStatsUi("EndFrame", frameStats.endFrameStepDurations); + } + ImGui::End(); + } ImGui::Render(); ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); @@ -1724,3 +1863,73 @@ void r_renderer_c::DoScreenshot(image_c* i, const char* ext) sys->con->Print(fmt::format("Wrote screenshot to {}\n", ssname).c_str()); } } + +// ============================================ +// MurmurHash implementation from public domain +// ============================================ + +#if _WIN32 +#define BIG_CONSTANT(x) (x) +#else +#define BIG_CONSTANT(x) (x##LLU) +#endif + +static inline uint64_t 
MurmurHashGetBlock(const uint64_t* p) +{ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return *p; +#else + const uint8_t* c = (const uint8_t*)p; + return (uint64_t)c[0] | + (uint64_t)c[1] << 8 | + (uint64_t)c[2] << 16 | + (uint64_t)c[3] << 24 | + (uint64_t)c[4] << 32 | + (uint64_t)c[5] << 40 | + (uint64_t)c[6] << 48 | + (uint64_t)c[7] << 56; +#endif +} + +uint64_t MurmurHash64A(const void* key, int len, uint64_t seed) +{ + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t* data = (const uint64_t*)key; + const uint64_t* end = data + (len / 8); + + while (data != end) + { + uint64_t k = MurmurHashGetBlock(data++); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char* data2 = (const unsigned char*)data; + + switch (len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} diff --git a/engine/render/r_main.h b/engine/render/r_main.h index e2651b7..36ebd3c 100644 --- a/engine/render/r_main.h +++ b/engine/render/r_main.h @@ -11,6 +11,8 @@ #define R_MAXSHADERS 65536 #include +#include +#include #include #include @@ -29,9 +31,9 @@ struct r_viewport_s { // Render layer class r_layer_c { public: - int numCmd; - int cmdSize; - struct r_layerCmd_s** cmdList; + std::vector cmdStorage; + size_t cmdCursor{}; + size_t numCmd{}; int layer; int subLayer; @@ -43,14 +45,22 @@ class r_layer_c { void SetBlendMode(int mode); void Bind(r_tex_c* tex); void Color(col4_t col); - void Quad(double s0, double t0, double x0, double y0, double s1, double t1, double x1, double y1, double s2, double t2, double x2, double y2, double s3, double t3, double 
x3, double y3); + void Quad(float s0, float t0, float x0, float y0, float s1, float t1, float x1, float y1, float s2, float t2, float x2, float y2, float s3, float t3, float x3, float y3); void Render(); void Discard(); + struct CmdHandle { + uint32_t offset; + struct r_layerCmd_s* cmd; + }; + + CmdHandle GetFirstCommand(); + bool GetNextCommand(CmdHandle& handle); + private: r_renderer_c* renderer; - struct r_layerCmd_s* NewCommand(); + struct r_layerCmd_s* NewCommand(size_t size); }; // Renderer Main Class @@ -155,17 +165,36 @@ class r_renderer_c: public r_IRenderer, public conCmdHandler_c { GLuint blitProg = 0; GLuint blitAttribLocPos = 0; - GLuint blitAttibLocTC = 0; + GLuint blitAttribLocTC = 0; GLuint blitSampleLocColour = 0; }; - RenderTarget rttMain; + RenderTarget rttMain[2]; + int presentRtt = 0; std::vector lastFrameHash{}; uint64_t totalFrames{}; uint64_t drawnFrames{}; - uint64_t savedFrames{}; + + struct FrameStats { + std::deque midFrameStepDurations; + std::deque endFrameStepDurations; + std::deque wholeFrameDurations; + size_t historyLength = 128; + + void AppendDuration(std::deque FrameStats::*series, std::chrono::duration duration) { + auto& coll = this->*series; + if (coll.size() >= historyLength) { + size_t excess = coll.size() + 1 - historyLength; + coll.erase(coll.begin(), coll.begin() + excess); + } + coll.push_back(duration.count()); + } + }; + + std::chrono::time_point beginFrameToc; + FrameStats frameStats; bool elideFrames = false; bool debugImGui = false; From 59492a4003ea77c03f9ae62c5e66a13afd02f313 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Sat, 14 Oct 2023 04:47:36 +0200 Subject: [PATCH 04/10] Add renderer feature flag for Lua DPI awareness The runtime is aware of the operating system display scaling factor and presents a virtual coordinate space for the Lua application where it scales the sizes and positions of the window and cursor without allowing the application any choice. 
This is a hack as scaling up the window contents by the desired amount leads to a low-quality image. In order to draw at true resolution the Lua program needs to both be able to tell what the scaling factor is to adjust all UI dimensions, and also be able to indicate to the runtime that it wishes to be free of the virtualization and instead use the real coordinate system. As it's a very big task both in time and design-wise to make the UI scale, the runtime needs to be able to launch either in legacy mode or in a mode with true visuals. This is achieved here by making `RenderInit` take a sequence of feature flags as it's called from `Launch.lua`. This is early enough that most of the Lua application has yet to run and thus gets the chance to fetch the proper values as it starts up, while the runtime also gets this request early enough to influence the initialization of the renderer and allocation of resources. This change introduces a flag string of `"DPI_AWARE"` that instructs the runtime to go into a mode where it reports the true scale factor in `GetScreenScale`, provides a full-resolution render target and doesn't rescale the screen size or mouse positions anymore. For alternative runtimes like `pobfrontend` any flags are ignored and their runtimes continue to work as they always have, or can start honoring the flag if it fits their environment.
# Conflicts: # engine/render.h # ui_api.cpp --- engine/render.h | 7 ++++++- engine/render/r_main.cpp | 19 +++++++++++++++---- engine/render/r_main.h | 3 ++- ui_api.cpp | 26 ++++++++++++++++++++++++-- ui_main.cpp | 4 ++-- ui_main.h | 2 +- 6 files changed, 50 insertions(+), 11 deletions(-) diff --git a/engine/render.h b/engine/render.h index 0703bd5..611b902 100644 --- a/engine/render.h +++ b/engine/render.h @@ -8,6 +8,11 @@ // Classes // ======= +// Renderer feature flags +enum r_featureFlag_e { + F_DPI_AWARE = 0x1, // App understands DPI, do not virtualize screen size/positions +}; + // Font alignment enum r_fontAlign_e { F_LEFT, @@ -59,7 +64,7 @@ class r_IRenderer { static r_IRenderer* GetHandle(sys_IMain* sysHnd); static void FreeHandle(r_IRenderer* hnd); - virtual void Init() = 0; + virtual void Init(r_featureFlag_e features) = 0; virtual void Shutdown() = 0; virtual void BeginFrame() = 0; diff --git a/engine/render/r_main.cpp b/engine/render/r_main.cpp index 1d18078..cdc01fa 100644 --- a/engine/render/r_main.cpp +++ b/engine/render/r_main.cpp @@ -870,10 +870,12 @@ void main(void) { // Init/Shutdown // ============= -void r_renderer_c::Init() +void r_renderer_c::Init(r_featureFlag_e features) { sys->con->PrintFunc("Render Init"); + apiDpiAware = !!(features & F_DPI_AWARE); + timer_c timer; timer.Start(); @@ -1776,15 +1778,24 @@ int r_renderer_c::VirtualScreenHeight() { } float r_renderer_c::VirtualScreenScaleFactor() { - return sys->video->vid.dpiScale; + if (apiDpiAware) { + return sys->video->vid.dpiScale; + } + return 1.0f; } int r_renderer_c::VirtualMap(int properValue) { - return (int)(properValue / VirtualScreenScaleFactor()); + if (apiDpiAware) { + return properValue; + } + return (int)(properValue / sys->video->vid.dpiScale); } int r_renderer_c::VirtualUnmap(int mappedValue) { - return (int)(mappedValue * VirtualScreenScaleFactor()); + if (apiDpiAware) { + return mappedValue; + } + return (int)(mappedValue * sys->video->vid.dpiScale); } // ===== diff 
--git a/engine/render/r_main.h b/engine/render/r_main.h index 36ebd3c..1f8c9c7 100644 --- a/engine/render/r_main.h +++ b/engine/render/r_main.h @@ -67,7 +67,7 @@ class r_layer_c { class r_renderer_c: public r_IRenderer, public conCmdHandler_c { public: // Interface - void Init(); + void Init(r_featureFlag_e features); void Shutdown(); void BeginFrame(); @@ -169,6 +169,7 @@ class r_renderer_c: public r_IRenderer, public conCmdHandler_c { GLuint blitSampleLocColour = 0; }; + bool apiDpiAware{}; RenderTarget rttMain[2]; int presentRtt = 0; diff --git a/ui_api.cpp b/ui_api.cpp index aefcad9..8fd142c 100644 --- a/ui_api.cpp +++ b/ui_api.cpp @@ -31,8 +31,9 @@ ** imgHandle:SetLoadingPriority(pri) ** width, height = imgHandle:ImageSize() ** -** RenderInit() +** RenderInit(["flag1"[, "flag2"...]]) flag:{"DPI_AWARE"} ** width, height = GetScreenSize() +** scaleFactor = GetScreenScale() ** SetClearColor(red, green, blue[, alpha]) ** SetDrawLayer({layer|nil}[, subLayer) ** GetDrawLayer() @@ -278,7 +279,20 @@ static int l_imgHandleImageSize(lua_State* L) static int l_RenderInit(lua_State* L) { ui_main_c* ui = GetUIPtr(L); - ui->RenderInit(); + int n = lua_gettop(L); + bool dpiAware = false; + for (int i = 1; i <= n; ++i) { + ui->LAssert(L, lua_isstring(L, i), "RenderInit() argument %d: expected string, got %s", i, luaL_typename(L, i)); + char const* str = lua_tostring(L, i); + if (strcmp(str, "DPI_AWARE") == 0) { + dpiAware = true; + } + } + r_featureFlag_e features{}; + if (dpiAware) { + features = (r_featureFlag_e)(features | F_DPI_AWARE); + } + ui->RenderInit(features); return 0; } @@ -290,6 +304,13 @@ static int l_GetScreenSize(lua_State* L) return 2; } +static int l_GetScreenScale(lua_State* L) +{ + ui_main_c* ui = GetUIPtr(L); + lua_pushnumber(L, ui->renderer->VirtualScreenScaleFactor()); + return 1; +} + static int l_SetClearColor(lua_State* L) { ui_main_c* ui = GetUIPtr(L); @@ -1345,6 +1366,7 @@ int ui_main_c::InitAPI(lua_State* L) // Rendering ADDFUNC(RenderInit); 
ADDFUNC(GetScreenSize); + ADDFUNC(GetScreenScale); ADDFUNC(SetClearColor); ADDFUNC(SetDrawLayer); ADDFUNC(GetDrawLayer); diff --git a/ui_main.cpp b/ui_main.cpp index c02b71b..2877e15 100644 --- a/ui_main.cpp +++ b/ui_main.cpp @@ -237,7 +237,7 @@ void ui_main_c::Init(int argc, char** argv) } } -void ui_main_c::RenderInit() +void ui_main_c::RenderInit(r_featureFlag_e features) { if (renderer) { return; @@ -252,7 +252,7 @@ void ui_main_c::RenderInit() // Initialise renderer renderer = r_IRenderer::GetHandle(sys); - renderer->Init(); + renderer->Init(features); // Create UI console handler conUI = ui_IConsole::GetHandle(this); diff --git a/ui_main.h b/ui_main.h index cedef43..a783edf 100644 --- a/ui_main.h +++ b/ui_main.h @@ -49,7 +49,7 @@ class ui_main_c: public ui_IMain { static int InitAPI(lua_State* L); - void RenderInit(); + void RenderInit(r_featureFlag_e features); void ScriptInit(); void ScriptShutdown(); From 9a1ae7b3b42459038fb8ebecab769e84537f4e40 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Sat, 14 Oct 2023 04:56:23 +0200 Subject: [PATCH 05/10] Fix some zlib problems By investigating some 64-bit portability warnings from MSVC some bugs were found in the API wrappers for zlib's Deflate and Inflate functions. The decompressed data was leaked as the function returned and could lead to memory exhaustion in the runtime upon repeated calls, this has been addressed by using RAII to release the temporary buffer. This also fixed a bug where both C++ `new[]` and C `realloc` was used for the same storage. Some constraints were tightened to avoid problems with correctness due to unfortunate integer type sizes beyond our control in zlib. 
--- ui_api.cpp | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/ui_api.cpp b/ui_api.cpp index 8fd142c..1755df3 100644 --- a/ui_api.cpp +++ b/ui_api.cpp @@ -854,16 +854,26 @@ static int l_Deflate(lua_State* L) deflateInit(&z, 9); size_t inLen; byte* in = (byte*)lua_tolstring(L, 1, &inLen); - int outSz = deflateBound(&z, inLen); - byte* out = new byte[outSz]; + // Prevent deflation of input data larger than 128 MiB. + size_t const maxInLen = 128ull << 20; + if (inLen > maxInLen) { + lua_pushnil(L); + lua_pushstring(L, "Input larger than 128 MiB"); + return 2; + } + uLong outSz = deflateBound(&z, (uLong)inLen); + // Clamp deflate bound to a fairly reasonable 128 MiB. + size_t const maxOutLen = 128ull << 20; + outSz = std::min(outSz, maxOutLen); + std::vector out(outSz); z.next_in = in; - z.avail_in = inLen; - z.next_out = out; + z.avail_in = (uInt)inLen; + z.next_out = out.data(); z.avail_out = outSz; int err = deflate(&z, Z_FINISH); deflateEnd(&z); if (err == Z_STREAM_END) { - lua_pushlstring(L, (const char*)out, z.total_out); + lua_pushlstring(L, (const char*)out.data(), z.total_out); return 1; } else { @@ -881,30 +891,40 @@ static int l_Inflate(lua_State* L) ui->LAssert(L, lua_isstring(L, 1), "Inflate() argument 1: expected string, got %s", luaL_typename(L, 1)); size_t inLen; byte* in = (byte*)lua_tolstring(L, 1, &inLen); - int outSz = inLen * 4; - byte* out = new byte[outSz]; + size_t const maxInLen = 128ull << 20; + if (inLen > maxInLen) { + lua_pushnil(L); + lua_pushstring(L, "Input larger than 128 MiB"); + } + uInt outSz = (uInt)(inLen * 4); + std::vector out(outSz); z_stream_s z; z.next_in = in; - z.avail_in = inLen; + z.avail_in = (uInt)inLen; z.zalloc = NULL; z.zfree = NULL; - z.next_out = out; + z.next_out = out.data(); z.avail_out = outSz; inflateInit(&z); int err; while ((err = inflate(&z, Z_NO_FLUSH)) == Z_OK) { + // Output buffer filled, try to embiggen it. 
if (z.avail_out == 0) { - // Output buffer filled, embiggen it - int newSz = outSz << 1; - trealloc(out, newSz); - z.next_out = out + outSz; + // Avoid growing inflate output size after 128 MiB. + size_t const maxOutLen = 128ull << 20; + if (outSz > maxOutLen) { + break; + } + int newSz = outSz * 2; + out.resize(newSz); + z.next_out = out.data() + outSz; z.avail_out = outSz; outSz = newSz; } } inflateEnd(&z); if (err == Z_STREAM_END) { - lua_pushlstring(L, (const char*)out, z.total_out); + lua_pushlstring(L, (const char*)out.data(), z.total_out); return 1; } else { From 9664cd4bf219c26de50d3e4b614b397eeaf3f298 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Sat, 14 Oct 2023 14:04:53 +0200 Subject: [PATCH 06/10] Restore screenshot functionality and Targa saving With the migration to OpenGL ES2 from OpenGL 1.1 some parameters of `glReadPixels` became more restrictive, only allowing two distinct pairs of format/type; neither of which matched the format we needed to capture screenshots. This change reads out pixels in an always-supported format (RGBA) and converts it to the desired RGB as it flips the image. Stubbed out functionality to write Targa files was also reimplemented via stb_image. 
--- engine/core/core_image.cpp | 37 ++++++++++++++++++++--------- engine/render/r_main.cpp | 48 +++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/engine/core/core_image.cpp b/engine/core/core_image.cpp index ea82213..244193d 100644 --- a/engine/core/core_image.cpp +++ b/engine/core/core_image.cpp @@ -50,7 +50,7 @@ image_c::~image_c() void image_c::CopyRaw(int inType, dword inWidth, dword inHeight, const byte* inDat) { - if (dat) delete dat; + if (dat) delete[] dat; comp = inType & 0xF; type = inType; width = inWidth; @@ -61,7 +61,7 @@ void image_c::CopyRaw(int inType, dword inWidth, dword inHeight, const byte* inD void image_c::Free() { - delete dat; + delete[] dat; dat = NULL; } @@ -188,7 +188,7 @@ bool targa_c::Load(const char* fileName) int rlen = ((rlehdr & 0x7F) + 1) * comp; if (x + rlen > rowSize) { con->Warning("TGA '%s': invalid RLE coding (overlong row)", fileName); - delete dat; + delete[] dat; return true; } if (rlehdr & 0x80) { @@ -225,7 +225,22 @@ bool targa_c::Load(const char* fileName) bool targa_c::Save(const char* fileName) { - return true; + if (type != IMGTYPE_RGB && type != IMGTYPE_RGBA) { + return true; + } + + // Open file + fileOutputStream_c out; + if (out.FileOpen(fileName, true)) { + return true; + } + + auto rc = stbi_write_tga_to_func([](void* ctx, void* data, int size) { + auto out = (fileOutputStream_c*)ctx; + out->Write(data, size); + }, &out, width, height, comp, dat); + + return !rc; } bool targa_c::ImageInfo(const char* fileName, imageInfo_s* info) @@ -277,14 +292,14 @@ bool jpeg_c::Load(const char* fileName) return true; } int x, y, in_comp; - if (!stbi_info_from_memory(fileData.data(), fileData.size(), &x, &y, &in_comp)) { + if (!stbi_info_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &in_comp)) { return true; } if (in_comp != 1 && in_comp != 3) { con->Warning("JPEG '%s': unsupported component count '%d'", fileName, comp); return true; } - stbi_uc* data = 
stbi_load_from_memory(fileData.data(), fileData.size(), &x, &y, &in_comp, in_comp); + stbi_uc* data = stbi_load_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &in_comp, in_comp); if (!data) { stbi_image_free(data); return true; @@ -335,7 +350,7 @@ bool jpeg_c::ImageInfo(const char* fileName, imageInfo_s* info) return true; } int x, y, comp; - if (stbi_info_from_memory(fileData.data(), fileData.size(), &x, &y, &comp)) { + if (stbi_info_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &comp)) { return true; } @@ -366,14 +381,14 @@ bool png_c::Load(const char* fileName) return true; } int x, y, in_comp; - if (!stbi_info_from_memory(fileData.data(), fileData.size(), &x, &y, &in_comp)) { + if (!stbi_info_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &in_comp)) { return true; } width = x; height = y; comp = (in_comp == 1 || in_comp == 3) ? 3 : 4; type = comp == 3 ? IMGTYPE_RGB : IMGTYPE_RGBA; - stbi_uc* data = stbi_load_from_memory(fileData.data(), fileData.size(), &x, &y, &in_comp, comp); + stbi_uc* data = stbi_load_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &in_comp, comp); if (!data) { stbi_image_free(data); return true; @@ -420,7 +435,7 @@ bool png_c::ImageInfo(const char* fileName, imageInfo_s* info) return true; } int x, y, comp; - if (stbi_info_from_memory(fileData.data(), fileData.size(), &x, &y, &comp)) { + if (stbi_info_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &comp)) { return true; } @@ -449,7 +464,7 @@ bool gif_c::Load(const char* fileName) return true; } int x, y, in_comp; - stbi_uc* data = stbi_load_from_memory(fileData.data(), fileData.size(), &x, &y, &in_comp, 4); + stbi_uc* data = stbi_load_from_memory(fileData.data(), (int)fileData.size(), &x, &y, &in_comp, 4); if (!data || in_comp != 4) { stbi_image_free(data); return true; diff --git a/engine/render/r_main.cpp b/engine/render/r_main.cpp index cdc01fa..9d6ef2c 100644 --- a/engine/render/r_main.cpp +++ b/engine/render/r_main.cpp @@ 
-1830,25 +1830,45 @@ void r_renderer_c::C_Screenshot(IConsole* conHnd, args_c& args) void r_renderer_c::DoScreenshot(image_c* i, const char* ext) { - int xs = sys->video->vid.size[0]; - int ys = sys->video->vid.size[1]; + if (i->type != IMGTYPE_RGB) { + return; + } + auto& rt = rttMain[presentRtt]; + int const xs = rt.width; + int const ys = rt.height; - int size = xs * ys * 3; - byte* sbuf = new byte[size]; + // Pixel reading only supports RGBA and an implementation-specific format. + // Use RGBA for convenience as that's close enough to what we want to save in the end. + int const readSize = xs * ys * 4; + int const writeSize = xs * ys * 3; + std::vector sbuf(readSize); // Read the front buffer + GLint oldFb{}; + GLenum oglErr = glGetError(); + GLenum implColorReadFormat{}, implColorReadType{}; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &oldFb); + glBindFramebuffer(GL_FRAMEBUFFER, rttMain[presentRtt].framebuffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); - glReadPixels(0, 0, xs, ys, r_tex_c::GLTypeForImgType(i->type), GL_UNSIGNED_BYTE, sbuf); - - // Flip the image - int span = xs * 3; - byte* ss = new byte[size]; - byte* p1 = sbuf; - byte* p2 = ss + size - span; - for (int y = 0; y < ys; y++, p1 += span, p2 -= span) { - memcpy(p2, p1, span); + glReadPixels(0, 0, xs, ys, GL_RGBA, GL_UNSIGNED_BYTE, sbuf.data()); + oglErr = glGetError(); + glBindFramebuffer(GL_FRAMEBUFFER, oldFb); + + // Flip and convert the image to RGB + int const readSpan = xs * 4; + int const writeSpan = xs * 3; + byte* ss = new byte[writeSize]; // This is a raw pointer as ownership is taken by the image object. 
+ byte* p1 = sbuf.data(); + byte* p2 = ss + writeSize - writeSpan; + for (int y = 0; y < ys; ++y, p2 -= writeSpan * 2) { + for (int x = 0; x < xs; ++x) { + *p2++ = *p1++; // R + *p2++ = *p1++; // G + *p2++ = *p1++; // B + p1++; // A + } } - delete[] sbuf; + sbuf.clear(); // Set image info i->dat = ss; From 644b353bf34dbf8b15386657f02e5105e19c2107 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Mon, 18 Mar 2024 15:58:10 +0100 Subject: [PATCH 07/10] Add C++ RAII scaffolding to handle API unwinding In order to support C++ RAII classes in the functions exposed in the API to Lua this change also introduces a new assertion `LExpect` functioning like the existing `LAssert` but instead of directly transferring control to Lua it pushes the message on the Lua stack and throws a C++ exception to unwind the C++ call stack. For this scheme the convenience macros `SG_LUA_CPP_FUN_BEGIN(Name)` and `SG_LUA_CPP_FUN_END()` wrap the function entrypoint and perform the second phase of assertion processing when needed. --- ui_api.cpp | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ui_main.cpp | 11 ++++++++ ui_main.h | 5 +++- 3 files changed, 93 insertions(+), 1 deletion(-) diff --git a/ui_api.cpp b/ui_api.cpp index 1755df3..09a334d 100644 --- a/ui_api.cpp +++ b/ui_api.cpp @@ -95,6 +95,52 @@ static ui_main_c* GetUIPtr(lua_State* L) return ui; } +// =============== +// C++ scaffolding +// =============== + +/* +* ui->LAssert transfers control immediately out of the function without destroying +* any C++ objects. To support RAII this scaffolding serves as a landing pad for +* ui->LExpect, to transfer control to Lua but only after the call stack has been +* unwound with normal C++ exception semantics. 
+* +* Example use site: +* SG_LUA_FUN_BEGIN(DoTheThing) +* { +* ui_main_c* ui = GetUIPtr(L); +* auto foo = std::make_shared(); +* ui->LExpect(L, lua_gettop(L) >= 1), "Usage: DoTheThing(x)"); +* ui->LExpect(L, lua_isstring(L, 1), "DoTheThing() argument 1: expected string, got %s", luaL_typename(L, 1)); +* return 0; +* } +* SG_LUA_FUN_END() +*/ + +#ifdef _WIN32 +#define SG_NOINLINE __declspec(noinline) +#else +#define SG_NOINLINE [[gnu::noinline]] +#endif +#define SG_NORETURN [[noreturn]] + +SG_NORETURN static void LuaErrorWrapper(lua_State* L) +{ + lua_error(L); +} + +#define SG_LUA_CPP_FUN_BEGIN(Name) \ +static int l_##Name(lua_State* L) { \ + int (*fun)(lua_State*) = [](lua_State* L) SG_NOINLINE -> int { \ + try + +#define SG_LUA_CPP_FUN_END() \ + catch (ui_expectationFailed_s) { return -1; } \ + }; \ + int rc = fun(L); \ + if (rc < 0) { LuaErrorWrapper(L); } \ + return rc; } + // ========= // Callbacks // ========= @@ -272,6 +318,38 @@ static int l_imgHandleImageSize(lua_State* L) return 2; } +class ui_luaReader_c { +public: + ui_luaReader_c(ui_main_c* ui, lua_State* L, std::string funName) : ui(ui), L(L), funName(funName) {} + + // Always zero terminated as all regular strings are terminated in Lua. 
+ std::string_view ArgToString(int k) { + ui->LExpect(L, lua_isstring(L, k), "%s() argument %d: expected string, got %s", + funName.c_str(), k, luaL_typename(L, k)); + return lua_tostring(L, k); + } + + void ArgCheckTable(int k) { + ui->LExpect(L, lua_istable(L, k), "%s() argument %d: expected table, got %s", + funName.c_str(), k, luaL_typename(L, k)); + } + + void ArgCheckNumber(int k) { + ui->LExpect(L, lua_isnumber(L, k), "%s() argument %d: expected number, got %s", + funName.c_str(), k, luaL_typename(L, k)); + } + + void ValCheckNumber(int k, char const* ctx) { + ui->LExpect(L, lua_isnumber(L, k), "%s() %s: expected number, got %s", + funName.c_str(), ctx, k, luaL_typename(L, k)); + } + +private: + ui_main_c* ui; + lua_State* L; + std::string funName; +}; + // ========= // Rendering // ========= diff --git a/ui_main.cpp b/ui_main.cpp index 2877e15..7de5ff1 100644 --- a/ui_main.cpp +++ b/ui_main.cpp @@ -95,6 +95,17 @@ void ui_main_c::LAssert(lua_State* L, int cond, const char* fmt, ...) } } +void ui_main_c::LExpect(lua_State* L, int cond, const char* fmt, ...) 
+{ + if (!cond) { + va_list va; + va_start(va, fmt); + lua_pushvfstring(L, fmt, va); + va_end(va); + throw ui_expectationFailed_s{}; + } +} + int ui_main_c::IsUserData(lua_State* L, int index, const char* metaName) { if (lua_type(L, index) != LUA_TUSERDATA || lua_getmetatable(L, index) == 0) { diff --git a/ui_main.h b/ui_main.h index a783edf..3239c60 100644 --- a/ui_main.h +++ b/ui_main.h @@ -8,6 +8,8 @@ // Classes // ======= +struct ui_expectationFailed_s {}; + // UI Manager class ui_main_c: public ui_IMain { public: @@ -53,7 +55,8 @@ class ui_main_c: public ui_IMain { void ScriptInit(); void ScriptShutdown(); - void LAssert(lua_State* L, int cond, const char* fmt, ...); + void LAssert(lua_State* L, int cond, const char* fmt, ...); // Non-local return to Lua code on failure + void LExpect(lua_State* L, int cond, const char* fmt, ...); // Throws ui_expectationFailed_s on failure, message on Lua stack int IsUserData(lua_State* L, int index, const char* metaName); int PushCallback(const char* name); void PCall(int narg, int nret); From a19830d07418f422ebe2517e9666f0f9654eb288 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Fri, 20 Oct 2023 16:06:43 +0200 Subject: [PATCH 08/10] Access current/last render target via functions To make it easier for functionality like screenshot capture and frame rendering to use the correct render target this change introduces two functions for use inside the renderer to borrow the currently presented render target and a render target suitable for drawing. 
--- engine/render/r_main.cpp | 21 +++++++++++++++------ engine/render/r_main.h | 3 +++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/engine/render/r_main.cpp b/engine/render/r_main.cpp index 9d6ef2c..2ba0bbc 100644 --- a/engine/render/r_main.cpp +++ b/engine/render/r_main.cpp @@ -1372,8 +1372,7 @@ void r_renderer_c::EndFrame() bool decideDraw = false; bool elideDraw = false; { - int drawRtt = 1 - presentRtt; - glBindFramebuffer(GL_FRAMEBUFFER, rttMain[drawRtt].framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, GetDrawRenderTarget().framebuffer); glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); int l{}; for (l = 0; l < numLayer; l++) { @@ -1402,7 +1401,7 @@ void r_renderer_c::EndFrame() layer->Render(); } if (!elideDraw) { - presentRtt = drawRtt; + presentRtt = 1 - presentRtt; ++drawnFrames; } } @@ -1422,7 +1421,7 @@ void r_renderer_c::EndFrame() delete[] layerSort; { - auto rtt = rttMain[presentRtt]; + auto& rtt = GetPresentRenderTarget(); glBindFramebuffer(GL_FRAMEBUFFER, 0); glClearColor(0.0f, 0.0f, 0.0f, 1.0f); glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); @@ -1833,7 +1832,7 @@ void r_renderer_c::DoScreenshot(image_c* i, const char* ext) if (i->type != IMGTYPE_RGB) { return; } - auto& rt = rttMain[presentRtt]; + auto& rt = GetPresentRenderTarget(); int const xs = rt.width; int const ys = rt.height; @@ -1848,7 +1847,7 @@ void r_renderer_c::DoScreenshot(image_c* i, const char* ext) GLenum oglErr = glGetError(); GLenum implColorReadFormat{}, implColorReadType{}; glGetIntegerv(GL_FRAMEBUFFER_BINDING, &oldFb); - glBindFramebuffer(GL_FRAMEBUFFER, rttMain[presentRtt].framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, rt.framebuffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); glReadPixels(0, 0, xs, ys, GL_RGBA, GL_UNSIGNED_BYTE, sbuf.data()); oglErr = glGetError(); @@ -1895,6 +1894,16 @@ void r_renderer_c::DoScreenshot(image_c* i, const char* ext) } } +r_renderer_c::RenderTarget& 
r_renderer_c::GetDrawRenderTarget() +{ + return rttMain[1 - presentRtt]; +} + +r_renderer_c::RenderTarget& r_renderer_c::GetPresentRenderTarget() +{ + return rttMain[presentRtt]; +} + // ============================================ // MurmurHash implementation from public domain // ============================================ diff --git a/engine/render/r_main.h b/engine/render/r_main.h index 1f8c9c7..42428bf 100644 --- a/engine/render/r_main.h +++ b/engine/render/r_main.h @@ -205,4 +205,7 @@ class r_renderer_c: public r_IRenderer, public conCmdHandler_c { void DoScreenshot(image_c* i, const char* ext); void C_Screenshot(IConsole* conHnd, args_c &args); + + RenderTarget& GetDrawRenderTarget(); /* Borrows rttMain[1 - presentRtt]: the target to draw into, i.e. the one not being presented */ + RenderTarget& GetPresentRenderTarget(); /* Borrows rttMain[presentRtt]: the target currently being presented */ }; From 5954cdb7cb6ba5094d752612f139e0f96eeec6be Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Fri, 22 Mar 2024 00:07:48 +0100 Subject: [PATCH 09/10] docs: Clarify origin of MurmurHash implementation --- engine/render/r_main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/engine/render/r_main.cpp b/engine/render/r_main.cpp index 2ba0bbc..e9a2d26 100644 --- a/engine/render/r_main.cpp +++ b/engine/render/r_main.cpp @@ -1904,9 +1904,10 @@ r_renderer_c::RenderTarget& r_renderer_c::GetPresentRenderTarget() return rttMain[presentRtt]; } -// ============================================ -// MurmurHash implementation from public domain -// ============================================ +// =========================================================== +// MurmurHash implementation from public domain, obtained from +// https://github.com/explosion/murmurhash/blob/9281c4825c24e64476457db89fb1d39bf09b3d23/murmurhash/MurmurHash2.cpp +// =========================================================== #if _WIN32 #define BIG_CONSTANT(x) (x) From 
84e98fbcdd4c0d812be38f10858d005c3f65b2f3 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Fri, 22 Mar 2024 00:38:57 +0100 Subject: [PATCH 10/10] docs: add usage example for 
ui_luaReader_c --- ui_api.cpp | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/ui_api.cpp b/ui_api.cpp index 09a334d..86c02ff 100644 --- a/ui_api.cpp +++ b/ui_api.cpp @@ -106,7 +106,7 @@ static ui_main_c* GetUIPtr(lua_State* L) * unwound with normal C++ exception semantics. * * Example use site: -* SG_LUA_FUN_BEGIN(DoTheThing) +* SG_LUA_CPP_FUN_BEGIN(DoTheThing) * { * ui_main_c* ui = GetUIPtr(L); * auto foo = std::make_shared(); @@ -114,7 +114,7 @@ static ui_main_c* GetUIPtr(lua_State* L) * ui->LExpect(L, lua_isstring(L, 1), "DoTheThing() argument 1: expected string, got %s", luaL_typename(L, 1)); * return 0; * } -* SG_LUA_FUN_END() +* SG_LUA_CPP_FUN_END() */ #ifdef _WIN32 @@ -318,6 +318,35 @@ static int l_imgHandleImageSize(lua_State* L) return 2; } +// =============== +// Data validation +// =============== + +/* +* ui_luaReader_c wraps the common validation of arguments or values from Lua in a class +* that ensures a consistent assertion message and reduces the risk of mistakes in +* parameter validation. +* +* As it has scoped RAII resources and uses ui->LExpect() it must only be used in functions +* exposed to Lua through the SG_LUA_CPP_FUN_BEGIN/END scheme as that ensures proper cleanup +* when unwinding. +* +* Example use site: +* SG_LUA_CPP_FUN_BEGIN(DoTheThing) +* { +* ui_main_c* ui = GetUIPtr(L); +* ui_luaReader_c reader(ui, L, "DoTheThing"); +* ui->LExpect(L, lua_gettop(L) >= 2, "Usage: DoTheThing(table, number)"); +* reader.ArgCheckTable(1); // short-hand to validate formal arguments to function +* reader.ArgCheckNumber(2); // -''- +* reader.ValCheckNumber(-1, "descriptive name"); // validates any value on the Lua stack, indicating what the value represents +* // Do the thing +* return 0; +* } +* SG_LUA_CPP_FUN_END() +*/ + + class ui_luaReader_c { public: ui_luaReader_c(ui_main_c* ui, lua_State* L, std::string funName) : ui(ui), L(L), funName(funName) {}