diff --git a/Core/GameEngineDevice/Source/W3DDevice/GameClient/W3DSmudge.cpp b/Core/GameEngineDevice/Source/W3DDevice/GameClient/W3DSmudge.cpp index d11d305123f..0cd8a89b69b 100644 --- a/Core/GameEngineDevice/Source/W3DDevice/GameClient/W3DSmudge.cpp +++ b/Core/GameEngineDevice/Source/W3DDevice/GameClient/W3DSmudge.cpp @@ -57,6 +57,7 @@ void W3DSmudgeManager::init(void) { SmudgeManager::init(); ReAcquireResources(); + testHardwareSupport(); } void W3DSmudgeManager::reset (void) @@ -72,9 +73,12 @@ void W3DSmudgeManager::ReleaseResources(void) REF_PTR_RELEASE(m_indexBuffer); } -//Make sure (SMUDGE_DRAW_SIZE * 12) < 65535 because that's the max index buffer size. + #define SMUDGE_DRAW_SIZE 500 //draw at most 50 smudges per call. Tweak value to improve CPU/GPU parallelism. +static_assert(SMUDGE_DRAW_SIZE * 5 < 0x10000, "Vertex index exceeds 16-bit limit"); + + void W3DSmudgeManager::ReAcquireResources(void) { ReleaseResources(); @@ -130,180 +134,73 @@ void W3DSmudgeManager::ReAcquireResources(void) } } -/*Copies a portion of the current render target into a specified buffer*/ -Int copyRect(unsigned char *buf, Int bufSize, int oX, int oY, int width, int height) -{ - IDirect3DSurface8 *surface=NULL; ///GetRenderTarget(&surface); - - if (!surface) - goto error; - - D3DSURFACE_DESC desc; - - surface->GetDesc(&desc); - - RECT srcRect; - srcRect.left=oX; - srcRect.top=oY; - srcRect.right=oX+width; - srcRect.bottom=oY+height; - - POINT dstPoint; - dstPoint.x=0; - dstPoint.y=0; - - hr=m_pDev->CreateImageSurface( width, height, desc.Format, &tempSurface); - - if (hr != S_OK) - goto error; - - hr=m_pDev->CopyRects(surface,&srcRect,1,tempSurface,&dstPoint); - - if (hr != S_OK) - goto error; - - D3DLOCKED_RECT lrect; - - hr=tempSurface->LockRect(&lrect,NULL,D3DLOCK_READONLY); - - if (hr != S_OK) - goto error; - - tempSurface->GetDesc(&desc); - - if (desc.Size < bufSize) - bufSize = desc.Size; - - memcpy(buf,lrect.pBits,bufSize); - result = bufSize; - - tempSurface->UnlockRect(); - -error: - if (surface) - surface->Release(); - if (tempSurface) - tempSurface->Release(); - - return result; -} - -#define UNIQUE_COLOR (0x12345678) -#define BLOCK_SIZE (8) - Bool W3DSmudgeManager::testHardwareSupport(void) { - if (m_hardwareSupportStatus == SMUDGE_SUPPORT_UNKNOWN) - { //we have not done the test yet. - - IDirect3DTexture8 *backTexture=W3DShaderManager::getRenderTexture(); - if (!backTexture) - { //do trivial test first to see if render target exists. - m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; - return FALSE; - } + // Return cached result if already checked + if (m_hardwareSupportStatus != SMUDGE_SUPPORT_UNKNOWN) + { + return m_hardwareSupportStatus == SMUDGE_SUPPORT_YES; + } - if (!W3DShaderManager::isRenderingToTexture()) - return FALSE; //can't do the test unless we're rendering to texture. - - VertexMaterialClass *vmat=VertexMaterialClass::Get_Preset(VertexMaterialClass::PRELIT_DIFFUSE); - DX8Wrapper::Set_Material(vmat); - REF_PTR_RELEASE(vmat); //no need to keep a reference since it's a preset. - - ShaderClass shader=ShaderClass::_PresetOpaqueShader; - shader.Set_Depth_Compare(ShaderClass::PASS_ALWAYS); - shader.Set_Depth_Mask(ShaderClass::DEPTH_WRITE_DISABLE); - DX8Wrapper::Set_Shader(shader); - DX8Wrapper::Set_Texture(0,NULL); - DX8Wrapper::Apply_Render_State_Changes(); //force update of view and projection matrices - - struct _TRANS_LIT_TEX_VERTEX { - Vector4 p; - DWORD color; // diffuse color - float u; - float v; - } v[4]; - - //bottom right - v[0].p = Vector4( BLOCK_SIZE-0.5f, BLOCK_SIZE-0.5f, 0.0f, 1.0f ); - v[0].u = BLOCK_SIZE/(Real)TheDisplay->getWidth(); v[0].v = BLOCK_SIZE/(Real)TheDisplay->getHeight(); - //top right - v[1].p = Vector4( BLOCK_SIZE-0.5f, 0-0.5f, 0.0f, 1.0f ); - v[1].u = BLOCK_SIZE/(Real)TheDisplay->getWidth(); v[1].v = 0; - //bottom left - v[2].p = Vector4( 0-0.5f, BLOCK_SIZE-0.5f, 0.0f, 1.0f ); - v[2].u = 0; v[2].v = BLOCK_SIZE/(Real)TheDisplay->getHeight(); - //top left - v[3].p = Vector4( 0-0.5f, 0-0.5f, 0.0f, 1.0f ); - v[3].u = 0; v[3].v = 0; - - v[0].color = UNIQUE_COLOR; - v[1].color = UNIQUE_COLOR; - v[2].color = UNIQUE_COLOR; - v[3].color = UNIQUE_COLOR; - - LPDIRECT3DDEVICE8 pDev=DX8Wrapper::_Get_D3D_Device8(); - - //draw polygons like this is very inefficient but for only 2 triangles, it's - //not worth bothering with index/vertex buffers. - pDev->SetVertexShader(D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_TEX1); - - pDev->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, v, sizeof(_TRANS_LIT_TEX_VERTEX)); - - DWORD refData[BLOCK_SIZE*BLOCK_SIZE]; - memset(refData,0,sizeof(refData)); - Int bufSize=copyRect((unsigned char *)refData,sizeof(refData),0,0,BLOCK_SIZE,BLOCK_SIZE); //copy area we just rendered using solid color - if (!bufSize) - { - m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; - return FALSE; - } + LPDIRECT3DDEVICE8 d3d8Device = DX8Wrapper::_Get_D3D_Device8(); + LPDIRECT3D8 d3d8Interface = DX8Wrapper::_Get_D3D8(); + if (!d3d8Device || !d3d8Interface) + { + m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; + return false; + } - DX8Wrapper::Set_DX8_Texture(0,backTexture); + D3DCAPS8 caps; + d3d8Device->GetDeviceCaps(&caps); - DWORD testData[BLOCK_SIZE*BLOCK_SIZE]; - memset(testData,0xff,sizeof(testData)); + // DX8 requires dynamic textures to efficiently update smudge texture + if (!(caps.Caps2 & D3DCAPS2_DYNAMICTEXTURES)) + { + m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; + return false; + } - v[0].color = 0xffffffff; - v[1].color = 0xffffffff; - v[2].color = 0xffffffff; - v[3].color = 0xffffffff; + IDirect3DTexture8 *backTexture = W3DShaderManager::getRenderTexture(); + if (!backTexture) + { + m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; + return FALSE; + } - pDev->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, v, sizeof(_TRANS_LIT_TEX_VERTEX)); - bufSize=copyRect((unsigned char *)testData,sizeof(testData),0,0,BLOCK_SIZE,BLOCK_SIZE); + IDirect3DSurface8* surface; + if (FAILED(backTexture->GetSurfaceLevel(0, &surface))) + { + m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; + return false; + } - if (!bufSize) - { - m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; - return FALSE; - } + D3DSURFACE_DESC desc; + surface->GetDesc(&desc); + surface->Release(); - //compare the 2 buffers to see if they match. - if (memcmp(testData,refData,bufSize) == 0) - { - m_hardwareSupportStatus = SMUDGE_SUPPORT_YES; - return TRUE; - } + // Check if the device supports render-to-texture for this format + HRESULT hr = d3d8Interface->CheckDeviceFormat( + D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + DX8Wrapper::_Get_D3D_Back_Buffer_Format(), + D3DUSAGE_RENDERTARGET, + D3DRTYPE_TEXTURE, + desc.Format + ); + + if (hr != D3D_OK) + { m_hardwareSupportStatus = SMUDGE_SUPPORT_NO; + return false; } - return (SMUDGE_SUPPORT_YES == m_hardwareSupportStatus); + m_hardwareSupportStatus = SMUDGE_SUPPORT_YES; + return true; } void W3DSmudgeManager::render(RenderInfoClass &rinfo) { //Verify that the card supports the effect. - if (!testHardwareSupport()) + if (m_hardwareSupportStatus == SMUDGE_SUPPORT_NO) return; CameraClass &camera=rinfo.Camera; @@ -391,18 +288,8 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) Vector2 &thisUV=verts[i].uv; //Clamp coordinates so we're not referencing texels outside the view. - if (thisUV.X > texClampX) - smudge->m_offset.X = 0; - else - if (thisUV.X < 0) - smudge->m_offset.X = 0; - - if (thisUV.Y > texClampY) - smudge->m_offset.Y = 0; - else - if (thisUV.Y < 0) - smudge->m_offset.Y = 0; - + WWMath::Clamp(thisUV.X, 0, texClampX); + WWMath::Clamp(thisUV.Y, 0, texClampY); } //Finish center vertex @@ -410,7 +297,7 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) uvSpanX=verts[3].uv.X - verts[0].uv.X; uvSpanY=verts[1].uv.Y - verts[0].uv.Y; verts[4].uv.X=verts[0].uv.X+uvSpanX*(0.5f+smudge->m_offset.X); - verts[4].uv.Y=verts[0].uv.Y+uvSpanY*(0.5f+smudge->m_offset.X); + verts[4].uv.Y=verts[0].uv.Y+uvSpanY*(0.5f+smudge->m_offset.Y); count++; //increment visible smudge count. smudge=smudge->Succ(); @@ -450,7 +337,29 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) #ifdef USE_COPY_RECTS DX8Wrapper::Set_Texture(0,m_backgroundTexture); #else - DX8Wrapper::Set_DX8_Texture(0,backTexture); + + IDirect3DTexture8* renderTextureCopy = NULL; + DX8Wrapper::_Get_D3D_Device8()->CreateTexture( + D3DDesc.Width, + D3DDesc.Height, + 1, + 0, + D3DDesc.Format, + D3DPOOL_DEFAULT, + &renderTextureCopy); + + IDirect3DSurface8* srcRT = nullptr; + IDirect3DSurface8* dstRT = nullptr; + + backTexture->GetSurfaceLevel(0, &srcRT); + renderTextureCopy->GetSurfaceLevel(0, &dstRT); + + DX8Wrapper::_Get_D3D_Device8()->CopyRects(srcRT, nullptr, 0, dstRT, nullptr); + + srcRT->Release(); + dstRT->Release(); + + DX8Wrapper::Set_DX8_Texture(0,renderTextureCopy); //Need these states in case texture is non-power-of-2 DX8Wrapper::Set_DX8_Texture_Stage_State( 0, D3DTSS_ADDRESSU, D3DTADDRESS_CLAMP); DX8Wrapper::Set_DX8_Texture_Stage_State( 0, D3DTSS_ADDRESSV, D3DTADDRESS_CLAMP); @@ -462,12 +371,12 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) VertexMaterialClass *vmat=VertexMaterialClass::Get_Preset(VertexMaterialClass::PRELIT_DIFFUSE); DX8Wrapper::Set_Material(vmat); REF_PTR_RELEASE(vmat); - DX8Wrapper::Apply_Render_State_Changes(); //Disable reading texture alpha since it's undefined. //DX8Wrapper::Set_DX8_Texture_Stage_State(0,D3DTSS_COLOROP,D3DTOP_SELECTARG1); DX8Wrapper::Set_DX8_Texture_Stage_State(0,D3DTSS_ALPHAOP,D3DTOP_SELECTARG2); + Int smudgesBatchCount=0; Int smudgesRemaining=count; set=m_usedSmudgeSetList.Head(); //first smudge set that needs rendering. Smudge *remainingSmudgeStart=set->getUsedSmudgeList().Head(); //first smudge that needs rendering. @@ -497,7 +406,8 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) //Check if we exceeded maximum number of smudges allowed per draw call. if (smudgesInRenderBatch >= count) - { remainingSmudgeStart = smudge; + { + remainingSmudgeStart = smudge; goto flushSmudges; } @@ -530,9 +440,10 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) if (set) //start next batch at beginning of set. remainingSmudgeStart = set->getUsedSmudgeList().Head(); } -flushSmudges: - DX8Wrapper::Set_Vertex_Buffer(vb_access); } +flushSmudges: + ++smudgesBatchCount; + DX8Wrapper::Set_Vertex_Buffer(vb_access); DX8Wrapper::Draw_Triangles( 0,smudgesInRenderBatch*4, 0, smudgesInRenderBatch*5); @@ -548,7 +459,17 @@ void W3DSmudgeManager::render(RenderInfoClass &rinfo) smudgesRemaining -= smudgesInRenderBatch; } + // TheSuperHackers @bugfix xezon 15/06/2025 Draw a dummy point with the last vertex buffer + // to force the GPU to flush all current pipeline state and commit the previous draw call. + // This is required for some AMD models and drivers that refuse to flush a single draw call + // for the smudges. This draw call is invisible and harmless. + //if (smudgesBatchCount == 1) + //{ + // DX8Wrapper::_Get_D3D_Device8()->DrawPrimitive(D3DPT_POINTLIST, 0, 1); + //} + DX8Wrapper::Set_DX8_Texture_Stage_State(0,D3DTSS_COLOROP,D3DTOP_MODULATE); DX8Wrapper::Set_DX8_Texture_Stage_State(0,D3DTSS_ALPHAOP,D3DTOP_MODULATE); + renderTextureCopy->Release(); } diff --git a/GeneralsMD/Code/Libraries/Source/WWVegas/WW3D2/dx8wrapper.h b/GeneralsMD/Code/Libraries/Source/WWVegas/WW3D2/dx8wrapper.h index db5b93030cb..f21f17ee17c 100644 --- a/GeneralsMD/Code/Libraries/Source/WWVegas/WW3D2/dx8wrapper.h +++ b/GeneralsMD/Code/Libraries/Source/WWVegas/WW3D2/dx8wrapper.h @@ -455,6 +455,8 @@ class DX8Wrapper static void _Enable_Triangle_Draw(bool enable) { _EnableTriangleDraw=enable; } static bool _Is_Triangle_Draw_Enabled() { return _EnableTriangleDraw; } + static unsigned int Bytes_Per_Pixel(D3DFORMAT format); + /* ** Additional swap chain interface ** @@ -525,6 +527,7 @@ class DX8Wrapper static IDirect3D8* _Get_D3D8() { return D3DInterface; } /// Returns the display format - added by TR for video playback - not part of W3D static WW3DFormat getBackBufferFormat( void ); + static D3DFORMAT _Get_D3D_Back_Buffer_Format() { return DisplayFormat; } static bool Reset_Device(bool reload_assets=true); static const DX8Caps* Get_Current_Caps() { WWASSERT(CurrentCaps); return CurrentCaps; } @@ -1161,6 +1164,85 @@ WWINLINE void DX8Wrapper::Set_Alpha (const float alpha, unsigned int &color) component [3] = 255.0f * alpha; } +WWINLINE unsigned int DX8Wrapper::Bytes_Per_Pixel(D3DFORMAT format) +{ + switch (format) + { + // 8-bit formats + case D3DFMT_A8: + case D3DFMT_P8: + case D3DFMT_L8: + return 1; + + // 16-bit formats + case D3DFMT_R5G6B5: + case D3DFMT_X1R5G5B5: + case D3DFMT_A1R5G5B5: + case D3DFMT_A4R4G4B4: + case D3DFMT_X4R4G4B4: + case D3DFMT_A8L8: + case D3DFMT_A4L4: + case D3DFMT_V8U8: + case D3DFMT_L6V5U5: + return 2; + + // 24-bit formats + case D3DFMT_R8G8B8: + return 3; + + // 32-bit formats + case D3DFMT_A8R8G8B8: + case D3DFMT_X8R8G8B8: + case D3DFMT_A8R3G3B2: + case D3DFMT_A2B10G10R10: + case D3DFMT_G16R16: + case D3DFMT_X8L8V8U8: + case D3DFMT_Q8W8V8U8: + case D3DFMT_W11V11U10: + case D3DFMT_A2W10V10U10: + case D3DFMT_V16U16: + return 4; + + // Palette-based alpha + case D3DFMT_A8P8: + return 0; + + // Packed YUV + case D3DFMT_UYVY: + case D3DFMT_YUY2: + return 0; + + // Block-compressed + case D3DFMT_DXT1: + case D3DFMT_DXT2: + case D3DFMT_DXT3: + case D3DFMT_DXT4: + case D3DFMT_DXT5: + return 0; + + // Depth / stencil + case D3DFMT_D16_LOCKABLE: + case D3DFMT_D16: + case D3DFMT_D15S1: + case D3DFMT_D24S8: + case D3DFMT_D24X8: + case D3DFMT_D24X4S4: + case D3DFMT_D32: + return 0; + + // Non-image data + case D3DFMT_VERTEXDATA: + case D3DFMT_INDEX16: + case D3DFMT_INDEX32: + return 0; + + // Unknown / invalid + case D3DFMT_UNKNOWN: + default: + return 0; + } +} + WWINLINE void DX8Wrapper::Get_Render_State(RenderStateStruct& state) { state=render_state;