From 63a8d1786578f2bac5992b3273fb792ad7ea2faa Mon Sep 17 00:00:00 2001 From: Aaron Barany Date: Wed, 30 Dec 2020 19:44:42 -0800 Subject: [PATCH] More fixes for ASTC. Updated to the latest master of astc-encoder. Use the higher level function to populate the ASTC encoding block since a dummy image needs to be created anyway. No longer initialize tables, which are initialized by the context. Thread data is now created ahead of time before the threads start. This avoids issues where the table initialization could be done simultaneously across threads and cause data corruption. Build on Visual Studio 2019 for Windows. There seems to be an issue with HDR encoding for 32-bit Windows in release builds on VS 2017, which causes the HDR test to fail. The ASTC encoding library uses some extreme floating point values, so this combined with some optimizations could be the cause of the problem. --- README.md | 2 ++ azure-pipelines.yml | 2 +- lib/astc-encoder | 2 +- lib/src/AstcConverter.cpp | 72 ++++++++++++++++++--------------------- lib/src/Converter.cpp | 21 ++++++++---- 5 files changed, 52 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 12e2d30..cdddbcb 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,8 @@ Cuttlefish has been built for and tested on the following platforms: * Windows (requires Visual Studio 2015 or later) * Mac OS X +> **Note:** Issues have been observed with ASTC encoding with UFLOAT (i.e. HDR ASTC textures) on 32-bit release builds on Visual Studio 2017. Since the ASTC library uses some extreme float values, I suspect this is causing issues with some of the optimizations on that compiler and architecture. I recommend using Visual Studio 2019 or later on Windows. + # Building and Installing [CMake](https://cmake.org/) is used as the build system. The way to invoke CMake differs for different platforms. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9e0e739..969b1cf 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -185,7 +185,7 @@ jobs: displayName: Publish test results release - job: Windows pool: - vmImage: vs2017-win2016 + vmImage: windows-2019 workspace: clean: all strategy: diff --git a/lib/astc-encoder b/lib/astc-encoder index 96989cb..74c7f10 160000 --- a/lib/astc-encoder +++ b/lib/astc-encoder @@ -1 +1 @@ -Subproject commit 96989cb19b4f2ef0707992f48d0f395ec4e53a38 +Subproject commit 74c7f103a18c0744fd7bb613328cc0b3f73e941b diff --git a/lib/src/AstcConverter.cpp b/lib/src/AstcConverter.cpp index 7e2d0ae..f18bb21 100644 --- a/lib/src/AstcConverter.cpp +++ b/lib/src/AstcConverter.cpp @@ -24,9 +24,23 @@ #if CUTTLEFISH_HAS_ASTC +#if CUTTLEFISH_GCC || CUTTLEFISH_CLANG +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#elif CUTTLEFISH_MSC +#pragma warning(push) +#pragma warning(disable: 4244) +#endif + #include "astcenc.h" #include "astcenc_internal.h" +#if CUTTLEFISH_GCC || CUTTLEFISH_CLANG +#pragma GCC diagnostic pop +#elif CUTTLEFISH_MSC +#pragma warning(pop) +#endif + // Need to stub out these functions, since they don't work on all compilers. Return true for all, // since it will only be called if support was compiled in. int cpu_supports_sse41() @@ -92,6 +106,17 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData astcenc_context_alloc(config, 1, &context); + swizzle.r = converter.m_colorMask.r ? ASTCENC_SWZ_R : ASTCENC_SWZ_0; + swizzle.g = converter.m_colorMask.g ? ASTCENC_SWZ_G : ASTCENC_SWZ_0; + swizzle.b = converter.m_colorMask.b ? ASTCENC_SWZ_B : ASTCENC_SWZ_0; + if (converter.m_colorMask.a) + { + swizzle.a = converter.m_alphaType == Texture::Alpha::None ? + ASTCENC_SWZ_1 : ASTCENC_SWZ_A; + } + else + swizzle.a = ASTCENC_SWZ_0; + dummyImage.dim_x = converter.m_blockX; dummyImage.dim_y = converter.m_blockY; dummyImage.dim_z = 1; @@ -104,18 +129,6 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData context->input_averages = m_inputAverages; context->input_variances = m_inputVariances; context->input_alpha_averages = m_inputAlphaAverages; - - astcenc_swizzle swizzle; - swizzle.r = converter.m_colorMask.r ? ASTCENC_SWZ_R : ASTCENC_SWZ_0; - swizzle.g = converter.m_colorMask.g ? ASTCENC_SWZ_G : ASTCENC_SWZ_0; - swizzle.b = converter.m_colorMask.b ? ASTCENC_SWZ_B : ASTCENC_SWZ_0; - if (converter.m_colorMask.a) - { - swizzle.a = converter.m_alphaType == Texture::Alpha::None ? - ASTCENC_SWZ_1 : ASTCENC_SWZ_A; - } - else - swizzle.a = ASTCENC_SWZ_0; init_compute_averages_and_variances(dummyImage, context->config.v_rgb_power, context->config.v_a_power, context->config.v_rgba_radius, context->config.a_scale_radius, swizzle, context->arg, context->ag); @@ -128,6 +141,7 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData } astcenc_context* context; + astcenc_swizzle swizzle; astcenc_image dummyImage; compress_symbolic_block_buffers tempBuffers; @@ -137,13 +151,6 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData float m_inputAlphaAverages[maxBlockSize*maxBlockSize]; }; -static bool initialize() -{ - prepare_angular_tables(); - build_quantization_mode_table(); - return true; -} - AstcConverter::AstcConverter(const Texture& texture, const Image& image, unsigned int blockX, unsigned int blockY, Texture::Quality quality) : Converter(image), m_blockX(blockX), m_blockY(blockY), @@ -151,9 +158,6 @@ AstcConverter::AstcConverter(const Texture& texture, const Image& image, unsigne m_quality(quality), m_alphaType(texture.alphaType()), m_colorSpace(texture.colorSpace()), m_colorMask(texture.colorMask()), m_hdr(texture.type() == Texture::Type::UFloat) { - static bool initialized = initialize(); - (void)initialized; - assert(texture.type() == Texture::Type::UNorm || texture.type() == Texture::Type::UFloat); data().resize(m_jobsX*m_jobsY*blockSize); } @@ -165,10 +169,6 @@ AstcConverter::~AstcConverter() void AstcConverter::process(unsigned int x, unsigned int y, ThreadData* threadData) { ColorRGBAf imageData[maxBlockSize*maxBlockSize]; - imageblock astcBlock; - astcBlock.xpos = x*m_blockX; - astcBlock.ypos = y*m_blockY; - astcBlock.zpos = 0; for (unsigned int j = 0, index = 0; j < m_blockY; ++j) { auto scanline = reinterpret_cast(image().scanline( @@ -177,25 +177,19 @@ void AstcConverter::process(unsigned int x, unsigned int y, ThreadData* threadDa { unsigned int scanlineIdx = std::min(x*m_blockX + i, image().width() - 1); imageData[index] = scanline[scanlineIdx]; - astcBlock.data_r[index] = scanline[scanlineIdx].r; - astcBlock.data_g[index] = scanline[scanlineIdx].g; - astcBlock.data_b[index] = scanline[scanlineIdx].b; - astcBlock.data_a[index] = scanline[scanlineIdx].a; - astcBlock.rgb_lns[index] = m_hdr; - astcBlock.alpha_lns[index] = m_hdr; - astcBlock.nan_texel[index] = false; } } auto astcThreadData = static_cast(threadData); - void* imageDataPtr = imageData; - astcThreadData->dummyImage.data = &imageDataPtr; + astcenc_image& dummyImage = astcThreadData->dummyImage; + astcenc_context* context = astcThreadData->context; - // Fill in the rest of the information. - imageblock_initialize_work_from_orig(&astcBlock, m_blockX*m_blockY); - update_imageblock_flags(&astcBlock, m_blockX, m_blockY, 1); + void* imageDataPtr = imageData; + dummyImage.data = &imageDataPtr; + imageblock astcBlock; + fetch_imageblock(m_hdr ? ASTCENC_PRF_HDR : ASTCENC_PRF_LDR, dummyImage, &astcBlock, + context->bsd, 0, 0, 0, astcThreadData->swizzle); - astcenc_context* context = astcThreadData->context; if (astcThreadData->context->input_averages) { // This assumes that it's going to be a pool of thread jobs, so always make sure there's diff --git a/lib/src/Converter.cpp b/lib/src/Converter.cpp index 2a87a0e..1d2da70 100644 --- a/lib/src/Converter.cpp +++ b/lib/src/Converter.cpp @@ -505,9 +505,13 @@ bool Converter::convert(const Texture& texture, MipImageList& images, MipTexture Texture::Quality quality, unsigned int threadCount) { std::vector> jobs; + std::vector> threadData; std::vector threads; if (threadCount > 1) + { + threadData.reserve(threadCount); threads.reserve(threadCount); + } textureData.resize(images.size()); for (unsigned int mip = 0; mip < images.size(); ++mip) @@ -540,19 +544,23 @@ bool Converter::convert(const Texture& texture, MipImageList& images, MipTexture unsigned int curThreads = std::min(jobsX*jobsY, threadCount); if (curThreads <= 1) { - std::unique_ptr threadData = converter->createThreadData(); + std::unique_ptr singleThreadData = converter->createThreadData(); for (const std::pair& job : jobs) - converter->process(job.first, job.second, threadData.get()); + converter->process(job.first, job.second, singleThreadData.get()); } else { std::atomic curJob(0); + // Initialize all thread data first in case they work with global data, such as + // library initialization. (some of which is beyond our control) + for (unsigned int i = 0; i < curThreads; ++i) + threadData.push_back(converter->createThreadData()); + for (unsigned int i = 0; i < curThreads; ++i) { - threads.emplace_back([&curJob, &jobs, &converter]() + Converter::ThreadData* threadDataPtr = threadData[i].get(); + threads.emplace_back([&curJob, &jobs, &converter, threadDataPtr]() { - std::unique_ptr threadData = - converter->createThreadData(); do { unsigned int thisJob = curJob++; @@ -560,13 +568,14 @@ bool Converter::convert(const Texture& texture, MipImageList& images, MipTexture return; converter->process(jobs[thisJob].first, jobs[thisJob].second, - threadData.get()); + threadDataPtr); } while (true); }); } for (std::thread& thread : threads) thread.join(); + threadData.clear(); threads.clear(); }