Skip to content

Commit

Permalink
More fixes for ASTC.
Browse files Browse the repository at this point in the history
Updated to the latest master of astc-encoder. Use the higher level function
to populate the ASTC encoding block since a dummy image needs to be created
anyway. No longer initialize tables, which are initialized by the context.

Thread data is now created ahead of time before the threads start. This
avoids issues where the table initialization could be done simultaneously
across threads and cause data corruption.

Build on Visual Studio 2019 for Windows. There seems to be an issue with
HDR encoding for 32-bit Windows in release builds on VS 2017, which causes
the HDR test to fail. The ASTC encoding library uses some extreme floating
point values, so this combined with some optimizations could be the cause
of the problem.
  • Loading branch information
akb825 committed Dec 31, 2020
1 parent 6bf549a commit 63a8d17
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 47 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Cuttlefish has been built for and tested on the following platforms:
* Windows (requires Visual Studio 2015 or later)
* Mac OS X

> **Note:** Issues have been observed with ASTC encoding with UFLOAT (i.e. HDR ASTC textures) on 32-bit release builds on Visual Studio 2017. Since the ASTC library uses some extreme float values, I suspect this is causing issues with some of the optimizations on that compiler and architecture. I recommend using Visual Studio 2019 or later on Windows.
# Building and Installing

[CMake](https://cmake.org/) is used as the build system. The way to invoke CMake differs for different platforms.
Expand Down
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ jobs:
displayName: Publish test results release
- job: Windows
pool:
vmImage: vs2017-win2016
vmImage: windows-2019
workspace:
clean: all
strategy:
Expand Down
72 changes: 33 additions & 39 deletions lib/src/AstcConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,23 @@

#if CUTTLEFISH_HAS_ASTC

#if CUTTLEFISH_GCC || CUTTLEFISH_CLANG
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#elif CUTTLEFISH_MSC
#pragma warning(push)
#pragma warning(disable: 4244)
#endif

#include "astcenc.h"
#include "astcenc_internal.h"

#if CUTTLEFISH_GCC || CUTTLEFISH_CLANG
#pragma GCC diagnostic pop
#elif CUTTLEFISH_MSC
#pragma warning(pop)
#endif

// Need to stub out these functions, since they don't work on all compilers. Return true for all,
// since it will only be called if support was compiled in.
int cpu_supports_sse41()
Expand Down Expand Up @@ -92,6 +106,17 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData

astcenc_context_alloc(config, 1, &context);

swizzle.r = converter.m_colorMask.r ? ASTCENC_SWZ_R : ASTCENC_SWZ_0;
swizzle.g = converter.m_colorMask.g ? ASTCENC_SWZ_G : ASTCENC_SWZ_0;
swizzle.b = converter.m_colorMask.b ? ASTCENC_SWZ_B : ASTCENC_SWZ_0;
if (converter.m_colorMask.a)
{
swizzle.a = converter.m_alphaType == Texture::Alpha::None ?
ASTCENC_SWZ_1 : ASTCENC_SWZ_A;
}
else
swizzle.a = ASTCENC_SWZ_0;

dummyImage.dim_x = converter.m_blockX;
dummyImage.dim_y = converter.m_blockY;
dummyImage.dim_z = 1;
Expand All @@ -104,18 +129,6 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData
context->input_averages = m_inputAverages;
context->input_variances = m_inputVariances;
context->input_alpha_averages = m_inputAlphaAverages;

astcenc_swizzle swizzle;
swizzle.r = converter.m_colorMask.r ? ASTCENC_SWZ_R : ASTCENC_SWZ_0;
swizzle.g = converter.m_colorMask.g ? ASTCENC_SWZ_G : ASTCENC_SWZ_0;
swizzle.b = converter.m_colorMask.b ? ASTCENC_SWZ_B : ASTCENC_SWZ_0;
if (converter.m_colorMask.a)
{
swizzle.a = converter.m_alphaType == Texture::Alpha::None ?
ASTCENC_SWZ_1 : ASTCENC_SWZ_A;
}
else
swizzle.a = ASTCENC_SWZ_0;
init_compute_averages_and_variances(dummyImage, context->config.v_rgb_power,
context->config.v_a_power, context->config.v_rgba_radius,
context->config.a_scale_radius, swizzle, context->arg, context->ag);
Expand All @@ -128,6 +141,7 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData
}

astcenc_context* context;
astcenc_swizzle swizzle;
astcenc_image dummyImage;
compress_symbolic_block_buffers tempBuffers;

Expand All @@ -137,23 +151,13 @@ class AstcConverter::AstcThreadData : public Converter::ThreadData
float m_inputAlphaAverages[maxBlockSize*maxBlockSize];
};

static bool initialize()
{
prepare_angular_tables();
build_quantization_mode_table();
return true;
}

AstcConverter::AstcConverter(const Texture& texture, const Image& image, unsigned int blockX,
unsigned int blockY, Texture::Quality quality)
: Converter(image), m_blockX(blockX), m_blockY(blockY),
m_jobsX((image.width() + blockX - 1)/blockX), m_jobsY((image.height() + blockY - 1)/blockY),
m_quality(quality), m_alphaType(texture.alphaType()), m_colorSpace(texture.colorSpace()),
m_colorMask(texture.colorMask()), m_hdr(texture.type() == Texture::Type::UFloat)
{
static bool initialized = initialize();
(void)initialized;

assert(texture.type() == Texture::Type::UNorm || texture.type() == Texture::Type::UFloat);
data().resize(m_jobsX*m_jobsY*blockSize);
}
Expand All @@ -165,10 +169,6 @@ AstcConverter::~AstcConverter()
void AstcConverter::process(unsigned int x, unsigned int y, ThreadData* threadData)
{
ColorRGBAf imageData[maxBlockSize*maxBlockSize];
imageblock astcBlock;
astcBlock.xpos = x*m_blockX;
astcBlock.ypos = y*m_blockY;
astcBlock.zpos = 0;
for (unsigned int j = 0, index = 0; j < m_blockY; ++j)
{
auto scanline = reinterpret_cast<const ColorRGBAf*>(image().scanline(
Expand All @@ -177,25 +177,19 @@ void AstcConverter::process(unsigned int x, unsigned int y, ThreadData* threadDa
{
unsigned int scanlineIdx = std::min(x*m_blockX + i, image().width() - 1);
imageData[index] = scanline[scanlineIdx];
astcBlock.data_r[index] = scanline[scanlineIdx].r;
astcBlock.data_g[index] = scanline[scanlineIdx].g;
astcBlock.data_b[index] = scanline[scanlineIdx].b;
astcBlock.data_a[index] = scanline[scanlineIdx].a;
astcBlock.rgb_lns[index] = m_hdr;
astcBlock.alpha_lns[index] = m_hdr;
astcBlock.nan_texel[index] = false;
}
}

auto astcThreadData = static_cast<AstcThreadData*>(threadData);
void* imageDataPtr = imageData;
astcThreadData->dummyImage.data = &imageDataPtr;
astcenc_image& dummyImage = astcThreadData->dummyImage;
astcenc_context* context = astcThreadData->context;

// Fill in the rest of the information.
imageblock_initialize_work_from_orig(&astcBlock, m_blockX*m_blockY);
update_imageblock_flags(&astcBlock, m_blockX, m_blockY, 1);
void* imageDataPtr = imageData;
dummyImage.data = &imageDataPtr;
imageblock astcBlock;
fetch_imageblock(m_hdr ? ASTCENC_PRF_HDR : ASTCENC_PRF_LDR, dummyImage, &astcBlock,
context->bsd, 0, 0, 0, astcThreadData->swizzle);

astcenc_context* context = astcThreadData->context;
if (astcThreadData->context->input_averages)
{
// This assumes that it's going to be a pool of thread jobs, so always make sure there's
Expand Down
21 changes: 15 additions & 6 deletions lib/src/Converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,9 +505,13 @@ bool Converter::convert(const Texture& texture, MipImageList& images, MipTexture
Texture::Quality quality, unsigned int threadCount)
{
std::vector<std::pair<unsigned int, unsigned int>> jobs;
std::vector<std::unique_ptr<ThreadData>> threadData;
std::vector<std::thread> threads;
if (threadCount > 1)
{
threadData.reserve(threadCount);
threads.reserve(threadCount);
}

textureData.resize(images.size());
for (unsigned int mip = 0; mip < images.size(); ++mip)
Expand Down Expand Up @@ -540,33 +544,38 @@ bool Converter::convert(const Texture& texture, MipImageList& images, MipTexture
unsigned int curThreads = std::min(jobsX*jobsY, threadCount);
if (curThreads <= 1)
{
std::unique_ptr<ThreadData> threadData = converter->createThreadData();
std::unique_ptr<ThreadData> singleThreadData = converter->createThreadData();
for (const std::pair<unsigned int, unsigned int>& job : jobs)
converter->process(job.first, job.second, threadData.get());
converter->process(job.first, job.second, singleThreadData.get());
}
else
{
std::atomic<unsigned int> curJob(0);
// Initialize all thread data first in case they work with global data, such as
// library initialization. (some of which is beyond our control)
for (unsigned int i = 0; i < curThreads; ++i)
threadData.push_back(converter->createThreadData());

for (unsigned int i = 0; i < curThreads; ++i)
{
threads.emplace_back([&curJob, &jobs, &converter]()
Converter::ThreadData* threadDataPtr = threadData[i].get();
threads.emplace_back([&curJob, &jobs, &converter, threadDataPtr]()
{
std::unique_ptr<ThreadData> threadData =
converter->createThreadData();
do
{
unsigned int thisJob = curJob++;
if (thisJob >= jobs.size())
return;

converter->process(jobs[thisJob].first, jobs[thisJob].second,
threadData.get());
threadDataPtr);
} while (true);
});
}

for (std::thread& thread : threads)
thread.join();
threadData.clear();
threads.clear();
}

Expand Down

0 comments on commit 63a8d17

Please sign in to comment.