Update to version 4.4 (#258)

* Update to version 4.4 * Update GitHub workflow file * Fix GitHub workflow issue
GPUOpen-Tools · Jul 10, 2023 · e9b1b99 · e9b1b99
1 parent 38081a0
commit e9b1b99
Show file tree

Hide file tree

Showing 165 changed files with 15,518 additions and 17,283 deletions.
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
@@ -5,6 +5,7 @@ on: [push]
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
   BUILD_TYPE: Release
+  OPENCV_DIR: C:/opencv/
 
 jobs:
   build_compressonator_cli:
@@ -16,24 +17,24 @@ jobs:
     - name: Create Build Environment
       # Some projects don't allow in-source building, so create a separate build directory
       # We'll use this as our working directory for all subsequent commands
-      run: cmake -E make_directory ${{github.workspace}}/build
+      run: cmake -E make_directory ${{github.workspace}}/build/bin
 
     - name: Sync external libs
-      working-directory: ${{github.workspace}}/scripts
+      working-directory: ${{github.workspace}}/build
       run: python fetch_dependencies.py
 
     - name: Configure CMake
       # Use a bash shell so we can use the same syntax for environment variable
       # access regardless of the host operating system
       shell: bash
-      working-directory: ${{github.workspace}}/build
+      working-directory: ${{github.workspace}}/build/bin
       # Note the current convention is to use the -S and -B options here to specify source 
       # and build directories, but this is only available with CMake 3.13 and higher.  
       # The CMake binaries on the Github Actions machines are (as of this writing) 3.12
       run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DOPTION_ENABLE_ALL_APPS=OFF -DOPTION_BUILD_APPS_CMP_CLI=ON 
 
     - name: Build
-      working-directory: ${{github.workspace}}/build
+      working-directory: ${{github.workspace}}/build/bin
       shell: bash      
       # Execute the build.  You can specify a specific target with "--target <NAME>"
       run: cmake --build . --config $BUILD_TYPE
@@ -42,7 +43,7 @@ jobs:
       uses: actions/upload-artifact@v3
       with: 
           name: CompressonatorCLI_64_Windows_Master_Build
-          path: ${{github.workspace}}/build/bin/Release
+          path: ${{github.workspace}}/build/bin/bin/Release
 
   build_compressonator_framework:
     runs-on: windows-latest
@@ -53,10 +54,10 @@ jobs:
       - name: Create Build Environment
         # Some projects don't allow in-source building, so create a separate build directory
         # We'll use this as our working directory for all subsequent commands
-        run: cmake -E make_directory ${{github.workspace}}/build
+        run: cmake -E make_directory ${{github.workspace}}/build/bin
 
       - name: Sync external libs
-        working-directory: ${{github.workspace}}/scripts
+        working-directory: ${{github.workspace}}/build/
         run: python fetch_dependencies.py
 
       - name: Add msbuild to PATH
@@ -120,11 +121,11 @@ jobs:
       - name: Copy files into build results directory
         shell: bash
         run: |
-          cp build/Release*/x64/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x64/
-          cp build/Release*/Win32/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x86/
-          cp build/Debug*/x64/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x64/
-          cp build/Debug*/Win32/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x86/
-          cp build/Release/x64/Plugins/Compute/{*.h,*.hlsl,*.cpp} CompressonatorFramework_result/lib/encoders/
+          cp build/bin/Release*/x64/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x64/
+          cp build/bin/Release*/Win32/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x86/
+          cp build/bin/Debug*/x64/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x64/
+          cp build/bin/Debug*/Win32/{*.lib,*.dll} CompressonatorFramework_result/lib/VS2019/x86/
+          cp build/bin/Release/x64/Plugins/Compute/{*.h,*.hlsl,*.cpp} CompressonatorFramework_result/lib/encoders/
           cp cmp_compressonatorlib/compressonator.h CompressonatorFramework_result/include/
 
       # Will probably want to collect the files into a better structure before running this command

diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@
 *.VC.db
 *.VC.VC.opendb
 *.vs
+*.vscode
 *CMakeLists.txt.user
 *vcxproj.user
 Applications/_Plugins/C3DModel_viewers/glTF_DX12_EX/GeneratedFiles/*
@@ -19,3 +20,18 @@ build-out/*
 docs/build/*
 External/build-out/*
 External/build/*
+external/brotlig/brotlig/build/*
+
+build/bin/*
+build/sdk/bin/*
+build/TEMP/*
+build/Debug*
+build/Release*
+
+*/shaders/compiled/*
+internal/tests/output/*
+internal/tests/testenv/*
+internal/tests/scripts/temp/*
+__pycache__
+*.aps
+transcode_temp.dds
diff --git a/.gitmodules b/.gitmodules
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,7 +6,6 @@ cmake_minimum_required(VERSION 3.10)
 # cmake -DOPTION_ENABLE_ALL_APPS=OFF -G "Visual Studio 15 2017 Win64"    Disable all builds except external libs, minimal cmake base setup 
 # cmake -DOPTION_ENABLE_ALL_APPS=OFF -DOPTION_BUILD_APPS_CMP_CLI=ON -G "Visual Studio 15 2017 Win64"   Enable only CLI app build
 # cmake -DOPTION_ENABLE_ALL_APPS=OFF -DOPTION_BUILD_APPS_CMP_GUI=ON -G "Visual Studio 15 2017 Win64"   Enable only GUI app build
-# cmake -DOPTION_ENABLE_ALL_APPS=OFF -DOPTION_BUILD_APPS_TESTCORE=ON -G "Visual Studio 15 2017 Win64"  Enable only Test Core App build
 #
 # Other options are available : see OPTION_BUILDS... 
 #
@@ -19,8 +18,15 @@ if(POLICY CMP0076)
   cmake_policy(SET CMP0076 NEW)
 endif()
 
-set(CMP_VERSION_MAJOR 4)
-set(CMP_VERSION_MINOR 3)
+# Setting version information
+
+if (NOT CMP_VERSION_MAJOR)
+    set(CMP_VERSION_MAJOR 4)
+endif()
+
+if (NOT CMP_VERSION_MINOR)
+    set(CMP_VERSION_MINOR 4)
+endif()
 
 if (NOT CMP_VERSION_BUILD_NUMBER)
     set(CMP_VERSION_BUILD_NUMBER 0)
@@ -52,9 +58,8 @@ if (CMP_HOST_LINUX OR CMP_HOST_WINDOWS)
         if("${i}" STREQUAL "cxx_std_17")
             set(COMPILER_SUPPORTS_CXX17 ON)
             message("Complier Supports ${i}")
-    endif()
-    # message("${i}")
-endforeach()
+        endif()
+    endforeach()
 else()
     message("UNIX Build: CHECK_CXX_COMPILER_FLAG defaulted to -std=c++11")
     set(COMPILER_SUPPORTS_CXX11 ON)
@@ -68,12 +73,12 @@ option(OPTION_ENABLE_ALL_APPS "Enable all apps" ON)
 if (OPTION_ENABLE_ALL_APPS) 
     set(OPTION_BUILD_APPS_CMP_CLI     "Build Application: compressonatorcli" ON)
     set(OPTION_BUILD_APPS_CMP_GUI     "Build Application: compressonator gui" ON)
-    set(OPTION_BUILD_APPS_CMP_TESTCORE "Build Application: cmp_testcore" ON)
+    set(OPTION_BUILD_APPS_CMP_UNITTESTS "Build Application: cmp_unittests" ON)
     set(OPTION_BUILD_APPS_CMP_EXAMPLES "Build Application: examples" ON)
 else()
     option(OPTION_BUILD_APPS_CMP_CLI       OFF)
     option(OPTION_BUILD_APPS_CMP_GUI       OFF)
-    option(OPTION_BUILD_APPS_CMP_TESTCORE  OFF)
+    option(OPTION_BUILD_APPS_CMP_UNITTESTS  OFF)
     option(OPTION_BUILD_APPS_CMP_EXAMPLES  OFF)
 endif()
 
@@ -156,7 +161,7 @@ add_compile_definitions(
     #USE_GLTF2_MIPSET                             # Enable Image Transcode & Compression support for GLTF files using TextureIO
     #USE_FILEIO                                   # Used for debugging code
 
-    #New features under developement
+    #New features under development
     #USE_BASIS                                    # Future release:: Universal format for transcoding codecs
     #USE_CMP_TRANSCODE                            # Future release:: Transcode BASIS/GTC to other compressed formats
 
@@ -187,15 +192,15 @@ endif()
 if (OPTION_CMP_GTC)
     add_compile_definitions(
         USE_GTC                                      # Patented: LDR Gradient Texture Compressor 
-        )
+    )
 endif()
 
 # Additional options
 if (OPTION_BUILD_DRACO)
     add_compile_definitions(
         USE_MESH_DRACO_EXTENSION    # Mesh Compression with Draco support in glTF and OBJ files only
         USE_KTX_EXTENSION           # Add KTX file support
-        )
+    )
 endif()
 
 
@@ -267,7 +272,7 @@ else()
     set(OpenGL_GL_PREFERENCE "GLVND")
 
     if (CMP_HOST_LINUX AND OPTION_CMP_QT)
-        find_package(Qt5 COMPONENTS Gui Widgets OpenGL Qml WebEngineWidgets Xml REQUIRED HINTS ${QT_PACKAGE_ROOT})
+        find_package(Qt5 COMPONENTS Gui Widgets OpenGL Qml WebEngineWidgets Xml REQUIRED HINTS ${QT_DIR})
         if (Qt5Gui_FOUND)
         else()
           message(FATAL_ERROR "Package Qt5 (Qt5Gui) are required, but not found. "
@@ -415,16 +420,16 @@ if (CMP_HOST_WINDOWS)
         add_subdirectory(examples)
     endif()
 
-    # Core Test and CLI
-    if (OPTION_BUILD_APPS_CMP_TESTCORE)
-          message("Build cmp core setup")
-         add_subdirectory(cmp_core/test)
+    # Unit tests
+    if (OPTION_BUILD_APPS_CMP_UNITTESTS)
+          message("Build cmp unittests setup")
+         add_subdirectory(cmp_unittests)
     endif()
 
     # CLI + GUI Automation testing
     if (OPTION_BUILD_INTERNAL_CMP_TEST)
          message("Build cmp test setup")
-         add_subdirectory(internal/applications/cmp_test)
+         add_subdirectory(compressonator_internal/applications/cmp_test)
     endif()
 
 endif()

diff --git a/applications/_libs/cmp_math/CMakeLists.txt b/applications/_libs/cmp_math/CMakeLists.txt
@@ -11,8 +11,8 @@ target_sources(CMP_Math PRIVATE
 
     cmp_math_common.cpp
     cmp_math_common.h
-    cmp_math_cpuid.cpp
-    cmp_math_cpuid.h
+    cpu_extensions.cpp
+    cpu_extensions.h
     jml.h
     jmlfuncs.cpp
     jmlfuncs.h

diff --git a/applications/_libs/cmp_math/cmp_intrinsics.h b/applications/_libs/cmp_math/cmp_intrinsics.h
@@ -0,0 +1,130 @@
+//=====================================================================
+// Copyright 2023 (c), Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+//=====================================================================
+
+#include <immintrin.h>
+
+// Horizontal add on a single SSE register: sums the four floats of v and returns a 128-bit value with that sum in every lane. NOTE(review): shares its name with the two-operand SSE3 intrinsic _mm_hadd_ps(a, b), so it relies on C++ overloading - confirm this builds on every target compiler.
+static inline __m128 _mm_hadd_ps(__m128 v)
+{
+    __m128 shuffled = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 1, 0, 3));  // lanes rotated by one position: (v3, v0, v1, v2)
+    __m128 intermediateSum = _mm_add_ps(v, shuffled);  // lane i = v[i] + v[i-1] (indices wrap around)
+
+    __m128 shuffledTwice = _mm_shuffle_ps(intermediateSum, intermediateSum, _MM_SHUFFLE(1, 0, 3, 2));  // lanes rotated by two positions (64-bit halves swapped)
+    __m128 sum = _mm_add_ps(intermediateSum, shuffledTwice);  // every lane = v0 + v1 + v2 + v3; even and odd lanes pair the terms differently, so they may differ by rounding
+
+    return sum;
+}
+
+// Horizontal add on a single AVX register: sums the eight floats of v and returns a 256-bit value with that sum in every lane. NOTE(review): shares its name with the two-operand AVX intrinsic _mm256_hadd_ps(a, b), so it relies on C++ overloading - confirm this builds on every target compiler.
+static inline __m256 _mm256_hadd_ps(__m256 v)
+{
+    __m128 v1 = _mm256_extractf128_ps(v, 0);  // low 128 bits (floats 0-3)
+    __m128 v2 = _mm256_extractf128_ps(v, 1);  // high 128 bits (floats 4-7)
+
+    __m128 sumV1 = _mm_hadd_ps(v1);  // one-operand helper from this header: low-half total in every lane
+    __m128 sumV2 = _mm_hadd_ps(v2);  // high-half total in every lane
+
+    __m128 sum = _mm_add_ps(sumV1, sumV2);  // lane-wise add of the two half totals gives the total of all eight floats
+
+    return _mm256_set_m128(sum, sum);  // same 128-bit result placed in both halves of the 256-bit value
+}
+
+// Horizontal minimum on a single SSE register: finds the smallest of the four floats in v and returns a 128-bit value with that minimum in every lane.
+static inline __m128 _mm_hmin_ps(__m128 v)
+{
+    __m128 shuffled = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 1, 0, 3));  // lanes rotated by one position: (v3, v0, v1, v2)
+    __m128 intermediateMin = _mm_min_ps(v, shuffled);  // lane i = min(v[i], v[i-1]) (indices wrap around)
+
+    __m128 shuffledTwice = _mm_shuffle_ps(intermediateMin, intermediateMin, _MM_SHUFFLE(1, 0, 3, 2));  // lanes rotated by two positions (64-bit halves swapped)
+    __m128 min = _mm_min_ps(intermediateMin, shuffledTwice);  // combines the two pairwise minima, so every lane has seen all four inputs
+
+    return min;
+}
+
+// Horizontal minimum on a single AVX register: finds the smallest of the eight floats in v and returns a 256-bit value with that minimum in every lane.
+static inline __m256 _mm256_hmin_ps(__m256 v)
+{
+    __m128 v1 = _mm256_extractf128_ps(v, 0);  // low 128 bits (floats 0-3)
+    __m128 v2 = _mm256_extractf128_ps(v, 1);  // high 128 bits (floats 4-7)
+
+    __m128 minV1 = _mm_hmin_ps(v1);  // minimum of the low half in every lane
+    __m128 minV2 = _mm_hmin_ps(v2);  // minimum of the high half in every lane
+
+    __m128 min = _mm_min_ps(minV1, minV2);  // lane-wise min of the two half results
+
+    return _mm256_set_m128(min, min);  // same 128-bit result placed in both halves of the 256-bit value
+}
+
+// Horizontal maximum on a single SSE register: finds the largest of the four floats in v and returns a 128-bit value with that maximum in every lane.
+static inline __m128 _mm_hmax_ps(__m128 v)
+{
+    __m128 shuffled = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 1, 0, 3));  // lanes rotated by one position: (v3, v0, v1, v2)
+    __m128 intermediateMax = _mm_max_ps(v, shuffled);  // lane i = max(v[i], v[i-1]) (indices wrap around)
+
+    __m128 shuffledTwice = _mm_shuffle_ps(intermediateMax, intermediateMax, _MM_SHUFFLE(1, 0, 3, 2));  // lanes rotated by two positions (64-bit halves swapped)
+    __m128 max = _mm_max_ps(intermediateMax, shuffledTwice);  // combines the two pairwise maxima, so every lane has seen all four inputs
+
+    return max;
+}
+
+// Horizontal maximum on a single AVX register: finds the largest of the eight floats in v and returns a 256-bit value with that maximum in every lane.
+static inline __m256 _mm256_hmax_ps(__m256 v)
+{
+    __m128 v1 = _mm256_extractf128_ps(v, 0);  // low 128 bits (floats 0-3)
+    __m128 v2 = _mm256_extractf128_ps(v, 1);  // high 128 bits (floats 4-7)
+
+    __m128 maxV1 = _mm_hmax_ps(v1);  // maximum of the low half in every lane
+    __m128 maxV2 = _mm_hmax_ps(v2);  // maximum of the high half in every lane
+
+    __m128 max = _mm_max_ps(maxV1, maxV2);  // lane-wise max of the two half results
+
+    return _mm256_set_m128(max, max);  // same 128-bit result placed in both halves of the 256-bit value
+}
+
+// Horizontal minimum of the sixteen signed 8-bit integers in a single SSE register: returns a 128-bit value with that minimum in every byte. Uses _mm_shuffle_epi8 (SSSE3) and _mm_min_epi8 (SSE4.1). NOTE(review): declared static without inline, unlike the other helpers in this header - confirm that is intended.
+static __m128i _mm_hmin_epi8(__m128i v)
+{
+    static const alignas(16) unsigned char shuffle8Bit[16] = { 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E };  // byte i takes byte i-1: rotate by one byte. NOTE(review): alignas between decl-specifiers is not accepted by every compiler - confirm, or move it to the front of the declaration
+    static const alignas(16) unsigned char shuffle16Bit[16] = { 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D};  // byte i takes byte i-2: rotate by two bytes
+
+    __m128i shuffle8BitVector = _mm_load_si128((__m128i*)shuffle8Bit);  // aligned load of the one-byte rotation mask; NOTE(review): the cast drops const
+    __m128i shuffle16BitVector = _mm_load_si128((__m128i*)shuffle16Bit);  // aligned load of the two-byte rotation mask; NOTE(review): the cast drops const
+
+    // Rotate by one byte and take the signed min: each byte now covers itself and the previous byte (2 inputs)
+    __m128i shuffled8Bit = _mm_shuffle_epi8(v, shuffle8BitVector);
+    __m128i min8Bit = _mm_min_epi8(v, shuffled8Bit);
+
+    // Rotate by two bytes and take the signed min: each byte now covers 4 consecutive inputs
+    __m128i shuffled16Bit = _mm_shuffle_epi8(min8Bit, shuffle16BitVector);
+    __m128i min16Bit = _mm_min_epi8(min8Bit, shuffled16Bit);
+
+    // Rotate by one 32-bit lane (four bytes) and take the signed min: each byte now covers 8 consecutive inputs
+    __m128i shuffled32Bit = _mm_shuffle_epi32(min16Bit, _MM_SHUFFLE(2, 1, 0, 3));
+    __m128i min32Bit = _mm_min_epi8(min16Bit, shuffled32Bit);
+
+    // Swap the 64-bit halves and take the signed min: each byte now covers all 16 inputs
+    __m128i shuffled64Bit = _mm_shuffle_epi32(min32Bit, _MM_SHUFFLE(1, 0, 3, 2));
+    __m128i min64Bit = _mm_min_epi8(min32Bit, shuffled64Bit);
+
+    return min64Bit;
+}
diff --git a/applications/_libs/cmp_math/cmp_math_common.cpp b/applications/_libs/cmp_math/cmp_math_common.cpp
@@ -1,5 +1,5 @@
 //=====================================================================
-// Copyright 2020 (c), Advanced Micro Devices, Inc. All rights reserved.
+// Copyright 2023 (c), Advanced Micro Devices, Inc. All rights reserved.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -329,7 +329,7 @@ void cmp_set_fma3_features() {
 void cmp_set_cpu_features() {
     // features list in Alphabetical order
     cmp_clampf2   = cpu_clampf;
-    cmp_lerp2    = cpu_lerp2;
+    cmp_lerp2     = cpu_lerp2;
     cmp_maxf2     = cpu_maxf;
     cmp_minf2     = cpu_minf;
     cmp_rsqf2     = cpu_rsqf;
@@ -341,7 +341,7 @@ void cmp_set_cpu_features() {
 #ifndef __linux__
 void cmp_set_sse2_features() {
     cmp_clampf2   = sse_clampf;
-    cmp_lerp2    = sse_lerp2;
+    cmp_lerp2     = sse_lerp2;
     cmp_maxf2     = sse_maxf;
     cmp_minf2     = sse_minf;
     cmp_rsqf2     = sse_rsqf;