From e1b092a3f70a5fe16481dcaa0e71157fbe9a7d40 Mon Sep 17 00:00:00 2001 From: "AP\\vsriperu" Date: Thu, 29 Jan 2026 20:16:49 +0530 Subject: [PATCH 1/2] 1. Add a batch file to generate the ARM64EC project. 2. Make use of NEON intrinsics for the ARM64EC ABI. 3. The MSVC compiler hits an internal compiler error in the AABox4VsBox functions. It is mitigated only if the 'g' optimisation is disabled. A separate bug report has been raised with the Microsoft team to look into this compiler error. Until Microsoft resolves it, the error is worked around by disabling the 'g' optimisation around these functions on ARM64EC. --- Build/cmake_vs2022_cl_arm_ec.bat | 3 +++ Jolt/Core/Core.h | 33 ++++++++++++++++++-------------- Jolt/Geometry/AABox4.h | 9 +++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 Build/cmake_vs2022_cl_arm_ec.bat diff --git a/Build/cmake_vs2022_cl_arm_ec.bat b/Build/cmake_vs2022_cl_arm_ec.bat new file mode 100644 index 000000000..f29732fe1 --- /dev/null +++ b/Build/cmake_vs2022_cl_arm_ec.bat @@ -0,0 +1,3 @@ +@echo off +cmake -S . -B VS2022_CL_ARM_EC -G "Visual Studio 17 2022" -A ARM64EC %* +echo Open VS2022_CL_ARM_EC\JoltPhysics.sln to build the project. 
\ No newline at end of file diff --git a/Jolt/Core/Core.h b/Jolt/Core/Core.h index 321c90c3f..90d98b58e 100644 --- a/Jolt/Core/Core.h +++ b/Jolt/Core/Core.h @@ -117,7 +117,25 @@ #endif // Detect CPU architecture -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) || defined(_ARM64EC_) || defined(_M_ARM64EC) + // ARM CPU architecture + #define JPH_CPU_ARM + + #if defined(_M_ARM64EC) || defined(_ARM64EC_) + #define JPH_ARM_EC + #endif + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #define JPH_CPU_ADDRESS_BITS 64 + #define JPH_USE_NEON + #define JPH_VECTOR_ALIGNMENT 16 + #define JPH_DVECTOR_ALIGNMENT 32 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries + #define JPH_DVECTOR_ALIGNMENT 8 + #endif +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) // X86 CPU architecture #define JPH_CPU_X86 #if defined(__x86_64__) || defined(_M_X64) @@ -167,19 +185,6 @@ #error Undefined compiler #endif #endif -#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) - // ARM CPU architecture - #define JPH_CPU_ARM - #if defined(__aarch64__) || defined(_M_ARM64) - #define JPH_CPU_ARCH_BITS 64 - #define JPH_USE_NEON - #define JPH_VECTOR_ALIGNMENT 16 - #define JPH_DVECTOR_ALIGNMENT 32 - #else - #define JPH_CPU_ARCH_BITS 32 - #define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries - #define JPH_DVECTOR_ALIGNMENT 8 - #endif #elif defined(__riscv) // RISC-V CPU architecture #define JPH_CPU_RISCV diff --git a/Jolt/Geometry/AABox4.h b/Jolt/Geometry/AABox4.h index 4465d4dab..5f992f2b6 100644 --- a/Jolt/Geometry/AABox4.h +++ b/Jolt/Geometry/AABox4.h @@ -84,6 +84,9 @@ JPH_INLINE UVec4 AABox4VsPoint(Vec3Arg inPoint, Vec4Arg inBoxMinX, Vec4Arg 
inBox return UVec4::sAnd(UVec4::sAnd(overlapx, overlapy), overlapz); } +#if defined(JPH_ARM_EC) + #pragma optimize("g", off) +#endif /// Test if 4 bounding boxes overlap with an oriented box JPH_INLINE UVec4 AABox4VsBox(Mat44Arg inOrientation, Vec3Arg inHalfExtents, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ, float inEpsilon = 1.0e-6f) { @@ -189,6 +192,12 @@ JPH_INLINE UVec4 AABox4VsBox(const OrientedBox &inBox, Vec4Arg inBoxMinX, Vec4Ar return AABox4VsBox(inBox.mOrientation, inBox.mHalfExtents, inBoxMinX, inBoxMinY, inBoxMinZ, inBoxMaxX, inBoxMaxY, inBoxMaxZ, inEpsilon); } +#if defined(JPH_ARM_EC) + #ifndef _DEBUG + #pragma optimize("g", on) + #endif +#endif + /// Get the squared distance between 4 AABoxes and a point JPH_INLINE Vec4 AABox4DistanceSqToPoint(Vec4Arg inPointX, Vec4Arg inPointY, Vec4Arg inPointZ, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ) { From 8c63cbe971c7acf79b6d828e73c08906f46453af Mon Sep 17 00:00:00 2001 From: Jorrit Rouwe Date: Thu, 29 Jan 2026 21:14:29 +0100 Subject: [PATCH 2/2] Code fixes --- Build/CMakeLists.txt | 1 + Jolt/Core/Core.h | 13 ++++--------- Jolt/Geometry/AABox4.h | 9 --------- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/Build/CMakeLists.txt b/Build/CMakeLists.txt index f701a55a9..50b7c80d4 100644 --- a/Build/CMakeLists.txt +++ b/Build/CMakeLists.txt @@ -297,6 +297,7 @@ function(SET_INTERPROCEDURAL_OPTIMIZATION) # On ARM, whole program optimization triggers an internal compiler error during code gen, so we don't turn it on # When compiling as a shared lib with MinGW, turning on LTO causes errors of the form 'ld.exe: cannot export symbol X wrong type (4 vs 3)' if (INTERPROCEDURAL_OPTIMIZATION + AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64EC") AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64") AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM") AND (NOT 
CROSS_COMPILE_ARM OR ("${CROSS_COMPILE_ARM_TARGET}" STREQUAL "aarch64-linux-gnu")) diff --git a/Jolt/Core/Core.h b/Jolt/Core/Core.h index 90d98b58e..1690e623e 100644 --- a/Jolt/Core/Core.h +++ b/Jolt/Core/Core.h @@ -117,21 +117,16 @@ #endif // Detect CPU architecture -#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) || defined(_ARM64EC_) || defined(_M_ARM64EC) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64EC) // ARM CPU architecture #define JPH_CPU_ARM - - #if defined(_M_ARM64EC) || defined(_ARM64EC_) - #define JPH_ARM_EC - #endif - -#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) - #define JPH_CPU_ADDRESS_BITS 64 + #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #define JPH_CPU_ARCH_BITS 64 #define JPH_USE_NEON #define JPH_VECTOR_ALIGNMENT 16 #define JPH_DVECTOR_ALIGNMENT 32 #else - #define JPH_CPU_ADDRESS_BITS 32 + #define JPH_CPU_ARCH_BITS 32 #define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries #define JPH_DVECTOR_ALIGNMENT 8 #endif diff --git a/Jolt/Geometry/AABox4.h b/Jolt/Geometry/AABox4.h index 5f992f2b6..4465d4dab 100644 --- a/Jolt/Geometry/AABox4.h +++ b/Jolt/Geometry/AABox4.h @@ -84,9 +84,6 @@ JPH_INLINE UVec4 AABox4VsPoint(Vec3Arg inPoint, Vec4Arg inBoxMinX, Vec4Arg inBox return UVec4::sAnd(UVec4::sAnd(overlapx, overlapy), overlapz); } -#if defined(JPH_ARM_EC) - #pragma optimize("g", off) -#endif /// Test if 4 bounding boxes overlap with an oriented box JPH_INLINE UVec4 AABox4VsBox(Mat44Arg inOrientation, Vec3Arg inHalfExtents, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ, float inEpsilon = 1.0e-6f) { @@ -192,12 +189,6 @@ JPH_INLINE UVec4 AABox4VsBox(const OrientedBox &inBox, Vec4Arg inBoxMinX, Vec4Ar return AABox4VsBox(inBox.mOrientation, inBox.mHalfExtents, inBoxMinX, inBoxMinY, inBoxMinZ, 
inBoxMaxX, inBoxMaxY, inBoxMaxZ, inEpsilon); } -#if defined(JPH_ARM_EC) - #ifndef _DEBUG - #pragma optimize("g", on) - #endif -#endif - /// Get the squared distance between 4 AABoxes and a point JPH_INLINE Vec4 AABox4DistanceSqToPoint(Vec4Arg inPointX, Vec4Arg inPointY, Vec4Arg inPointZ, Vec4Arg inBoxMinX, Vec4Arg inBoxMinY, Vec4Arg inBoxMinZ, Vec4Arg inBoxMaxX, Vec4Arg inBoxMaxY, Vec4Arg inBoxMaxZ) {