From 781df861c9ccc2441c08568d79275dfed6de8c47 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 2 Dec 2025 17:13:14 +0700
Subject: [PATCH 01/27] split out new quaternion hlsl code from the hlsl path
 tracer example

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 305 ++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |   1 +
 2 files changed, 306 insertions(+)
 create mode 100644 include/nbl/builtin/hlsl/math/quaternions.hlsl
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
new file mode 100644
index 0000000000..834d41cb54
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -0,0 +1,305 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/tgmath.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+
+template<typename T>
+struct truncated_quaternion
+{
+    using this_t = truncated_quaternion<T>;
+    using scalar_type = T;
+    using data_type = vector<T, 3>;
+
+    static this_t create()
+    {
+        this_t q;
+        q.data = data_type(0.0, 0.0, 0.0);
+        return q;
+    }
+
+    data_type data;
+};
+
+template <typename T>
+struct quaternion
+{
+    using this_t = quaternion<T>;
+    using scalar_type = T;
+    using data_type = vector<T, 4>;
+    using vector3_type = vector<T, 3>;
+    using matrix_type = matrix<T, 3, 3>;
+
+    using AsUint = typename unsigned_integer_of_size<sizeof(scalar_type)>::type;
+
+    static this_t create()
+    {
+        this_t q;
+        q.data = data_type(0.0, 0.0, 0.0, 1.0);
+        return q;
+    }
+    
+    static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w)
+    {
+        this_t q;
+        q.data = data_type(x, y, z, w);
+        return q;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(this_t) other)
+    {
+        return other;
+    }
+
+    // angle: Rotation angle expressed in radians.
+    // axis: Rotation axis, must be normalized.
+    static this_t create(scalar_type angle, const vector3_type axis)
+    {
+        this_t q;
+        const scalar_type sinTheta = hlsl::sin(angle * 0.5);
+        const scalar_type cosTheta = hlsl::cos(angle * 0.5);
+        q.data = data_type(axis * sinTheta, cosTheta);
+        return q;
+    }
+
+
+    static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll)
+    {
+        const scalar_type rollDiv2 = roll * scalar_type(0.5);
+        const scalar_type sr = hlsl::sin(rollDiv2);
+        const scalar_type cr = hlsl::cos(rollDiv2);
+
+        const scalar_type pitchDiv2 = pitch * scalar_type(0.5);
+        const scalar_type sp = hlsl::sin(pitchDiv2);
+        const scalar_type cp = hlsl::cos(pitchDiv2);
+
+        const scalar_type yawDiv2 = yaw * scalar_type(0.5);
+        const scalar_type sy = hlsl::sin(yawDiv2);
+        const scalar_type cy = hlsl::cos(yawDiv2);
+
+        this_t output;
+        output.data[0] = cr * sp * cy + sr * cp * sy; // x
+        output.data[1] = cr * cp * sy - sr * sp * cy; // y
+        output.data[2] = sr * cp * cy - cr * sp * sy; // z
+        output.data[3] = cr * cp * cy + sr * sp * sy; // w
+
+        return output;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
+    {
+        const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
+        const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
+        const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
+        const scalar_type neg_m22 = bit_cast<scalar_type>(bit_cast<AsUint>(m22)^0x80000000u);
+        const data_type Qx = data_type(m00, m00, neg_m00, neg_m00);
+        const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
+        const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
+
+        const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        const data_type invscales = hlsl::promote<data_type>(0.5) / hlsl::sqrt(tmp);
+        const data_type scales = tmp * invscales * hlsl::promote<data_type>(0.5);
+
+        // TODO: speed this up
+        this_t retval;
+        if (tmp.x > scalar_type(0.0))
+        {
+            retval.data.x = (m[2][1] - m[1][2]) * invscales.x;
+            retval.data.y = (m[0][2] - m[2][0]) * invscales.x;
+            retval.data.z = (m[1][0] - m[0][1]) * invscales.x;
+            retval.data.w = scales.x;
+        }
+        else
+        {
+            if (tmp.y > scalar_type(0.0))
+            {
+                retval.data.x = scales.y;
+                retval.data.y = (m[0][1] + m[1][0]) * invscales.y;
+                retval.data.z = (m[2][0] + m[0][2]) * invscales.y;
+                retval.data.w = (m[2][1] - m[1][2]) * invscales.y;
+            }
+            else if (tmp.z > scalar_type(0.0))
+            {
+                retval.data.x = (m[0][1] + m[1][0]) * invscales.z;
+                retval.data.y = scales.z;
+                retval.data.z = (m[0][2] - m[2][0]) * invscales.z;
+                retval.data.w = (m[1][2] + m[2][1]) * invscales.z;
+            }
+            else
+            {
+                retval.data.x = (m[0][2] + m[2][0]) * invscales.w;
+                retval.data.y = (m[1][2] + m[2][1]) * invscales.w;
+                retval.data.z = scales.w;
+                retval.data.w = (m[1][0] - m[0][1]) * invscales.w;
+            }
+        }
+
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(truncated_quaternion<T>) first3Components)
+    {
+        this_t retval;
+        retval.data.xyz = first3Components.data;
+        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data));
+        return retval;
+    }
+
+    this_t operator*(scalar_type scalar)
+    {
+        this_t output;
+        output.data = data * scalar;
+        return output;
+    }
+
+    this_t operator*(NBL_CONST_REF_ARG(this_t) other)
+    {
+        return this_t::create(
+            data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
+            data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
+            data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
+            data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
+        );
+    }
+
+    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
+    {
+        const AsUint negationMask = hlsl::bit_cast<AsUint>(totalPseudoAngle) & AsUint(0x80000000u);
+        const data_type adjEnd = hlsl::bit_cast<scalar_type>(hlsl::bit_cast<AsUint>(end.data) ^ negationMask);
+
+        this_t retval;
+        retval.data = hlsl::mix(start.data, adjEnd, fraction);
+        return retval;
+    }
+
+    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        return lerp(start, end, fraction, hlsl::dot(start.data, end.data));
+    }
+
+    static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3)
+    {
+        const scalar_type A = scalar_type(1.0904) + angle * (scalar_type(-3.2452) + angle * (scalar_type(3.55645) - angle * scalar_type(1.43519)));
+        const scalar_type B = scalar_type(0.848013) + angle * (scalar_type(-1.06021) + angle * scalar_type(0.215638));
+        const scalar_type k = A * interpolantPrecalcTerm2 + B;
+        return fraction + interpolantPrecalcTerm3 * k;
+    }
+
+    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
+        const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
+        const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
+        const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
+        
+        this_t retval = lerp(start,end,adjFrac,pseudoAngle);
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
+    }
+
+    vector3_type transformVector(const vector3_type v)
+    {
+        scalar_type scale = hlsl::length(data);
+        vector3_type direction = data.xyz;
+        return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
+    }
+
+    matrix_type constructMatrix()
+    {
+        matrix_type mat;
+        mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0);
+        mat[1] = data.yzx * data.xzw + data.zxy * data.wxz * vector3_type(-1.0, 1.0, 1.0);
+        mat[2] = data.yzx * data.wyx + data.zxy * data.xwy * vector3_type( 1.0,-1.0, 1.0);
+        mat[0][0] = scalar_type(0.5) - mat[0][0];
+        mat[1][1] = scalar_type(0.5) - mat[1][1];
+        mat[2][2] = scalar_type(0.5) - mat[2][2];
+        mat *= scalar_type(2.0);
+        return hlsl::transpose(mat);    // TODO: double check transpose?
+    }
+
+    static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
+    {
+        vector3_type planeNormal = hlsl::cross(start,preScaledWaypoint);
+    
+        cosAngleFromStart *= scalar_type(0.5);
+        const scalar_type sinAngle = hlsl::sqrt(scalar_type(0.5) - cosAngleFromStart);
+        const scalar_type cosAngle = hlsl::sqrt(scalar_type(0.5) + cosAngleFromStart);
+        
+        planeNormal *= sinAngle;
+        const vector3_type precompPart = hlsl::cross(planeNormal, start) * scalar_type(2.0);
+
+        return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
+    }
+
+    this_t inverse()
+    {
+        this_t retval;
+        retval.data.x = bit_cast<scalar_type>(bit_cast<AsUint>(data.x)^0x80000000u);
+        retval.data.y = bit_cast<scalar_type>(bit_cast<AsUint>(data.y)^0x80000000u);
+        retval.data.z = bit_cast<scalar_type>(bit_cast<AsUint>(data.z)^0x80000000u);
+        retval.data.w = data.w;
+        return retval;
+    }
+
+    static this_t normalize(NBL_CONST_REF_ARG(this_t) q)
+    {
+        this_t retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+
+    data_type data;
+};
+
+}
+
+namespace impl
+{
+
+template<typename T>
+struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
+{
+    static inline math::quaternion<T> cast(math::truncated_quaternion<T> q)
+    {
+        return math::quaternion<T>::create(q);
+    }
+};
+
+template<typename T>
+struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
+{
+    static inline math::truncated_quaternion<T> cast(math::quaternion<T> q)
+    {
+        math::truncated_quaternion<T> t;
+        t.data.x = t.data.x;
+        t.data.y = t.data.y;
+        t.data.z = t.data.z;
+        return t;
+    }
+};
+
+template<typename T>
+struct static_cast_helper<matrix<T,3,3>, math::quaternion<T> >
+{
+    static inline matrix<T,3,3> cast(math::quaternion<T> q)
+    {
+        return q.constructMatrix();
+    }
+};
+}
+
+}
+}
+
+#endif
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index e8798499f9..37c5d2e43e 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -225,6 +225,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/polar.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl")

From 1bf0616246f047c9be399181b319e1c5611e4617 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 11 Dec 2025 15:52:35 +0700
Subject: [PATCH 02/27] removed redundant constructors, reorganized some
 quaternion code

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 834d41cb54..6114949572 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -48,22 +48,10 @@ struct quaternion
         q.data = data_type(0.0, 0.0, 0.0, 1.0);
         return q;
     }
-    
-    static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w)
-    {
-        this_t q;
-        q.data = data_type(x, y, z, w);
-        return q;
-    }
-
-    static this_t create(NBL_CONST_REF_ARG(this_t) other)
-    {
-        return other;
-    }
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    static this_t create(scalar_type angle, const vector3_type axis)
+    static this_t create(const vector3_type axis, scalar_type angle)
     {
         this_t q;
         const scalar_type sinTheta = hlsl::sin(angle * 0.5);
@@ -72,28 +60,39 @@ struct quaternion
         return q;
     }
 
-
-    static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll)
+    template<typename U=vector<scalar_type,2> NBL_FUNC_REQUIRES(is_same_v<vector<scalar_type,2>,U>)
+    static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin)
     {
-        const scalar_type rollDiv2 = roll * scalar_type(0.5);
-        const scalar_type sr = hlsl::sin(rollDiv2);
-        const scalar_type cr = hlsl::cos(rollDiv2);
+        const scalar_type cp = halfPitchCosSin.x;
+        const scalar_type sp = halfPitchCosSin.y;
 
-        const scalar_type pitchDiv2 = pitch * scalar_type(0.5);
-        const scalar_type sp = hlsl::sin(pitchDiv2);
-        const scalar_type cp = hlsl::cos(pitchDiv2);
+        const scalar_type cy = halfYawCosSin.x;
+        const scalar_type sy = halfYawCosSin.y;
 
-        const scalar_type yawDiv2 = yaw * scalar_type(0.5);
-        const scalar_type sy = hlsl::sin(yawDiv2);
-        const scalar_type cy = hlsl::cos(yawDiv2);
+        const scalar_type cr = halfRollCosSin.x;
+        const scalar_type sr = halfRollCosSin.y;
 
-        this_t output;
-        output.data[0] = cr * sp * cy + sr * cp * sy; // x
-        output.data[1] = cr * cp * sy - sr * sp * cy; // y
-        output.data[2] = sr * cp * cy - cr * sp * sy; // z
-        output.data[3] = cr * cp * cy + sr * sp * sy; // w
+        this_t q;
+        q.data[0] = cr * sp * cy + sr * cp * sy; // x
+        q.data[1] = cr * cp * sy - sr * sp * cy; // y
+        q.data[2] = sr * cp * cy - cr * sp * sy; // z
+        q.data[3] = cr * cp * cy + sr * sp * sy; // w
 
-        return output;
+        return q;
+    }
+
+    template<typename U=scalar_type NBL_FUNC_REQUIRES(is_same_v<scalar_type,U>)
+    static this_t create(const U pitch, const U yaw, const U roll)
+    {
+        const scalar_type halfPitch = pitch * scalar_type(0.5);
+        const scalar_type halfYaw = yaw * scalar_type(0.5);
+        const scalar_type halfRoll = roll * scalar_type(0.5);
+
+        return create(
+            vector<scalar_type,2>(hlsl::cos(halfPitch), hlsl::sin(halfPitch)),
+            vector<scalar_type,2>(hlsl::cos(halfYaw), hlsl::sin(halfYaw)),
+            vector<scalar_type,2>(hlsl::cos(halfRoll), hlsl::sin(halfRoll))
+        );
     }
 
     static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
@@ -165,12 +164,14 @@ struct quaternion
 
     this_t operator*(NBL_CONST_REF_ARG(this_t) other)
     {
-        return this_t::create(
+        this_t retval;
+        retval.data = data_type(
             data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
             data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
             data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
             data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
         );
+        return retval;
     }
 
     static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)

From 8745a33514602e3a3089f588d2988dcb027fe733 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 11 Dec 2025 16:46:12 +0700
Subject: [PATCH 03/27] added validity checks to quaternion create from rotation matrix

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 75 ++++++++++++++-----
 1 file changed, 56 insertions(+), 19 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 6114949572..49ad0dde96 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -95,8 +95,43 @@ struct quaternion
         );
     }
 
-    static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
+    static bool __isEqual(const scalar_type a, const scalar_type b)
     {
+        return hlsl::max(a/b, b/a) <= scalar_type(1e-4);
+    }
+    static bool __dotIsZero(const vector3_type a, const vector3_type b)
+    {
+        const scalar_type ab = hlsl::dot(a, b);
+        return hlsl::abs(ab) <= scalar_type(1e-4);
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
+    {
+        {
+            // only orthogonal and uniform scale mats can be converted
+            bool valid = __dotIsZero(m[0], m[1]);
+            valid = __dotIsZero(m[1], m[2]) && valid;
+            valid = __dotIsZero(m[0], m[2]) && valid;
+
+            const matrix_type m_T = hlsl::transpose(m);
+            const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]);
+            const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]);
+            const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]);
+            valid = __isEqual(dotCol0, dotCol1) && valid;
+            valid = __isEqual(dotCol1, dotCol2) && valid;
+            valid = __isEqual(dotCol0, dotCol2) && valid;
+
+            if (dontAssertValidMatrix)
+                if (!valid)
+                {
+                    this_t retval;
+                    retval.data = hlsl::promote<data_type>(bit_cast<scalar_type>(numeric_limits<scalar_type>::quiet_NaN));
+                    return retval;
+                }
+            else
+                assert(valid);
+        }
+
         const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
         const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
         const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
@@ -106,40 +141,42 @@ struct quaternion
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
 
         const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
-        const data_type invscales = hlsl::promote<data_type>(0.5) / hlsl::sqrt(tmp);
-        const data_type scales = tmp * invscales * hlsl::promote<data_type>(0.5);
 
         // TODO: speed this up
         this_t retval;
         if (tmp.x > scalar_type(0.0))
         {
-            retval.data.x = (m[2][1] - m[1][2]) * invscales.x;
-            retval.data.y = (m[0][2] - m[2][0]) * invscales.x;
-            retval.data.z = (m[1][0] - m[0][1]) * invscales.x;
-            retval.data.w = scales.x;
+            const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x);
+            retval.data.x = (m[2][1] - m[1][2]) * invscales;
+            retval.data.y = (m[0][2] - m[2][0]) * invscales;
+            retval.data.z = (m[1][0] - m[0][1]) * invscales;
+            retval.data.w = tmp.x * invscales * scalar_type(0.5);
         }
         else
         {
             if (tmp.y > scalar_type(0.0))
             {
-                retval.data.x = scales.y;
-                retval.data.y = (m[0][1] + m[1][0]) * invscales.y;
-                retval.data.z = (m[2][0] + m[0][2]) * invscales.y;
-                retval.data.w = (m[2][1] - m[1][2]) * invscales.y;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.y);
+                retval.data.x = tmp.y * invscales * scalar_type(0.5);
+                retval.data.y = (m[0][1] + m[1][0]) * invscales;
+                retval.data.z = (m[2][0] + m[0][2]) * invscales;
+                retval.data.w = (m[2][1] - m[1][2]) * invscales;
             }
             else if (tmp.z > scalar_type(0.0))
             {
-                retval.data.x = (m[0][1] + m[1][0]) * invscales.z;
-                retval.data.y = scales.z;
-                retval.data.z = (m[0][2] - m[2][0]) * invscales.z;
-                retval.data.w = (m[1][2] + m[2][1]) * invscales.z;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z);
+                retval.data.x = (m[0][1] + m[1][0]) * invscales;
+                retval.data.y = tmp.z * invscales * scalar_type(0.5);
+                retval.data.z = (m[0][2] - m[2][0]) * invscales;
+                retval.data.w = (m[1][2] + m[2][1]) * invscales;
             }
             else
             {
-                retval.data.x = (m[0][2] + m[2][0]) * invscales.w;
-                retval.data.y = (m[1][2] + m[2][1]) * invscales.w;
-                retval.data.z = scales.w;
-                retval.data.w = (m[1][0] - m[0][1]) * invscales.w;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w);
+                retval.data.x = (m[0][2] + m[2][0]) * invscales;
+                retval.data.y = (m[1][2] + m[2][1]) * invscales;
+                retval.data.z = tmp.w * invscales * scalar_type(0.5);
+                retval.data.w = (m[1][0] - m[0][1]) * invscales;
             }
         }
 

From 2a8451d73fab71fe283563cbcaff631c07f181e5 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 12 Dec 2025 11:15:26 +0700
Subject: [PATCH 04/27] moved normalize, static_cast to helper specializations,
 added norm and unnorm variants for lerp/flerp

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 85 ++++++++++++-------
 1 file changed, 53 insertions(+), 32 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 49ad0dde96..73dc977d62 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -184,14 +184,6 @@ struct quaternion
         return retval;
     }
 
-    static this_t create(NBL_CONST_REF_ARG(truncated_quaternion<T>) first3Components)
-    {
-        this_t retval;
-        retval.data.xyz = first3Components.data;
-        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data));
-        return retval;
-    }
-
     this_t operator*(scalar_type scalar)
     {
         this_t output;
@@ -211,19 +203,26 @@ struct quaternion
         return retval;
     }
 
-    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
+    static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
-        const AsUint negationMask = hlsl::bit_cast<AsUint>(totalPseudoAngle) & AsUint(0x80000000u);
-        const data_type adjEnd = hlsl::bit_cast<scalar_type>(hlsl::bit_cast<AsUint>(end.data) ^ negationMask);
+        // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
+        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, totalPseudoAngle);
 
         this_t retval;
         retval.data = hlsl::mix(start.data, adjEnd, fraction);
         return retval;
     }
 
+    static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        return unnormLerp(start, end, fraction, hlsl::dot(start.data, end.data));
+    }
+
     static this_t lerp(const this_t start, const this_t end, const scalar_type fraction)
     {
-        return lerp(start, end, fraction, hlsl::dot(start.data, end.data));
+        this_t retval = unnormLerp(start, end, fraction);
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
     }
 
     static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3)
@@ -234,26 +233,32 @@ struct quaternion
         return fraction + interpolantPrecalcTerm3 * k;
     }
 
-    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction)
     {
         const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
         const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
         const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
         const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
         
-        this_t retval = lerp(start,end,adjFrac,pseudoAngle);
+        this_t retval = unnormLerp(start,end,adjFrac,pseudoAngle);
+        return retval;
+    }
+
+    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    {       
+        this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle);
         retval.data = hlsl::normalize(retval.data);
         return retval;
     }
 
-    vector3_type transformVector(const vector3_type v)
+    vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC
     {
-        scalar_type scale = hlsl::length(data);
+        scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale);
         vector3_type direction = data.xyz;
         return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
     }
 
-    matrix_type constructMatrix()
+    matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC
     {
         matrix_type mat;
         mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0);
@@ -280,23 +285,14 @@ struct quaternion
         return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
     }
 
-    this_t inverse()
+    this_t inverse() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
-        retval.data.x = bit_cast<scalar_type>(bit_cast<AsUint>(data.x)^0x80000000u);
-        retval.data.y = bit_cast<scalar_type>(bit_cast<AsUint>(data.y)^0x80000000u);
-        retval.data.z = bit_cast<scalar_type>(bit_cast<AsUint>(data.z)^0x80000000u);
+        retval.data.xyz = -retval.data.xyz;
         retval.data.w = data.w;
         return retval;
     }
 
-    static this_t normalize(NBL_CONST_REF_ARG(this_t) q)
-    {
-        this_t retval;
-        retval.data = hlsl::normalize(q.data);
-        return retval;
-    }
-
     data_type data;
 };
 
@@ -305,19 +301,44 @@ struct quaternion
 namespace impl
 {
 
+template<typename T>
+struct normalize_helper<math::truncated_quaternion<T> >
+{
+    static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
+    {
+        math::truncated_quaternion<T> retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+}
+
+template<typename T>
+struct normalize_helper<math::quaternion<T> >
+{
+    static inline math::quaternion<T> __call(const math::quaternion<T> q)
+    {
+        math::quaternion<T> retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+}
+
 template<typename T>
 struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
 {
-    static inline math::quaternion<T> cast(math::truncated_quaternion<T> q)
+    static inline math::quaternion<T> cast(const math::truncated_quaternion<T> q)
     {
-        return math::quaternion<T>::create(q);
+        math::quaternion<T> retval;
+        retval.data.xyz = q.data;
+        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(q.data, q.data));
+        return retval;
     }
 };
 
 template<typename T>
 struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 {
-    static inline math::truncated_quaternion<T> cast(math::quaternion<T> q)
+    static inline math::truncated_quaternion<T> cast(const math::quaternion<T> q)
     {
         math::truncated_quaternion<T> t;
         t.data.x = t.data.x;
@@ -330,7 +351,7 @@ struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 template<typename T>
 struct static_cast_helper<matrix<T,3,3>, math::quaternion<T> >
 {
-    static inline matrix<T,3,3> cast(math::quaternion<T> q)
+    static inline matrix<T,3,3> cast(const math::quaternion<T> q)
     {
         return q.constructMatrix();
     }

From a93fa2608f608574e17937bf42bdcdc75e17e291 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 12 Dec 2025 15:39:32 +0700
Subject: [PATCH 05/27] fix quaternion bugs (flerp args, matrix scaling, helper namespaces)

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 73dc977d62..91ee4975e3 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -246,7 +246,7 @@ struct quaternion
 
     static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
     {       
-        this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle);
+        this_t retval = unnormFlerp(start,end,fraction);
         retval.data = hlsl::normalize(retval.data);
         return retval;
     }
@@ -267,8 +267,10 @@ struct quaternion
         mat[0][0] = scalar_type(0.5) - mat[0][0];
         mat[1][1] = scalar_type(0.5) - mat[1][1];
         mat[2][2] = scalar_type(0.5) - mat[2][2];
-        mat *= scalar_type(2.0);
-        return hlsl::transpose(mat);    // TODO: double check transpose?
+        mat[0] = mat[0] * scalar_type(2.0);
+        mat[1] = mat[1] * scalar_type(2.0);
+        mat[2] = mat[2] * scalar_type(2.0);
+        return mat;// hlsl::transpose(mat);    // TODO: double check transpose?
     }
 
     static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
@@ -298,9 +300,9 @@ struct quaternion
 
 }
 
-namespace impl
-{
 
+namespace cpp_compat_intrinsics_impl
+{
 template<typename T>
 struct normalize_helper<math::truncated_quaternion<T> >
 {
@@ -310,7 +312,7 @@ struct normalize_helper<math::truncated_quaternion<T> >
         retval.data = hlsl::normalize(q.data);
         return retval;
     }
-}
+};
 
 template<typename T>
 struct normalize_helper<math::quaternion<T> >
@@ -321,8 +323,11 @@ struct normalize_helper<math::quaternion<T> >
         retval.data = hlsl::normalize(q.data);
         return retval;
     }
+};
 }
 
+namespace impl
+{
 template<typename T>
 struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
 {

From 2f33aa03cbcdfaf20df6f26c6b6ffac39fb20dfd Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 6 Jan 2026 16:26:43 +0700
Subject: [PATCH 06/27] some fixes to quaternions

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 91ee4975e3..be3b7b8ede 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -184,21 +184,21 @@ struct quaternion
         return retval;
     }
 
-    this_t operator*(scalar_type scalar)
+    this_t operator*(scalar_type scalar) NBL_CONST_MEMBER_FUNC
     {
         this_t output;
         output.data = data * scalar;
         return output;
     }
 
-    this_t operator*(NBL_CONST_REF_ARG(this_t) other)
+    this_t operator*(NBL_CONST_REF_ARG(this_t) other) NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
         retval.data = data_type(
-            data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
-            data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
-            data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
-            data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
+            data.w * other.data.x + data.x * other.data.w + data.y * other.data.z - data.z * other.data.y,
+            data.w * other.data.y - data.x * other.data.z + data.y * other.data.w + data.z * other.data.x,
+            data.w * other.data.z + data.x * other.data.y - data.y * other.data.x + data.z * other.data.w,
+            data.w * other.data.w - data.x * other.data.x - data.y * other.data.y - data.z * other.data.z
         );
         return retval;
     }
@@ -270,7 +270,7 @@ struct quaternion
         mat[0] = mat[0] * scalar_type(2.0);
         mat[1] = mat[1] * scalar_type(2.0);
         mat[2] = mat[2] * scalar_type(2.0);
-        return mat;// hlsl::transpose(mat);    // TODO: double check transpose?
+        return mat;
     }
 
     static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
@@ -335,7 +335,7 @@ struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
     {
         math::quaternion<T> retval;
         retval.data.xyz = q.data;
-        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(q.data, q.data));
+        retval.data.w = hlsl::sqrt(T(1.0) - hlsl::dot(q.data, q.data));
         return retval;
     }
 };

From a22d46ae9506112f5ba5830cb81a9b7dbe9b3f81 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 12:08:16 +0700
Subject: [PATCH 07/27] implement quaternion slerp (might need optimizing?)

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index be3b7b8ede..b54e1ad619 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -206,10 +206,10 @@ struct quaternion
     static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
         // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
-        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, totalPseudoAngle);
+        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
 
         this_t retval;
-        retval.data = hlsl::mix(start.data, adjEnd, fraction);
+        retval.data = hlsl::mix(start.data, adjEnd, hlsl::promote<data_type>(fraction));
         return retval;
     }
 
@@ -287,10 +287,33 @@ struct quaternion
         return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
     }
 
+    static this_t slerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type threshold = numeric_limits<scalar_type>::epsilon)
+    {
+        const scalar_type totalPseudoAngle = hlsl::dot(start.data, end.data);
+
+        // make sure we use the short rotation
+        const scalar_type cosA = ieee754::flipSignIfRHSNegative<scalar_type>(totalPseudoAngle, totalPseudoAngle);
+        if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
+        {
+            this_t retval;
+
+            const scalar_type A = hlsl::acos(cosA);
+            const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
+            const scalar_type sinAt = hlsl::sin(fraction * A);
+            const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
+            retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp;
+
+            return retval;
+        }
+        else
+            return unnormLerp(start, end, fraction, totalPseudoAngle);
+            // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle));
+    }
+
     this_t inverse() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
-        retval.data.xyz = -retval.data.xyz;
+        retval.data.xyz = -data.xyz;
         retval.data.w = data.w;
         return retval;
     }

From f71cca19d9d18f99cb4265c6d183999cd618c568 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 13:56:40 +0700
Subject: [PATCH 08/27] minor optimization to slerp

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b54e1ad619..b7f39f19fe 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -296,18 +296,17 @@ struct quaternion
         if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
         {
             this_t retval;
-
-            const scalar_type A = hlsl::acos(cosA);
             const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
-            const scalar_type sinAt = hlsl::sin(fraction * A);
+            const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));
+            const scalar_type sinAt_over_sinA = sinAt*sinARcp;
+            const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A)
             const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
-            retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp;
+            retval.data = scale * start.data + sinAt_over_sinA * adjEnd;
 
             return retval;
         }
         else
             return unnormLerp(start, end, fraction, totalPseudoAngle);
-            // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle));
     }
 
     this_t inverse() NBL_CONST_MEMBER_FUNC

From c39c78a8e7b8d9709ddbc9de602d6dff5573d0da Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 15:26:01 +0700
Subject: [PATCH 09/27] fix create from rotation matrix

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b7f39f19fe..1f720b0247 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -140,42 +140,47 @@ struct quaternion
         const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
 
-        const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        // const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        const data_type tmp = Qx + Qy + Qz;
 
         // TODO: speed this up
         this_t retval;
         if (tmp.x > scalar_type(0.0))
         {
-            const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x);
+            const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0));
+            const scalar_type invscales = scalar_type(0.5) / scales;
             retval.data.x = (m[2][1] - m[1][2]) * invscales;
             retval.data.y = (m[0][2] - m[2][0]) * invscales;
             retval.data.z = (m[1][0] - m[0][1]) * invscales;
-            retval.data.w = tmp.x * invscales * scalar_type(0.5);
+            retval.data.w = scales * scalar_type(0.5);
         }
         else
         {
             if (tmp.y > scalar_type(0.0))
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.y);
-                retval.data.x = tmp.y * invscales * scalar_type(0.5);
+                const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
+                retval.data.x = scales * scalar_type(0.5);
                 retval.data.y = (m[0][1] + m[1][0]) * invscales;
                 retval.data.z = (m[2][0] + m[0][2]) * invscales;
                 retval.data.w = (m[2][1] - m[1][2]) * invscales;
             }
             else if (tmp.z > scalar_type(0.0))
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z);
+                const scalar_type scales = hlsl::sqrt(tmp.z + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
                 retval.data.x = (m[0][1] + m[1][0]) * invscales;
-                retval.data.y = tmp.z * invscales * scalar_type(0.5);
-                retval.data.z = (m[0][2] - m[2][0]) * invscales;
-                retval.data.w = (m[1][2] + m[2][1]) * invscales;
+                retval.data.y = scales * scalar_type(0.5);
+                retval.data.z = (m[1][2] + m[2][1]) * invscales;
+                retval.data.w = (m[0][2] - m[2][0]) * invscales;
             }
             else
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w);
+                const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
                 retval.data.x = (m[0][2] + m[2][0]) * invscales;
                 retval.data.y = (m[1][2] + m[2][1]) * invscales;
-                retval.data.z = tmp.w * invscales * scalar_type(0.5);
+                retval.data.z = scales * scalar_type(0.5);
                 retval.data.w = (m[1][0] - m[0][1]) * invscales;
             }
         }

From 0b180c88b65d37acf0ccc0817d5d5d97cd6fdf74 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 16:20:40 +0700
Subject: [PATCH 10/27] force constructor type with requires to avoid dxc
 implicit conversions

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 1f720b0247..4be73482bb 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -51,12 +51,13 @@ struct quaternion
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    static this_t create(const vector3_type axis, scalar_type angle)
+    template<typename U=vector3_type, typename F=scalar_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U> && is_same_v<scalar_type,F>)
+    static this_t create(const U axis, const F angle, const F uniformScale = scalar_type(1.0))
     {
         this_t q;
         const scalar_type sinTheta = hlsl::sin(angle * 0.5);
         const scalar_type cosTheta = hlsl::cos(angle * 0.5);
-        q.data = data_type(axis * sinTheta, cosTheta);
+        q.data = data_type(axis * sinTheta, cosTheta) * uniformScale;
         return q;
     }
 
@@ -301,7 +302,7 @@ struct quaternion
         if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
         {
             this_t retval;
-            const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
+            const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
             const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));
             const scalar_type sinAt_over_sinA = sinAt*sinARcp;
             const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A)

From de1b0d1f5aa4cb4e1b3bd73330315f776de1c108 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 17:13:56 +0700
Subject: [PATCH 11/27] fixes to transformVector and other minor fixes

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 4be73482bb..34abb042fb 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -134,9 +134,9 @@ struct quaternion
         }
 
         const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
-        const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
-        const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
-        const scalar_type neg_m22 = bit_cast<scalar_type>(bit_cast<AsUint>(m22)^0x80000000u);
+        const scalar_type neg_m00 = -m00;
+        const scalar_type neg_m11 = -m11;
+        const scalar_type neg_m22 = -m22;
         const data_type Qx = data_type(m00, m00, neg_m00, neg_m00);
         const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
@@ -186,7 +186,7 @@ struct quaternion
             }
         }
 
-        retval.data = hlsl::normalize(retval.data);
+        retval.data = hlsl::normalize(retval.data) / hlsl::sqrt(hlsl::dot(m[0], m[0])); // restore uniform scale
         return retval;
     }
 
@@ -211,6 +211,8 @@ struct quaternion
 
     static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
+        assert(hlsl::length(start.data) == scalar_type(1.0));
+        assert(hlsl::length(end.data) == scalar_type(1.0));
         // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
         const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
 
@@ -241,6 +243,9 @@ struct quaternion
 
     static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction)
     {
+        assert(hlsl::length(start.data) == scalar_type(1.0));
+        assert(hlsl::length(end.data) == scalar_type(1.0));
+
         const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
         const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
         const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
@@ -259,9 +264,10 @@ struct quaternion
 
     vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC
     {
-        scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale);
-        vector3_type direction = data.xyz;
-        return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
+        const scalar_type scaleRcp = scalar_type(1.0) / hlsl::sqrt(hlsl::dot(data, data));
+        const vector3_type modV = v * scalar_type(2.0) * scaleRcp;
+        const vector3_type direction = data.xyz;
+        return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV));
     }
 
     matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC
@@ -336,8 +342,10 @@ struct normalize_helper<math::truncated_quaternion<T> >
 {
     static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
     {
+        assert(hlsl::length(q.data) == scalar_type(1.0));
+
         math::truncated_quaternion<T> retval;
-        retval.data = hlsl::normalize(q.data);
+        retval.data = q.data;   // should be normalized by definition (dropped component should be 1.0)
         return retval;
     }
 };

From 96ef95d82251abfcf85f194afa43b2e46982b87a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 8 Jan 2026 14:09:22 +0700
Subject: [PATCH 12/27] added matrix runtime traits for checking orthogonality,
 uniform scale

---
 .../math/linalg/matrix_runtime_traits.hlsl    | 66 +++++++++++++++++++
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 24 +------
 src/nbl/builtin/CMakeLists.txt                |  1 +
 3 files changed, 70 insertions(+), 21 deletions(-)
 create mode 100644 include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl

diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
new file mode 100644
index 0000000000..fc19b2cb3e
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
@@ -0,0 +1,66 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/testing/relative_approx_compare.hlsl"
+#include "nbl/builtin/hlsl/concepts/matrix.hlsl"
+#include "nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+namespace linalg
+{
+
+template<typename T NBL_PRIMARY_REQUIRES(concepts::Matricial<T> && matrix_traits<T>::Square)
+struct RuntimeTraits
+{
+    using matrix_t = T;
+    using scalar_t = typename matrix_traits<T>::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t N = matrix_traits<T>::RowCount;
+
+    static RuntimeTraits<matrix_t> create(const matrix_t m)
+    {
+        RuntimeTraits<matrix_t> retval;
+        retval.invertible = !testing::relativeApproxCompare(hlsl::determinant(m), scalar_t(0.0), 1e-5);
+        {
+            bool orthogonal = true;
+            NBL_UNROLL for (uint16_t i = 0; i < N; i++)
+                orthogonal = testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4) && orthogonal;
+            retval.orthogonal = orthogonal;
+        }
+        {
+            const matrix_t m_T = hlsl::transpose(m);
+            scalar_t dots[N];
+            NBL_UNROLL for (uint16_t i = 0; i < N; i++)
+                dots[i] = hlsl::dot(m[i], m[i]);
+
+            bool uniformScale = true;
+            NBL_UNROLL for (uint16_t i = 0; i < N-1; i++)
+                uniformScale = testing::relativeApproxCompare(dots[i], dots[i+1], 1e-4) && uniformScale;
+
+            retval.uniformScale = uniformScale;
+            retval.orthonormal = uniformScale && retval.orthogonal && testing::relativeApproxCompare(dots[0], scalar_t(1.0), 1e-5);
+        }
+        return retval;
+    }
+    
+    bool invertible;
+    bool orthogonal;
+    bool uniformScale;
+    bool orthonormal;
+};
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 34abb042fb..59f2eea243 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -6,6 +6,7 @@
 
 #include "nbl/builtin/hlsl/cpp_compat.hlsl"
 #include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl"
 
 namespace nbl
 {
@@ -96,31 +97,12 @@ struct quaternion
         );
     }
 
-    static bool __isEqual(const scalar_type a, const scalar_type b)
-    {
-        return hlsl::max(a/b, b/a) <= scalar_type(1e-4);
-    }
-    static bool __dotIsZero(const vector3_type a, const vector3_type b)
-    {
-        const scalar_type ab = hlsl::dot(a, b);
-        return hlsl::abs(ab) <= scalar_type(1e-4);
-    }
-
     static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
     {
         {
             // only orthogonal and uniform scale mats can be converted
-            bool valid = __dotIsZero(m[0], m[1]);
-            valid = __dotIsZero(m[1], m[2]) && valid;
-            valid = __dotIsZero(m[0], m[2]) && valid;
-
-            const matrix_type m_T = hlsl::transpose(m);
-            const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]);
-            const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]);
-            const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]);
-            valid = __isEqual(dotCol0, dotCol1) && valid;
-            valid = __isEqual(dotCol1, dotCol2) && valid;
-            valid = __isEqual(dotCol0, dotCol2) && valid;
+            linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(m);
+            bool valid = traits.orthogonal && traits.uniformScale;
 
             if (dontAssertValidMatrix)
                 if (!valid)
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 085ed3c923..038ac2573d 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -223,6 +223,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl")
 #linear algebra
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_runtime_traits.hlsl")
 # TODO: rename `equations` to `polynomials` probably
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")

From a0acd3512dbf594ea31077d1f9a4b5786b89e08f Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 13 Jan 2026 14:00:40 +0700
Subject: [PATCH 13/27] matrix runtime traits stores uniform scale squared,
 changed calculations slightly

---
 .../math/linalg/matrix_runtime_traits.hlsl    | 22 ++++++++++++-------
 .../nbl/builtin/hlsl/math/quaternions.hlsl    |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
index fc19b2cb3e..43b05d56ba 100644
--- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
+++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
@@ -33,28 +33,34 @@ struct RuntimeTraits
         {
             bool orthogonal = true;
             NBL_UNROLL for (uint16_t i = 0; i < N; i++)
-                orthogonal = testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4) && orthogonal;
+                orthogonal = orthogonal && testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4);
             retval.orthogonal = orthogonal;
         }
         {
             const matrix_t m_T = hlsl::transpose(m);
             scalar_t dots[N];
             NBL_UNROLL for (uint16_t i = 0; i < N; i++)
-                dots[i] = hlsl::dot(m[i], m[i]);
+                dots[i] = hlsl::dot(m[i], m_T[i]);
 
-            bool uniformScale = true;
-            NBL_UNROLL for (uint16_t i = 0; i < N-1; i++)
-                uniformScale = testing::relativeApproxCompare(dots[i], dots[i+1], 1e-4) && uniformScale;
+            scalar_t uniformScaleSq = hlsl::dot(m[0], m_T[0]);
+            NBL_UNROLL for (uint16_t i = 1; i < N; i++)
+            {
+                if (!testing::relativeApproxCompare(hlsl::dot(m[i], m_T[i]), uniformScaleSq, 1e-4))
+                {
+                    uniformScaleSq = bit_cast<scalar_t>(numeric_limits<scalar_t>::quiet_NaN);
+                    break;
+                }
+            }
 
-            retval.uniformScale = uniformScale;
-            retval.orthonormal = uniformScale && retval.orthogonal && testing::relativeApproxCompare(dots[0], scalar_t(1.0), 1e-5);
+            retval.uniformScaleSq = uniformScaleSq;
+            retval.orthonormal = retval.orthogonal && testing::relativeApproxCompare(uniformScaleSq, scalar_t(1.0), 1e-5);
         }
         return retval;
     }
     
     bool invertible;
     bool orthogonal;
-    bool uniformScale;
+    scalar_t uniformScaleSq;
     bool orthonormal;
 };
 
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 59f2eea243..966463b5e4 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -102,7 +102,7 @@ struct quaternion
         {
             // only orthogonal and uniform scale mats can be converted
             linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(m);
-            bool valid = traits.orthogonal && traits.uniformScale;
+            bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq);
 
             if (dontAssertValidMatrix)
                 if (!valid)

From d1c4a89881934f6123ddfff225e452acc8c312dc Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 13 Jan 2026 14:51:54 +0700
Subject: [PATCH 14/27] added more static_casts and new partial spec for
 flipSignIfRHSNegative

---
 include/nbl/builtin/hlsl/ieee754.hlsl         | 36 ++++++++++++---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 45 ++++++++++++-------
 2 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl
index a3930a362a..af23d6f07d 100644
--- a/include/nbl/builtin/hlsl/ieee754.hlsl
+++ b/include/nbl/builtin/hlsl/ieee754.hlsl
@@ -204,12 +204,12 @@ struct flipSign_helper<Vectorial, BoolVector NBL_PARTIAL_REQ_BOT(concepts::Float
 	}
 };
 
-template <typename T NBL_STRUCT_CONSTRAINABLE>
+template <typename T, typename U NBL_STRUCT_CONSTRAINABLE>
 struct flipSignIfRHSNegative_helper;
 
 template <typename FloatingPoint>
 NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeScalar<FloatingPoint>)
-struct flipSignIfRHSNegative_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeScalar<FloatingPoint>) >
+struct flipSignIfRHSNegative_helper<FloatingPoint, FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeScalar<FloatingPoint>) >
 {
 	static FloatingPoint __call(FloatingPoint val, FloatingPoint flip)
 	{
@@ -222,7 +222,7 @@ struct flipSignIfRHSNegative_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::
 
 template <typename Vectorial>
 NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<Vectorial>)
-struct flipSignIfRHSNegative_helper<Vectorial NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<Vectorial>) >
+struct flipSignIfRHSNegative_helper<Vectorial, Vectorial NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<Vectorial>) >
 {
 	static Vectorial __call(Vectorial val, Vectorial flip)
 	{
@@ -232,7 +232,29 @@ struct flipSignIfRHSNegative_helper<Vectorial NBL_PARTIAL_REQ_BOT(concepts::Floa
 
 		Vectorial output;
 		for (uint32_t i = 0; i < traits_v::Dimension; ++i)
-			setter(output, i, flipSignIfRHSNegative_helper<typename traits_v::scalar_type>::__call(getter_v(val, i), getter_v(flip, i)));
+			setter(output, i, flipSignIfRHSNegative_helper<typename traits_v::scalar_type,typename traits_v::scalar_type>::__call(getter_v(val, i), getter_v(flip, i)));
+
+		return output;
+	}
+};
+
+template <typename Vectorial, typename FloatingPoint>
+NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<Vectorial> && concepts::FloatingPointLikeScalar<FloatingPoint>)
+struct flipSignIfRHSNegative_helper<Vectorial, FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<Vectorial> && concepts::FloatingPointLikeScalar<FloatingPoint>) >
+{
+	static Vectorial __call(Vectorial val, FloatingPoint flip)
+	{
+		using traits_v = hlsl::vector_traits<Vectorial>;
+		array_get<Vectorial, typename traits_v::scalar_type> getter_v;
+		array_set<Vectorial, typename traits_v::scalar_type> setter;
+
+		using AsFloat = typename float_of_size<sizeof(FloatingPoint)>::type;
+		using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type;
+		const AsUint signBitFlip = ieee754::traits<AsFloat>::signMask & ieee754::impl::bitCastToUintType(flip);
+
+		Vectorial output;
+		for (uint32_t i = 0; i < traits_v::Dimension; ++i)
+			setter(output, i, bit_cast<FloatingPoint>(ieee754::impl::bitCastToUintType(getter_v(val, i)) ^ signBitFlip));
 
 		return output;
 	}
@@ -245,10 +267,10 @@ NBL_CONSTEXPR_FUNC T flipSign(T val, U flip)
 	return impl::flipSign_helper<T, U>::__call(val, flip);
 }
 
-template <typename T>
-NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip)
+template <typename T, typename U=T>
+NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, U flip)
 {
-	return impl::flipSignIfRHSNegative_helper<T>::__call(val, flip);
+	return impl::flipSignIfRHSNegative_helper<T, U>::__call(val, flip);
 }
 
 template <typename T NBL_FUNC_REQUIRES(hlsl::is_floating_point_v<T>)
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 966463b5e4..2133490f2d 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -52,16 +52,19 @@ struct quaternion
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    template<typename U=vector3_type, typename F=scalar_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U> && is_same_v<scalar_type,F>)
-    static this_t create(const U axis, const F angle, const F uniformScale = scalar_type(1.0))
+    template<typename U=vector3_type, typename F=scalar_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U> && is_same_v<typename vector_traits<U>::scalar_type,F>)
+    static this_t create(const U axis, const F angle, const F uniformScale = F(1.0))
     {
+        using scalar_t = typename vector_traits<U>::scalar_type;
         this_t q;
-        const scalar_type sinTheta = hlsl::sin(angle * 0.5);
-        const scalar_type cosTheta = hlsl::cos(angle * 0.5);
+        const scalar_t halfAngle = angle * scalar_t(0.5);
+        const scalar_t sinTheta = hlsl::sin(halfAngle);
+        const scalar_t cosTheta = hlsl::cos(halfAngle);
         q.data = data_type(axis * sinTheta, cosTheta) * uniformScale;
         return q;
     }
 
+    // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll
     template<typename U=vector<scalar_type,2> NBL_FUNC_REQUIRES(is_same_v<vector<scalar_type,2>,U>)
     static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin)
     {
@@ -99,10 +102,12 @@ struct quaternion
 
     static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
     {
+        scalar_type uniformScaleSq;
         {
             // only orthogonal and uniform scale mats can be converted
             linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(m);
             bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq);
+            uniformScaleSq = traits.uniformScaleSq;
 
             if (dontAssertValidMatrix)
                 if (!valid)
@@ -168,7 +173,7 @@ struct quaternion
             }
         }
 
-        retval.data = hlsl::normalize(retval.data) / hlsl::sqrt(hlsl::dot(m[0], m[0])); // restore uniform scale
+        retval.data = hlsl::normalize(retval.data) * hlsl::rsqrt(uniformScaleSq); // restore uniform scale
         return retval;
     }
 
@@ -193,10 +198,9 @@ struct quaternion
 
     static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
-        assert(hlsl::length(start.data) == scalar_type(1.0));
-        assert(hlsl::length(end.data) == scalar_type(1.0));
-        // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
-        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
+        assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4)));
+        assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4)));
+        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type,scalar_type>(end.data, totalPseudoAngle);
 
         this_t retval;
         retval.data = hlsl::mix(start.data, adjEnd, hlsl::promote<data_type>(fraction));
@@ -225,8 +229,8 @@ struct quaternion
 
     static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction)
     {
-        assert(hlsl::length(start.data) == scalar_type(1.0));
-        assert(hlsl::length(end.data) == scalar_type(1.0));
+        assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4)));
+        assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4)));
 
         const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
         const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
@@ -252,7 +256,7 @@ struct quaternion
         return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV));
     }
 
-    matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC
+    matrix_type __constructMatrix() NBL_CONST_MEMBER_FUNC
     {
         matrix_type mat;
         mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0);
@@ -294,7 +298,7 @@ struct quaternion
             const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));
             const scalar_type sinAt_over_sinA = sinAt*sinARcp;
             const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A)
-            const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
+            const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type,scalar_type>(end.data, totalPseudoAngle);
             retval.data = scale * start.data + sinAt_over_sinA * adjEnd;
 
             return retval;
@@ -324,8 +328,7 @@ struct normalize_helper<math::truncated_quaternion<T> >
 {
     static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
     {
-        assert(hlsl::length(q.data) == scalar_type(1.0));
-
+        assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4)));
         math::truncated_quaternion<T> retval;
         retval.data = q.data;   // should be normalized by definition (dropped component should be 1.0)
         return retval;
@@ -363,6 +366,7 @@ struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 {
     static inline math::truncated_quaternion<T> cast(const math::quaternion<T> q)
     {
+        assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4)));
         math::truncated_quaternion<T> t;
         t.data.x = t.data.x;
         t.data.y = t.data.y;
@@ -376,7 +380,16 @@ struct static_cast_helper<matrix<T,3,3>, math::quaternion<T> >
 {
     static inline matrix<T,3,3> cast(const math::quaternion<T> q)
     {
-        return q.constructMatrix();
+        return q.__constructMatrix();
+    }
+};
+
+template<typename T>
+struct static_cast_helper<math::quaternion<T>, matrix<T,3,3> >
+{
+    static inline math::quaternion<T> cast(const matrix<T,3,3> m) // 3x3 matrix -> quaternion, delegating to quaternion<T>::create
+    {
+        return math::quaternion<T>::create(m, true); // `true` is create()'s dontAssertValidMatrix flag, so the assert(valid) path on the matrix is not taken
     }
 };
 }

From e8a6488d5dc36e6cd578416669e1c7c37fd08393 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 13 Jan 2026 15:08:17 +0700
Subject: [PATCH 15/27] account for no scale in transform vector

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 2133490f2d..7e10a5c710 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -250,10 +250,17 @@ struct quaternion
 
     vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC
     {
-        const scalar_type scaleRcp = scalar_type(1.0) / hlsl::sqrt(hlsl::dot(data, data));
-        const vector3_type modV = v * scalar_type(2.0) * scaleRcp;
+        const scalar_type scaleRcp = hlsl::rsqrt(hlsl::dot(data, data));
+        vector3_type retV = v;
+        scalar_type modVScale = scalar_type(2.0);
+        if (!assumeNoScale)
+        {
+            retV /= scaleRcp;
+            modVScale *= scaleRcp;
+        }
+        const vector3_type modV = v * modVScale;
         const vector3_type direction = data.xyz;
-        return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV));
+        return retV + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV));
     }
 
     matrix_type __constructMatrix() NBL_CONST_MEMBER_FUNC
@@ -293,6 +300,9 @@ struct quaternion
         const scalar_type cosA = ieee754::flipSignIfRHSNegative<scalar_type>(totalPseudoAngle, totalPseudoAngle);
         if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
         {
+            assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4)));
+            assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4)));
+
             this_t retval;
             const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
             const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));

From c07365fbe5097dfe40cb5480fdf7209c7aa045bf Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 14 Jan 2026 10:54:19 +0700
Subject: [PATCH 16/27] more comment info for pitch yaw roll, removed normalize
 truncated quat

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 7e10a5c710..9efe359298 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -52,8 +52,8 @@ struct quaternion
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    template<typename U=vector3_type, typename F=scalar_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U> && is_same_v<typename vector_traits<U>::scalar_type,F>)
-    static this_t create(const U axis, const F angle, const F uniformScale = F(1.0))
+    template<typename U=vector3_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U>)
+    static this_t create(const U axis, const typename vector_traits<U>::scalar_type angle, const typename vector_traits<U>::scalar_type uniformScale = typename vector_traits<U>::scalar_type(1.0))
     {
         using scalar_t = typename vector_traits<U>::scalar_type;
         this_t q;
@@ -64,7 +64,7 @@ struct quaternion
         return q;
     }
 
-    // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll
+    // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll (X * Y * Z) -- mul(roll,mul(yaw,mul(pitch,v)))
     template<typename U=vector<scalar_type,2> NBL_FUNC_REQUIRES(is_same_v<vector<scalar_type,2>,U>)
     static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin)
     {
@@ -333,18 +333,6 @@ struct quaternion
 
 namespace cpp_compat_intrinsics_impl
 {
-template<typename T>
-struct normalize_helper<math::truncated_quaternion<T> >
-{
-    static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
-    {
-        assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4)));
-        math::truncated_quaternion<T> retval;
-        retval.data = q.data;   // should be normalized by definition (dropped component should be 1.0)
-        return retval;
-    }
-};
-
 template<typename T>
 struct normalize_helper<math::quaternion<T> >
 {

From 2aa275e6a811aa01961bac2eb5c72374ed652d7c Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 14 Jan 2026 12:35:55 +0700
Subject: [PATCH 17/27] create from matrix restore scale correctly

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 9efe359298..b001052810 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -173,7 +173,7 @@ struct quaternion
             }
         }
 
-        retval.data = hlsl::normalize(retval.data) * hlsl::rsqrt(uniformScaleSq); // restore uniform scale
+        retval.data = hlsl::normalize(retval.data) * hlsl::sqrt(uniformScaleSq); // restore uniform scale
         return retval;
     }
 

From 3869cb105e554dfb7b036094941f96d3f583879d Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 15 Jan 2026 11:31:51 +0700
Subject: [PATCH 18/27] minor bug fixes to quaternions

---
 .../builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl  | 8 ++------
 include/nbl/builtin/hlsl/math/quaternions.hlsl           | 9 ++++-----
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
index 43b05d56ba..dc74c45ddd 100644
--- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
+++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
@@ -38,14 +38,10 @@ struct RuntimeTraits
         }
         {
             const matrix_t m_T = hlsl::transpose(m);
-            scalar_t dots[N];
-            NBL_UNROLL for (uint16_t i = 0; i < N; i++)
-                dots[i] = hlsl::dot(m[i], m_T[i]);
-
-            scalar_t uniformScaleSq = hlsl::dot(m[0], m_T[0]);
+            scalar_t uniformScaleSq = hlsl::dot(m_T[0], m_T[0]);
             NBL_UNROLL for (uint16_t i = 1; i < N; i++)
             {
-                if (!testing::relativeApproxCompare(hlsl::dot(m[i], m_T[i]), uniformScaleSq, 1e-4))
+                if (!testing::relativeApproxCompare(hlsl::dot(m_T[i], m_T[i]), uniformScaleSq, 1e-4))
                 {
                     uniformScaleSq = bit_cast<scalar_t>(numeric_limits<scalar_t>::quiet_NaN);
                     break;
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b001052810..31fb97a51a 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -128,7 +128,6 @@ struct quaternion
         const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
 
-        // const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
         const data_type tmp = Qx + Qy + Qz;
 
         // TODO: speed this up
@@ -364,11 +363,11 @@ struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 {
     static inline math::truncated_quaternion<T> cast(const math::quaternion<T> q)
     {
-        assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4)));
+        assert(testing::relativeApproxCompare(hlsl::length(q.data), T(1.0), T(1e-4)));
         math::truncated_quaternion<T> t;
-        t.data.x = t.data.x;
-        t.data.y = t.data.y;
-        t.data.z = t.data.z;
+        t.data.x = q.data.x;
+        t.data.y = q.data.y;
+        t.data.z = q.data.z;
         return t;
     }
 };

From a602fba54c90d1473c5d2267a54e1a25d03e5ae6 Mon Sep 17 00:00:00 2001
From: devsh <devsh@devsh.eu>
Date: Thu, 15 Jan 2026 08:06:02 +0100
Subject: [PATCH 19/27] add some operators to hlsl::matrix in C++

---
 include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl
index 712ce5e979..c0b5023990 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl
@@ -28,8 +28,15 @@ struct matrix final : private glm::mat<N,M,T>
         return *this;
     }
 
-    friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast<Base const&>(lhs) + reinterpret_cast<Base const&>(rhs)); }
-    friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast<Base const&>(lhs) - reinterpret_cast<Base const&>(rhs)); }
+    // TODO: matrix * scalar (commented out below) stays disabled, it is not clear yet how to forward it to the Base operator
+    //inline friend matrix operator*(matrix const& lhs, T rhs) {return matrix(reinterpret_cast<Base const&>(lhs)*rhs);}
+
+    // scalar compound assignment multiply and divide
+    inline matrix& operator*=(const T rhs) {return reinterpret_cast<matrix&>(Base::template operator*=(rhs));}
+    inline matrix& operator/=(const T rhs) {return reinterpret_cast<matrix&>(Base::template operator/=(rhs));}
+
+    inline friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast<Base const&>(lhs) + reinterpret_cast<Base const&>(rhs)); }
+    inline friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast<Base const&>(lhs) - reinterpret_cast<Base const&>(rhs)); }
 
     template<uint16_t K>
     inline friend matrix<T, N, K> mul(matrix const& lhs, matrix<T, M, K> const& rhs)

From 4aa236838791995f0dc309ca3b1f27e3c44cf968 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 15 Jan 2026 14:35:09 +0700
Subject: [PATCH 20/27] factor out uniform scale from mat before convert

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 31fb97a51a..00200d903a 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -100,12 +100,12 @@ struct quaternion
         );
     }
 
-    static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
+    static this_t create(NBL_CONST_REF_ARG(matrix_type) _m, const bool dontAssertValidMatrix=false)
     {
         scalar_type uniformScaleSq;
         {
             // only orthogonal and uniform scale mats can be converted
-            linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(m);
+            linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(_m);
             bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq);
             uniformScaleSq = traits.uniformScaleSq;
 
@@ -120,6 +120,10 @@ struct quaternion
                 assert(valid);
         }
 
+        const scalar_type uniformScale = hlsl::sqrt(uniformScaleSq);
+        matrix_type m = _m;
+        m /= uniformScale;
+
         const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
         const scalar_type neg_m00 = -m00;
         const scalar_type neg_m11 = -m11;
@@ -172,7 +176,7 @@ struct quaternion
             }
         }
 
-        retval.data = hlsl::normalize(retval.data) * hlsl::sqrt(uniformScaleSq); // restore uniform scale
+        retval.data = hlsl::normalize(retval.data) * uniformScale; // restore uniform scale
         return retval;
     }
 

From 5f02325d67e768e7f724b19fe9e0ec0a2ceaf312 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 15 Jan 2026 17:25:56 +0700
Subject: [PATCH 21/27] new vector comparison by orientation

---
 .../hlsl/testing/orientation_compare.hlsl     | 44 +++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |  1 +
 2 files changed, 45 insertions(+)
 create mode 100644 include/nbl/builtin/hlsl/testing/orientation_compare.hlsl

diff --git a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl
new file mode 100644
index 0000000000..7884cf1b21
--- /dev/null
+++ b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl
@@ -0,0 +1,44 @@
+#ifndef _NBL_BUILTIN_HLSL_TESTING_ORIENTATION_COMPARE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_TESTING_ORIENTATION_COMPARE_INCLUDED_
+
+#include <nbl/builtin/hlsl/testing/relative_approx_compare.hlsl>
+
+namespace nbl 
+{
+namespace hlsl
+{
+namespace testing
+{
+namespace impl
+{
+
+template<typename FloatingPointVector NBL_PRIMARY_REQUIRES(concepts::FloatingPointLikeVectorial<FloatingPointVector>)
+struct OrientationCompareHelper // orientation test built on dot(lhs,rhs) versus sqrt(dot(lhs,lhs)*dot(rhs,rhs))
+{
+    static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAllowedDifference) // maxAllowedDifference is the tolerance handed to relativeApproxCompare
+    {
+        using traits = nbl::hlsl::vector_traits<FloatingPointVector>;
+        using scalar_t = typename traits::scalar_type;
+
+        const scalar_t dotLR = hlsl::dot(lhs, rhs);
+        if (dotLR < scalar_t(0.0))
+            return false;
+
+        const scalar_t scale = hlsl::sqrt(hlsl::dot(lhs,lhs) * hlsl::dot(rhs,rhs));
+        return relativeApproxCompare<scalar_t>(dotLR, scale, maxAllowedDifference);
+    }
+};
+
+}
+
+template<typename T>
+bool orientationCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAllowedDifference) // public entry point; T has to satisfy the helper's FloatingPointLikeVectorial requirement
+{
+	return impl::OrientationCompareHelper<T>::__call(lhs, rhs, maxAllowedDifference); // thin forwarder, the comparison itself lives in the impl helper
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index e44f41b29e..86a0ddf9b9 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -377,5 +377,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl")
 #testing
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/orientation_compare.hlsl")
 
 ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")

From 11f7f2eb72fa4286b9a6cab5fa696b83429720a1 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 15 Jan 2026 18:14:44 +0700
Subject: [PATCH 22/27] return nan quaternion if uniform scale is 0

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 00200d903a..ed0f796fe8 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -119,6 +119,12 @@ struct quaternion
             else
                 assert(valid);
         }
+        if (uniformScaleSq < numeric_limits<scalar_type>::min)
+        {
+            this_t retval;
+            retval.data = hlsl::promote<data_type>(bit_cast<scalar_type>(numeric_limits<scalar_type>::quiet_NaN));
+            return retval;
+        }
 
         const scalar_type uniformScale = hlsl::sqrt(uniformScaleSq);
         matrix_type m = _m;

From 266cd710e7fae965def0f6c79a3488a28db8ba53 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 16 Jan 2026 10:25:08 +0700
Subject: [PATCH 23/27] account for negative orientation, added check for 0
 length vectors

---
 include/nbl/builtin/hlsl/testing/orientation_compare.hlsl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl
index 7884cf1b21..3228872b4d 100644
--- a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl
+++ b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl
@@ -20,11 +20,13 @@ struct OrientationCompareHelper
         using traits = nbl::hlsl::vector_traits<FloatingPointVector>;
         using scalar_t = typename traits::scalar_type;
 
-        const scalar_t dotLR = hlsl::dot(lhs, rhs);
-        if (dotLR < scalar_t(0.0))
+        const scalar_t dotLR = hlsl::abs(hlsl::dot(lhs, rhs));
+        const scalar_t dotLL = hlsl::dot(lhs,lhs);
+        const scalar_t dotRR = hlsl::dot(rhs,rhs);
+        if (dotLL < numeric_limits<scalar_t>::min || dotRR < numeric_limits<scalar_t>::min)
             return false;
 
-        const scalar_t scale = hlsl::sqrt(hlsl::dot(lhs,lhs) * hlsl::dot(rhs,rhs));
+        const scalar_t scale = hlsl::sqrt(dotLL * dotRR);
         return relativeApproxCompare<scalar_t>(dotLR, scale, maxAllowedDifference);
     }
 };

From ce371c732aba74e6038075b3ba5f96dc12303daf Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 16 Jan 2026 15:03:17 +0700
Subject: [PATCH 24/27] fixes create from matrix by using correct row-column
 indexing

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index ed0f796fe8..b56c6024cd 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -146,9 +146,9 @@ struct quaternion
         {
             const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0));
             const scalar_type invscales = scalar_type(0.5) / scales;
-            retval.data.x = (m[2][1] - m[1][2]) * invscales;
-            retval.data.y = (m[0][2] - m[2][0]) * invscales;
-            retval.data.z = (m[1][0] - m[0][1]) * invscales;
+            retval.data.x = (m[1][2] - m[2][1]) * invscales;
+            retval.data.y = (m[2][0] - m[0][2]) * invscales;
+            retval.data.z = (m[0][1] - m[1][0]) * invscales;
             retval.data.w = scales * scalar_type(0.5);
         }
         else
@@ -158,31 +158,31 @@ struct quaternion
                 const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0));
                 const scalar_type invscales = scalar_type(0.5) / scales;
                 retval.data.x = scales * scalar_type(0.5);
-                retval.data.y = (m[0][1] + m[1][0]) * invscales;
-                retval.data.z = (m[2][0] + m[0][2]) * invscales;
-                retval.data.w = (m[2][1] - m[1][2]) * invscales;
+                retval.data.y = (m[1][0] + m[0][1]) * invscales;
+                retval.data.z = (m[0][2] + m[2][0]) * invscales;
+                retval.data.w = (m[1][2] - m[2][1]) * invscales;
             }
             else if (tmp.z > scalar_type(0.0))
             {
                 const scalar_type scales = hlsl::sqrt(tmp.z + scalar_type(1.0));
                 const scalar_type invscales = scalar_type(0.5) / scales;
-                retval.data.x = (m[0][1] + m[1][0]) * invscales;
+                retval.data.x = (m[1][0] + m[0][1]) * invscales;
                 retval.data.y = scales * scalar_type(0.5);
-                retval.data.z = (m[1][2] + m[2][1]) * invscales;
-                retval.data.w = (m[0][2] - m[2][0]) * invscales;
+                retval.data.z = (m[2][1] + m[1][2]) * invscales;
+                retval.data.w = (m[2][0] - m[0][2]) * invscales;
             }
             else
             {
                 const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0));
                 const scalar_type invscales = scalar_type(0.5) / scales;
-                retval.data.x = (m[0][2] + m[2][0]) * invscales;
-                retval.data.y = (m[1][2] + m[2][1]) * invscales;
+                retval.data.x = (m[2][0] + m[0][2]) * invscales;
+                retval.data.y = (m[2][1] + m[1][2]) * invscales;
                 retval.data.z = scales * scalar_type(0.5);
-                retval.data.w = (m[1][0] - m[0][1]) * invscales;
+                retval.data.w = (m[0][1] - m[1][0]) * invscales;
             }
         }
 
-        retval.data = hlsl::normalize(retval.data) * uniformScale; // restore uniform scale
+        retval.data = retval.data * uniformScale; // restore uniform scale
         return retval;
     }
 
@@ -326,7 +326,7 @@ struct quaternion
             return unnormLerp(start, end, fraction, totalPseudoAngle);
     }
 
-    this_t inverse() NBL_CONST_MEMBER_FUNC
+    this_t operator-() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
         retval.data.xyz = -data.xyz;

From c8df31adf40358139f49e6d9caa52234993a8727 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 16 Jan 2026 15:09:33 +0700
Subject: [PATCH 25/27] fix glm not liking unary - on swizzle

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b56c6024cd..9208bc2256 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -329,7 +329,9 @@ struct quaternion
     this_t operator-() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
-        retval.data.xyz = -data.xyz;
+        retval.data.x = -data.x;
+        retval.data.y = -data.y;
+        retval.data.z = -data.z;
         retval.data.w = data.w;
         return retval;
     }

From f4b0b43d8aa65d83bc2559da24c694920cfa141a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 16 Jan 2026 15:29:13 +0700
Subject: [PATCH 26/27] do quaternion inverse not as member function

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 9208bc2256..2d294cd4be 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -326,16 +326,6 @@ struct quaternion
             return unnormLerp(start, end, fraction, totalPseudoAngle);
     }
 
-    this_t operator-() NBL_CONST_MEMBER_FUNC
-    {
-        this_t retval;
-        retval.data.x = -data.x;
-        retval.data.y = -data.y;
-        retval.data.z = -data.z;
-        retval.data.w = data.w;
-        return retval;
-    }
-
     data_type data;
 };
 
@@ -403,6 +393,18 @@ struct static_cast_helper<math::quaternion<T>, matrix<T,3,3> >
 };
 }
 
+template<typename T>
+math::quaternion<T> inverse(const math::quaternion<T> q) // free-function inverse: conjugate of q divided by dot(q,q)
+{
+    math::quaternion<T> retval;
+    retval.data.x = -q.data.x; // conjugate: negate x, y, z one component at a time (glm does not accept unary minus on a swizzle)
+    retval.data.y = -q.data.y;
+    retval.data.z = -q.data.z;
+    retval.data.w = q.data.w; // w is kept as is
+    retval.data /= hlsl::dot(q.data,q.data); // divide by the squared norm so a non-unit q is handled too; note there is no guard for an all-zero q
+    return retval;
+}
+
 }
 }
 

From 5c53ae58d707341401f161fc1969400d53bf9ab1 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 16 Jan 2026 20:30:15 +0700
Subject: [PATCH 27/27] added a vector length compare

---
 .../hlsl/testing/vector_length_compare.hlsl   | 45 +++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |  1 +
 2 files changed, 46 insertions(+)
 create mode 100644 include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl

diff --git a/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl b/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl
new file mode 100644
index 0000000000..03bf72b006
--- /dev/null
+++ b/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl
@@ -0,0 +1,45 @@
+#ifndef _NBL_BUILTIN_HLSL_TESTING_VECTOR_LENGTH_COMPARE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_TESTING_VECTOR_LENGTH_COMPARE_INCLUDED_
+
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/concepts.hlsl>
+#include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>
+
+namespace nbl 
+{
+namespace hlsl
+{
+namespace testing
+{
+namespace impl
+{
+
+template<typename FloatingPointVector NBL_PRIMARY_REQUIRES(concepts::FloatingPointLikeVectorial<FloatingPointVector>)
+struct LengthCompareHelper // compares two vectors through their squared lengths (dot of each vector with itself)
+{
+    static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference)
+    {
+        using traits = nbl::hlsl::vector_traits<FloatingPointVector>;
+        using scalar_t = typename traits::scalar_type;
+
+        const scalar_t dotLL = hlsl::dot(lhs,lhs); // squared length of lhs
+        const scalar_t dotRR = hlsl::dot(rhs,rhs); // squared length of rhs
+        const scalar_t diff = hlsl::abs(dotLL-dotRR); // absolute gap between the two squared lengths
+        const scalar_t sc = hlsl::max(dotLL,dotRR); // larger squared length, used to scale the relative tolerance
+        return diff <= maxAbsoluteDifference || diff <= maxRelativeDifference*sc; // passes if EITHER tolerance holds; both are measured on squared lengths, no sqrt is taken
+    }
+};
+
+}
+
+template<typename T>
+bool vectorLengthCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) // public entry point; T has to satisfy the helper's FloatingPointLikeVectorial requirement
+{
+	return impl::LengthCompareHelper<T>::__call(lhs, rhs, maxAbsoluteDifference, maxRelativeDifference); // thin forwarder; the helper applies both tolerances to squared lengths
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 86a0ddf9b9..7a2a2e27c2 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -378,5 +378,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl")
 #testing
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/orientation_compare.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/vector_length_compare.hlsl")
 
 ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")