From 8f3fb362d9b9aeeb75fae684c9cfed1dadbabecf Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 17 Jan 2025 08:47:49 -0500
Subject: [PATCH 1/3] Add CUDA HMAC test

---
 test/nvcc_jamfile |   2 +
 test/test_hmac.cu | 123 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 test/test_hmac.cu
diff --git a/test/nvcc_jamfile b/test/nvcc_jamfile
index 88450bf6..7bdbdccc 100644
--- a/test/nvcc_jamfile
+++ b/test/nvcc_jamfile
@@ -22,3 +22,5 @@ run test_sha3_256_nvcc.cu ;
 run test_sha3_224_nvcc.cu ;
 run test_shake128_nvcc.cu ;
 run test_shake256_nvcc.cu ;
+
+run test_hmac.cu ;
diff --git a/test/test_hmac.cu b/test/test_hmac.cu
new file mode 100644
index 00000000..b0e8e9bc
--- /dev/null
+++ b/test/test_hmac.cu
@@ -0,0 +1,123 @@
+//  Copyright Matt Borland 2024
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cuda_runtime.h>
+#include <boost/crypt2/mac/hmac.hpp>
+#include <boost/crypt2/hash/sha3_512.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+#include "generate_random_strings.hpp"
+#include <iostream>
+#include <iomanip>
+#include <exception>
+#include <memory>
+#include <span>
+
+using digest_type = typename boost::crypt::sha3_512_hasher::return_type;
+
+// The kernel function
+__global__ void cuda_test(char** in, digest_type* out, int numElements)
+{
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        boost::crypt::hmac<boost::crypt::sha3_512_hasher> hmac_class;
+        auto in_span {cuda::std::span(in[i], 64)};
+        hmac_class.init(in_span);
+        hmac_class.process_bytes(in_span);
+        hmac_class.finalize();
+        out[i] = hmac_class.get_digest().value();
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        constexpr int numElements = 50000;
+        constexpr std::size_t elementSize = 64;
+
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        char** input_vector1;
+        cudaMallocManaged(&input_vector1, numElements * sizeof(char*));
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<digest_type> output_vector(numElements);
+
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char));
+            if (input_vector1[i] == nullptr)
+            {
+                throw std::runtime_error("Failed to allocate memory for input_vector1");
+            }
+            boost::crypt::generate_random_string(input_vector1[i], elementSize);
+        }
+
+        // Launch the Vector Add CUDA Kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1, output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<digest_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            std::span<char> in(input_vector1[i], elementSize);
+            boost::crypt::hmac<boost::crypt::sha3_512_hasher> hmac_class;
+            auto in_span {cuda::std::span(in, 64)};
+            hmac_class.init(in_span);
+            hmac_class.process_bytes(in_span);
+            hmac_class.finalize();
+            results.emplace_back(hmac_class.get_digest().value());
+        }
+        double t = w.elapsed();
+
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i][0] != results[i][0])
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+
+        // Cleanup all the memory we allocated
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaFree(input_vector1[i]);
+        }
+        cudaFree(input_vector1);
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Terminated with exception: " << e.what() << std::endl;
+    }
+}

From 8cf692de2b949a0a210910e7e7f1d0bb763df1d5 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 17 Jan 2025 09:42:19 -0500
Subject: [PATCH 2/3] Fix span construction error

---
 test/test_hmac.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_hmac.cu b/test/test_hmac.cu
index b0e8e9bc..a430cf36 100644
--- a/test/test_hmac.cu
+++ b/test/test_hmac.cu
@@ -25,7 +25,7 @@ __global__ void cuda_test(char** in, digest_type* out, int numElements)
     if (i < numElements)
     {
         boost::crypt::hmac<boost::crypt::sha3_512_hasher> hmac_class;
-        auto in_span {cuda::std::span(in[i], 64)};
+        cuda::std::span<char> in_span {in[i], static_cast<cuda::std::size_t>(64)};
         hmac_class.init(in_span);
         hmac_class.process_bytes(in_span);
         hmac_class.finalize();

From 0a797a30e903e849c52db5158aafc8607dc291be Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 17 Jan 2025 09:51:44 -0500
Subject: [PATCH 3/3] Fix std::span construction

---
 test/test_hmac.cu | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/test_hmac.cu b/test/test_hmac.cu
index a430cf36..69de6abb 100644
--- a/test/test_hmac.cu
+++ b/test/test_hmac.cu
@@ -86,9 +86,8 @@ int main()
         w.reset();
         for(int i = 0; i < numElements; ++i)
         {
-            std::span<char> in(input_vector1[i], elementSize);
+            std::span<char> in_span(input_vector1[i], static_cast<std::size_t>(64));
             boost::crypt::hmac<boost::crypt::sha3_512_hasher> hmac_class;
-            auto in_span {cuda::std::span(in, 64)};
             hmac_class.init(in_span);
             hmac_class.process_bytes(in_span);
             hmac_class.finalize();