From 3fcfb7c9c677760382233a2d014051afbf22687d Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Thu, 27 Jul 2023 15:34:25 +0100
Subject: [PATCH] Tests: Increase robustness of context creation testing

To ensure that no cuda calls occur prior to device selection, an always run method in the test suite previously used timing and a cutoff threshold to make sure a new context was being established.
This was quite flakey, as new driver updates could improve context creation times, requiring a lower threshold.

Instead, getting the current context via the driver api is used, and it should return null (or a cuda error) if no context yet exists.
This should be a much more robust testing mechanism.

This is still created per run of the test suite, regardless of if the specific test is requested or not, which is wasted time if running with a filter (one more context creation than needed, so extra time).

We may wish to move this test to a separate ctest orcestrated test binary, to ensure it is the first to run (Google test has no way to enforce test order as a feature)
---
 tests/helpers/device_initialisation.cu        | 45 +++++++++++--------
 tests/helpers/device_initialisation.h         | 14 ++++--
 tests/helpers/main.cu                         |  4 +-
 .../test_cuda_simulation_concurrency.cu       |  1 -
 4 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/tests/helpers/device_initialisation.cu b/tests/helpers/device_initialisation.cu
index 00468b2e8..2346fd91a 100644
--- a/tests/helpers/device_initialisation.cu
+++ b/tests/helpers/device_initialisation.cu
@@ -1,38 +1,45 @@
 #include "helpers/device_initialisation.h"
+#include <cuda.h>
 #include <stdio.h>
-#include <chrono>
 #include "flamegpu/flamegpu.h"
 
 namespace flamegpu {
 namespace tests {
 namespace {
     // Boolean to store the result of the test, in an anonymous namespace (i.e. static)
-    bool _CUDASimulationContextCreationTime_result = false;
+    bool _CUDASimulationContextCreation_result = false;
 }  // namespace
-// Set a threshold value, which is large enough to account for context creation
-// Experimentally cudaFree(0); takes ~2us (nsys) without context creation,
-// while cudaFree(0) including context creation takes ~> 150ms in my linux titan v system.
-// This test is a little fluffy.
-const double CONTEXT_CREATION_ATLEAST_SECONDS = 0.050;  // atleast 50ms?
 
-/* Test that CUDASimulation::applyConfig_derived() is invoked prior to any cuda call which will invoke the CUDA
-Alternative is to use the driver API, call CuCtxGetCurrent(CuContext* pctx) immediatebly before applyConfig, and if pctx is the nullptr then the context had not yet been initialised?
-@note - This needs to be called first, and only once.
-*/
-void timeCUDASimulationContextCreationTest() {
+void runCUDASimulationContextCreationTest() {
     // Create a very simple model to enable creation of a CUDASimulation
     ModelDescription m("model");
     m.newAgent("agent");
     CUDASimulation c(m);
     c.CUDAConfig().device_id = 0;
     c.SimulationConfig().steps = 1;
-    // Time how long applyconfig takes, which should invoke cudaFree as the first cuda command, initialising the context.
-    auto t0 = std::chrono::high_resolution_clock::now();
+    // Use the CUDA driver api to check there is no current context (i.e. it is null), ignoring any cuda errors reported
+    CUresult cuErr = CUDA_SUCCESS;
+    CUcontext ctxBefore = NULL;
+    CUcontext ctxAfter = NULL;
+    cuErr = cuCtxGetCurrent(&ctxBefore);
+    if (cuErr != CUDA_SUCCESS && cuErr != CUDA_ERROR_NOT_INITIALIZED) {
+        const char *error_str;
+        cuGetErrorString(cuErr, &error_str);
+        fprintf(stderr, "CUDA Driver API error occurred during cuCtxGetCurrent at %s(%d): %s.\n", __FILE__, __LINE__, error_str);
+        return;
+    }
+    // Apply the config, which should establish a cuda context
     c.applyConfig();
-    auto t1 = std::chrono::high_resolution_clock::now();
-    auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t0);
-    // The test fails if applyconfig was too fast.
-    _CUDASimulationContextCreationTime_result = time_span.count() >= CONTEXT_CREATION_ATLEAST_SECONDS;
+    // Use the CUDA driver API to ensure there is now a non-null CUDA context established.
+    cuErr = cuCtxGetCurrent(&ctxAfter);
+    if (cuErr != CUDA_SUCCESS) {
+        const char *error_str;
+        cuGetErrorString(cuErr, &error_str);
+        fprintf(stderr, "CUDA Driver API error occurred during cuCtxGetCurrent at %s(%d): %s.\n", __FILE__, __LINE__, error_str);
+        return;
+    }
+    // the result is truthy if the before context was null, and the after context is non null
+    _CUDASimulationContextCreation_result = ctxBefore == NULL && ctxAfter != NULL;
     // Run the simulation.
     c.simulate();
 }
@@ -42,7 +49,7 @@ void timeCUDASimulationContextCreationTest() {
  * @note - there is no way to know if the test has not yet been ran, instead it reports false.
  */
 bool getCUDASimulationContextCreationTestResult() {
-    return _CUDASimulationContextCreationTime_result;
+    return _CUDASimulationContextCreation_result;
 }
 
 }  // namespace tests
diff --git a/tests/helpers/device_initialisation.h b/tests/helpers/device_initialisation.h
index 138419b0d..b38ec8b9e 100644
--- a/tests/helpers/device_initialisation.h
+++ b/tests/helpers/device_initialisation.h
@@ -4,10 +4,16 @@
 namespace flamegpu {
 namespace tests {
 /**
- * Function to time the creation of the cuda context within the scope of FLAME GPU initialisation.
- * Must be run first to ensure a fresh context is being created, rather than within the google test suite which may be executed in a random order.
- */
-void timeCUDASimulationContextCreationTest();
+ *  Test that no cuda context is established prior to  CUDASimulation::applyConfig_derived().
+ *
+ * I.e. make sure that nothing occurs before device selection.
+ *
+ * This is performed by checking the cuda driver api current context is null, rather than the runtime api + timing based approach which was fluffy / driver updates improving context creation time would break the test.
+ *
+ * @note - This needs to be called first, and only once, hence it is not a true google test.
+ * @todo - It may be better places in it's own test binary, orchestrated via ctest to ensure it is first and only called once.
+*/
+void runCUDASimulationContextCreationTest();
 
 /**
  * Determine the success of the cuda context creation test.
diff --git a/tests/helpers/main.cu b/tests/helpers/main.cu
index 2a3e3c174..60547ada1 100644
--- a/tests/helpers/main.cu
+++ b/tests/helpers/main.cu
@@ -16,8 +16,8 @@ GTEST_API_ int main(int argc, char **argv) {
     flamegpu::io::Telemetry::disable();
     // Suppress the notice about telemetry.
     flamegpu::io::Telemetry::suppressNotice();
-    // Time the cuda agent model initialisation, to check it creates the context.
-    flamegpu::tests::timeCUDASimulationContextCreationTest();
+    // Check cuda context creation, once and only once prior to any CUDA call. This would be better in it's own test binary.
+    flamegpu::tests::runCUDASimulationContextCreationTest();
     // Run the main google test body
     printf("Running main() from %s\n", __FILE__);
     testing::InitGoogleTest(&argc, argv);
diff --git a/tests/test_cases/simulation/test_cuda_simulation_concurrency.cu b/tests/test_cases/simulation/test_cuda_simulation_concurrency.cu
index 7a25a4a54..255b6966d 100644
--- a/tests/test_cases/simulation/test_cuda_simulation_concurrency.cu
+++ b/tests/test_cases/simulation/test_cuda_simulation_concurrency.cu
@@ -1,6 +1,5 @@
 #include "flamegpu/flamegpu.h"
 #include "flamegpu/detail/compute_capability.cuh"
-#include "helpers/device_initialisation.h"
 
 #include "gtest/gtest.h"