Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Having issues on windows, will try Linux
  • Loading branch information
Robadob committed Nov 15, 2023
1 parent e606d4c commit 610634f
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 73 deletions.
2 changes: 1 addition & 1 deletion cmake/dependencies/Jitify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cmake_policy(SET CMP0079 NEW)
FetchContent_Declare(
jitify
GIT_REPOSITORY https://github.com/NVIDIA/jitify.git
GIT_TAG cd6b56bf0c63fcce74a59cd021bf63e5c2a32c73
GIT_TAG jitify2 #-preprocessing-overhaul
SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/jitify-src/jitify
GIT_PROGRESS ON
# UPDATE_DISCONNECTED ON
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/FindJitify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ include(FindPackageHandleStandardArgs)
# Find the main Jitify header
find_path(Jitify_INCLUDE_DIRS
NAMES
jitify/jitify.hpp
jitify/jitify2.hpp
)

# if found, get the version number.
Expand Down
18 changes: 6 additions & 12 deletions include/flamegpu/detail/JitifyCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,9 @@
#include <string>
#include <vector>

#ifdef _MSC_VER
#pragma warning(push, 2)
#include "jitify/jitify.hpp"
#pragma warning(pop)
#else
#include "jitify/jitify.hpp"
#endif

using jitify::experimental::KernelInstantiation;
namespace jitify2 {
class KernelData;
} // namespace jitify2

namespace flamegpu {
namespace detail {
Expand All @@ -36,7 +30,7 @@ class JitifyCache {
// dynamic header concatenated to kernel
// We check this is an exact match before loading from cache
std::string long_reference;
std::string serialised_kernelinst;
std::string serialised_kernel;
};

public:
Expand All @@ -50,7 +44,7 @@ class JitifyCache {
* @param dynamic_header Dynamic header source generated by curve rtc
* @return A jitify RTC kernel instance of the provided kernel sources
*/
std::unique_ptr<KernelInstantiation> loadKernel(
std::unique_ptr<jitify2::KernelData> loadKernel(
const std::string &func_name,
const std::vector<std::string> &template_args,
const std::string &kernel_src,
Expand Down Expand Up @@ -97,7 +91,7 @@ class JitifyCache {
* @param dynamic_header Dynamic header source generated by curve rtc
* @return A jitify RTC kernel instance of the provided kernel sources
*/
static std::unique_ptr<KernelInstantiation> compileKernel(
static std::unique_ptr<jitify2::KernelData> compileKernel(
const std::string &func_name,
const std::vector<std::string> &template_args,
const std::string &kernel_src,
Expand Down
10 changes: 4 additions & 6 deletions include/flamegpu/runtime/detail/curve/curve_rtc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
#include <typeindex>
#include <map>

namespace jitify {
namespace experimental {
class KernelInstantiation;
} // namespace experimental
} // namespace jitify
namespace jitify2 {
class KernelData;
} // namespace jitify2
namespace flamegpu {
namespace detail {
namespace curve {
Expand Down Expand Up @@ -214,7 +212,7 @@ class CurveRTCHost {
* @param stream The CUDA stream used for the cuda memcpy
* @note This is async, the stream is non synchronised
*/
void updateDevice_async(const jitify::experimental::KernelInstantiation& instance, cudaStream_t stream);
void updateDevice_async(const jitify2::KernelData& instance, cudaStream_t stream);

protected:
/**
Expand Down
6 changes: 3 additions & 3 deletions include/flamegpu/simulation/detail/CUDAAgent.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ class CUDAAgent : public AgentInterface {
/**
* map of agent function name to RTC function instance
*/
typedef std::map<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMap;
typedef std::map<const std::string, std::unique_ptr<jitify2::KernelData>> CUDARTCFuncMap;
typedef std::map<const std::string, std::unique_ptr<detail::curve::CurveRTCHost>> CUDARTCHeaderMap;
/**
* Element type of CUDARTCFuncMap
*/
typedef std::pair<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMapPair;
typedef std::pair<const std::string, std::unique_ptr<jitify2::KernelData>> CUDARTCFuncMapPair;
/**
* Normal constructor
* @param description Agent description of the agent
Expand Down Expand Up @@ -224,7 +224,7 @@ class CUDAAgent : public AgentInterface {
* Will throw an exception::InvalidAgentFunc exception if the function name does not have a valid instantiation
* @param function_name the name of the RTC agent function or the agent function name suffixed with condition (if it is a function condition)
*/
const jitify::experimental::KernelInstantiation& getRTCInstantiation(const std::string &function_name) const;
const jitify2::KernelData& getRTCInstantiation(const std::string &function_name) const;
detail::curve::CurveRTCHost &getRTCHeader(const std::string &function_name) const;
/**
* Returns the host interface for managing the curve instance for the named agent function
Expand Down
93 changes: 68 additions & 25 deletions src/flamegpu/detail/JitifyCache.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
#include <array>
#include <filesystem>

#include "jitify/jitify2.hpp"

#include "flamegpu/version.h"
#include "flamegpu/exception/FLAMEGPUException.h"
#include "flamegpu/detail/compute_capability.cuh"
#include "flamegpu/util/nvtx.h"

using jitify::detail::hash_combine;
using jitify::detail::hash_larson64;

namespace flamegpu {
namespace detail {
Expand Down Expand Up @@ -307,7 +307,7 @@ bool confirmFLAMEGPUHeaderVersion(const std::string &flamegpuIncludeDir, const s
} // namespace

std::mutex JitifyCache::instance_mutex;
std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
std::unique_ptr<jitify2::KernelData> JitifyCache::compileKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
flamegpu::util::nvtx::Range range{"JitifyCache::compileKernel"};
// find and validate the cuda include directory via CUDA_PATH or CUDA_HOME.
static const std::string cuda_include_dir = getCUDAIncludeDir();
Expand All @@ -319,7 +319,7 @@ std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::strin

// vector of compiler options for jitify
std::vector<std::string> options;
std::vector<std::string> headers;
std::unordered_map<std::string, std::string> headers;

// flamegpu include directory
options.push_back(std::string("-I" + std::string(flamegpu_include_dir)));
Expand Down Expand Up @@ -402,23 +402,55 @@ std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::strin
options.push_back(include_cuda_h);

// get the dynamically generated header from curve rtc
headers.push_back(dynamic_header);
headers.emplace("dynamic/curve_rtc_dynamic.h", dynamic_header);

// cassert header (to remove remaining warnings) TODO: Ask Jitify to implement safe version of this
std::string cassert_h = "cassert\n";
headers.push_back(cassert_h);
//std::string cassert_h = "cassert\n";

Check failure on line 408 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//headers.push_back(cassert_h);

Check failure on line 409 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment

// Add static list of known headers (this greatly improves compilation speed)
getKnownHeaders(headers);
//getKnownHeaders(headers);

Check failure on line 412 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment

// jitify to create program (with compilation settings)
try {
auto program = jitify::experimental::Program(kernel_src, headers, options);
assert(template_args.size() == 1 || template_args.size() == 3); // Add this assertion incase template args change
auto kernel = program.kernel(template_args.size() > 1 ? "flamegpu::agent_function_wrapper" : "flamegpu::agent_function_condition_wrapper");
return std::make_unique<KernelInstantiation>(kernel, template_args);
} catch (std::runtime_error const&) {
// jitify does not have a method for getting compile logs so rely on JITIFY_PRINT_LOG defined in cmake
const std::string program_name = func_name + "_program"; // Does this name actually matter?
jitify2::PreprocessedProgram program = jitify2::Program(program_name, kernel_src, headers)->preprocess(options);
if (!program.ok()) {
const jitify2::ErrorMsg& compile_error = program.error();
fprintf(stderr, "Failed to load program for agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error loading agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
}
// Build the name of the template configuration to be instantiated
std::stringstream name_expression;
if (template_args.size() == 1) {
name_expression << "flamegpu::agent_function_condition_wrapper<";
name_expression << template_args[0];
name_expression << ">";
} else if (template_args.size() == 3) {
name_expression << "flamegpu::agent_function_wrapper<";
name_expression << template_args[0] << "," << template_args[1] << "," << template_args[2];
name_expression << ">";
} else {
THROW exception::UnknownInternalError("Unexpected AgentFunction template arg count!");
}
auto loaded_program = program->load({ name_expression.str() });
if (!loaded_program.ok()) {
const jitify2::ErrorMsg &compile_error = loaded_program.error();
fprintf(stderr, "Failed to load program for agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error loading agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
}
auto loaded_kernel = loaded_program->get_kernel("");
if (loaded_kernel.ok()) {
return std::make_unique<jitify2::KernelData>(loaded_kernel.value());
} else {
const jitify2::ErrorMsg &compile_error = loaded_kernel.error();
fprintf(stderr, "Failed to compile and link agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error compiling runtime agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
Expand Down Expand Up @@ -497,7 +529,7 @@ void JitifyCache::getKnownHeaders(std::vector<std::string>& headers) {
headers.push_back("type_traits");
}

std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
std::unique_ptr<jitify2::KernelData> JitifyCache::loadKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
flamegpu::util::nvtx::Range range{"JitifyCache::loadKernel"};
std::lock_guard<std::mutex> lock(cache_mutex);
// Detect current compute capability=
Expand Down Expand Up @@ -527,14 +559,19 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
"XORWOW_" +
#endif
// Use jitify hash methods for consistent hashing between OSs
std::to_string(hash_combine(hash_larson64(kernel_src.c_str()), hash_larson64(dynamic_header.c_str())));
jitify2::detail::sha256(kernel_src + dynamic_header);/*

Check failure on line 562 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Complex multi-line /*...*/-style comment found. Lint may give bogus warnings. Consider replacing these with //-style comments, with #if 0...#endif, or with more clearly structured multi-line comments.
// Does a copy with the right reference exist in memory?
if (use_memory_cache) {
const auto it = cache.find(short_reference);
if (it != cache.end()) {
// Check long reference
if (it->second.long_reference == long_reference) {
return std::make_unique<KernelInstantiation>(KernelInstantiation::deserialize(it->second.serialised_kernelinst));
// Deserialize and return program
jitify2::Kernel prog = jitify2::Kernel::deserialize(it->second.serialised_kernel);
if (prog.ok()) {
return std::make_unique<jitify2::KernelData>(prog.value());
}
// Fail silently and try to build code
}
}
}
Expand All @@ -551,24 +588,29 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
// Add it to cache for later loads
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernelinst});
// Deserialize and return program
return std::make_unique<KernelInstantiation>(KernelInstantiation::deserialize(serialised_kernelinst));
jitify2::Kernel prog = jitify2::Kernel::deserialize(serialised_kernelinst);
if (prog.ok()) {
return std::make_unique<jitify2::KernelData>(prog.value());
}
// Fail silently and try to build code
}
}
}
}*/
// Kernel has not yet been cached
{

Check failure on line 600 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

{ should almost always be at the end of the previous line
// Build kernel
auto kernelinst = compileKernel(func_name, template_args, kernel_src, dynamic_header);
std::unique_ptr<jitify2::KernelData> kernel = compileKernel(func_name, template_args, kernel_src, dynamic_header);
/*
// Add it to cache for later loads
const std::string serialised_kernelinst = use_memory_cache || use_disk_cache ? kernelinst->serialize() : "";
const std::string serialised_kernel = use_memory_cache || use_disk_cache ? kernel->serialize() : "";
if (use_memory_cache) {
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernelinst});
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernel });
}
// Save it to disk
if (use_disk_cache) {
std::ofstream ofs(cache_file, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc);
if (ofs) {
ofs << serialised_kernelinst;
ofs << serialised_kernel;
ofs.close();
}
ofs = std::ofstream(reference_file, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc);
Expand All @@ -577,7 +619,8 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
ofs.close();
}
}
return kernelinst;
*/
return kernel;
}
}
void JitifyCache::useMemoryCache(bool yesno) {
Expand Down
22 changes: 12 additions & 10 deletions src/flamegpu/runtime/detail/curve/curve_rtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
#include "flamegpu/simulation/detail/EnvironmentManager.cuh"
#include "flamegpu/detail/cuda.cuh"

// jitify include for demangle
#ifdef _MSC_VER
#pragma warning(push, 2)
#include "jitify/jitify.hpp"
#pragma warning(pop)
#else
#include "jitify/jitify.hpp"
#endif
#include "jitify/jitify2.hpp"
//// jitify include for demangle
//#ifdef _MSC_VER

Check failure on line 11 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#pragma warning(push, 2)

Check failure on line 12 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#include "jitify/jitify.hpp"

Check failure on line 13 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#pragma warning(pop)

Check failure on line 14 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#else

Check failure on line 15 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#include "jitify/jitify.hpp"
//#endif

namespace flamegpu {
namespace detail {
Expand Down Expand Up @@ -1052,10 +1053,11 @@ void CurveRTCHost::updateEnvCache(const void *env_ptr, const size_t bufferLen) {
bufferLen, agent_data_offset);
}
}
void CurveRTCHost::updateDevice_async(const jitify::experimental::KernelInstantiation& instance, cudaStream_t stream) {
void CurveRTCHost::updateDevice_async(const jitify2::KernelData& instance, cudaStream_t stream) {
// Copies the host-side curve data buffer (h_data_buffer) into the RTC kernel's
// device-side curve symbol, so the compiled agent function sees current values.
// Async: the copy is enqueued on `stream`; the caller is responsible for
// synchronising before the data is relied upon (per the header's contract).
// The namespace is required here, but not in other uses of getVariableSymbolName.
std::string cache_var_name = std::string("flamegpu::detail::curve::") + getVariableSymbolName();
CUdeviceptr d_var_ptr;
// NOTE(review): the status returned by jitify2's get_global_ptr() is ignored;
// if the symbol lookup fails, d_var_ptr is used uninitialised by the copy
// below — confirm whether this should be checked and throw, like other paths.
instance.program().get_global_ptr(cache_var_name.c_str(), &d_var_ptr);
gpuErrchkDriverAPI(cuMemcpyHtoDAsync(d_var_ptr, h_data_buffer, data_buffer_size, stream));
}

Expand Down
26 changes: 12 additions & 14 deletions src/flamegpu/simulation/CUDASimulation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <map>
#include <numeric>

#include "jitify/jitify2.hpp"

#include "flamegpu/detail/curand.cuh"
#include "flamegpu/model/AgentFunctionData.cuh"
#include "flamegpu/model/LayerData.h"
Expand Down Expand Up @@ -736,24 +738,22 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
} else { // RTC function
std::string func_condition_identifier = func_name + "_condition";
// get instantiation
const jitify::experimental::KernelInstantiation& instance = cuda_agent.getRTCInstantiation(func_condition_identifier);
const jitify2::KernelData& instance = cuda_agent.getRTCInstantiation(func_condition_identifier);
// calculate the grid block size for main agent function
CUfunction cu_func = (CUfunction)instance;
CUfunction cu_func = instance.function();
cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cu_func, 0, 0, state_list_size);
//! Round up according to CUDAAgent state list size
gridSize = (state_list_size + blockSize - 1) / blockSize;
// launch the kernel
CUresult a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx)).launch({
jitify2::ErrorMsg a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx))->launch({
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
reinterpret_cast<void*>(&error_buffer),
#endif
const_cast<void *>(reinterpret_cast<const void*>(&state_list_size)),
reinterpret_cast<void*>(&t_rng),
reinterpret_cast<void*>(&scanFlag_agentDeath) });
if (a != CUresult::CUDA_SUCCESS) {
const char* err_str = nullptr;
cuGetErrorString(a, &err_str);
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function condition '%s': %s", func_des->rtc_func_condition_name.c_str(), err_str);
if (!a.empty()) {
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function condition '%s': %s", func_des->rtc_func_condition_name.c_str(), a.c_str());
}
gpuErrchkLaunch();
}
Expand Down Expand Up @@ -966,14 +966,14 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
gpuErrchkLaunch();
} else { // assume this is a runtime specified agent function
// get instantiation
const jitify::experimental::KernelInstantiation& instance = cuda_agent.getRTCInstantiation(func_name);
const jitify2::KernelData& instance = cuda_agent.getRTCInstantiation(func_name);
// calculate the grid block size for main agent function
CUfunction cu_func = (CUfunction)instance;
CUfunction cu_func = (CUfunction)instance.function();
cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cu_func, 0, 0, state_list_size);
//! Round up according to CUDAAgent state list size
gridSize = (state_list_size + blockSize - 1) / blockSize;
// launch the kernel
CUresult a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx)).launch({
jitify2::ErrorMsg a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx))->launch({
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
reinterpret_cast<void*>(&error_buffer),
#endif
Expand All @@ -985,10 +985,8 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
reinterpret_cast<void*>(&scanFlag_agentDeath),
reinterpret_cast<void*>(&scanFlag_messageOutput),
reinterpret_cast<void*>(&scanFlag_agentOutput)});
if (a != CUresult::CUDA_SUCCESS) {
const char* err_str = nullptr;
cuGetErrorString(a, &err_str);
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function '%s': %s", func_name.c_str(), err_str);
if (!a.empty()) {
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function '%s': %s", func_name.c_str(), a.c_str());
}
gpuErrchkLaunch();
}
Expand Down
Loading

0 comments on commit 610634f

Please sign in to comment.