Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Having issues on windows, will try Linux
  • Loading branch information
Robadob committed Nov 15, 2023
1 parent e606d4c commit 610634f
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 73 deletions.
2 changes: 1 addition & 1 deletion cmake/dependencies/Jitify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cmake_policy(SET CMP0079 NEW)
FetchContent_Declare(
jitify
GIT_REPOSITORY https://github.com/NVIDIA/jitify.git
GIT_TAG cd6b56bf0c63fcce74a59cd021bf63e5c2a32c73
GIT_TAG jitify2 #-preprocessing-overhaul
SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/jitify-src/jitify
GIT_PROGRESS ON
# UPDATE_DISCONNECTED ON
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/FindJitify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ include(FindPackageHandleStandardArgs)
# Find the main Jitify header
find_path(Jitify_INCLUDE_DIRS
NAMES
jitify/jitify.hpp
jitify/jitify2.hpp
)

# if found, get the version number.
Expand Down
18 changes: 6 additions & 12 deletions include/flamegpu/detail/JitifyCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,9 @@
#include <string>
#include <vector>

#ifdef _MSC_VER
#pragma warning(push, 2)
#include "jitify/jitify.hpp"
#pragma warning(pop)
#else
#include "jitify/jitify.hpp"
#endif

using jitify::experimental::KernelInstantiation;
namespace jitify2 {
class KernelData;
} // namespace jitify2

namespace flamegpu {
namespace detail {
Expand All @@ -36,7 +30,7 @@ class JitifyCache {
// dynamic header concatenated to kernel
// We check this is an exact match before loading from cache
std::string long_reference;
std::string serialised_kernelinst;
std::string serialised_kernel;
};

public:
Expand All @@ -50,7 +44,7 @@ class JitifyCache {
* @param dynamic_header Dynamic header source generated by curve rtc
* @return A jitify RTC kernel instance of the provided kernel sources
*/
std::unique_ptr<KernelInstantiation> loadKernel(
std::unique_ptr<jitify2::KernelData> loadKernel(
const std::string &func_name,
const std::vector<std::string> &template_args,
const std::string &kernel_src,
Expand Down Expand Up @@ -97,7 +91,7 @@ class JitifyCache {
* @param dynamic_header Dynamic header source generated by curve rtc
* @return A jitify RTC kernel instance of the provided kernel sources
*/
static std::unique_ptr<KernelInstantiation> compileKernel(
static std::unique_ptr<jitify2::KernelData> compileKernel(
const std::string &func_name,
const std::vector<std::string> &template_args,
const std::string &kernel_src,
Expand Down
10 changes: 4 additions & 6 deletions include/flamegpu/runtime/detail/curve/curve_rtc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
#include <typeindex>
#include <map>

namespace jitify {
namespace experimental {
class KernelInstantiation;
} // namespace experimental
} // namespace jitify
namespace jitify2 {
class KernelData;
} // namespace jitify2
namespace flamegpu {
namespace detail {
namespace curve {
Expand Down Expand Up @@ -214,7 +212,7 @@ class CurveRTCHost {
* @param stream The CUDA stream used for the cuda memcpy
* @note This is async, the stream is non synchronised
*/
void updateDevice_async(const jitify::experimental::KernelInstantiation& instance, cudaStream_t stream);
void updateDevice_async(const jitify2::KernelData& instance, cudaStream_t stream);

protected:
/**
Expand Down
6 changes: 3 additions & 3 deletions include/flamegpu/simulation/detail/CUDAAgent.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ class CUDAAgent : public AgentInterface {
/**
* map of agent function name to RTC function instance
*/
typedef std::map<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMap;
typedef std::map<const std::string, std::unique_ptr<jitify2::KernelData>> CUDARTCFuncMap;
typedef std::map<const std::string, std::unique_ptr<detail::curve::CurveRTCHost>> CUDARTCHeaderMap;
/**
* Element type of CUDARTCFuncMap
*/
typedef std::pair<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMapPair;
typedef std::pair<const std::string, std::unique_ptr<jitify2::KernelData>> CUDARTCFuncMapPair;
/**
* Normal constructor
* @param description Agent description of the agent
Expand Down Expand Up @@ -224,7 +224,7 @@ class CUDAAgent : public AgentInterface {
* Will throw an exception::InvalidAgentFunc exception if the function name does not have a valid instantiation
* @param function_name the name of the RTC agent function or the agent function name suffixed with condition (if it is a function condition)
*/
const jitify::experimental::KernelInstantiation& getRTCInstantiation(const std::string &function_name) const;
const jitify2::KernelData& getRTCInstantiation(const std::string &function_name) const;
detail::curve::CurveRTCHost &getRTCHeader(const std::string &function_name) const;
/**
* Returns the host interface for managing the curve instance for the named agent function
Expand Down
93 changes: 68 additions & 25 deletions src/flamegpu/detail/JitifyCache.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
#include <array>
#include <filesystem>

#include "jitify/jitify2.hpp"

#include "flamegpu/version.h"
#include "flamegpu/exception/FLAMEGPUException.h"
#include "flamegpu/detail/compute_capability.cuh"
#include "flamegpu/util/nvtx.h"

using jitify::detail::hash_combine;
using jitify::detail::hash_larson64;

namespace flamegpu {
namespace detail {
Expand Down Expand Up @@ -307,7 +307,7 @@ bool confirmFLAMEGPUHeaderVersion(const std::string &flamegpuIncludeDir, const s
} // namespace

std::mutex JitifyCache::instance_mutex;
std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
std::unique_ptr<jitify2::KernelData> JitifyCache::compileKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
flamegpu::util::nvtx::Range range{"JitifyCache::compileKernel"};
// find and validate the cuda include directory via CUDA_PATH or CUDA_HOME.
static const std::string cuda_include_dir = getCUDAIncludeDir();
Expand All @@ -319,7 +319,7 @@ std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::strin

// vector of compiler options for jitify
std::vector<std::string> options;
std::vector<std::string> headers;
std::unordered_map<std::string, std::string> headers;

// flamegpu include directory
options.push_back(std::string("-I" + std::string(flamegpu_include_dir)));
Expand Down Expand Up @@ -402,23 +402,55 @@ std::unique_ptr<KernelInstantiation> JitifyCache::compileKernel(const std::strin
options.push_back(include_cuda_h);

// get the dynamically generated header from curve rtc
headers.push_back(dynamic_header);
headers.emplace("dynamic/curve_rtc_dynamic.h", dynamic_header);

// cassert header (to remove remaining warnings) TODO: Ask Jitify to implement safe version of this
std::string cassert_h = "cassert\n";
headers.push_back(cassert_h);
//std::string cassert_h = "cassert\n";

Check failure on line 408 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//headers.push_back(cassert_h);

Check failure on line 409 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment

// Add static list of known headers (this greatly improves compilation speed)
getKnownHeaders(headers);
//getKnownHeaders(headers);

Check failure on line 412 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment

// jitify to create program (with compilation settings)
try {
auto program = jitify::experimental::Program(kernel_src, headers, options);
assert(template_args.size() == 1 || template_args.size() == 3); // Add this assertion incase template args change
auto kernel = program.kernel(template_args.size() > 1 ? "flamegpu::agent_function_wrapper" : "flamegpu::agent_function_condition_wrapper");
return std::make_unique<KernelInstantiation>(kernel, template_args);
} catch (std::runtime_error const&) {
// jitify does not have a method for getting compile logs so rely on JITIFY_PRINT_LOG defined in cmake
const std::string program_name = func_name + "_program"; // Does this name actually matter?
jitify2::PreprocessedProgram program = jitify2::Program(program_name, kernel_src, headers)->preprocess(options);
if (!program.ok()) {
const jitify2::ErrorMsg& compile_error = program.error();
fprintf(stderr, "Failed to load program for agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error loading agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
}
// Build the name of the template configuration to be instantiated
std::stringstream name_expression;
if (template_args.size() == 1) {
name_expression << "flamegpu::agent_function_condition_wrapper<";
name_expression << template_args[0];
name_expression << ">";
} else if (template_args.size() == 3) {
name_expression << "flamegpu::agent_function_wrapper<";
name_expression << template_args[0] << "," << template_args[1] << "," << template_args[2];
name_expression << ">";
} else {
THROW exception::UnknownInternalError("Unexpected AgentFunction template arg count!");
}
auto loaded_program = program->load({ name_expression.str() });
if (!loaded_program.ok()) {
const jitify2::ErrorMsg &compile_error = loaded_program.error();
fprintf(stderr, "Failed to load program for agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error loading agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
}
auto loaded_kernel = loaded_program->get_kernel("");
if (loaded_kernel.ok()) {
return std::make_unique<jitify2::KernelData>(loaded_kernel.value());
} else {
const jitify2::ErrorMsg &compile_error = loaded_kernel.error();
fprintf(stderr, "Failed to compile and link agent function (condition) '%s', log:\n%s",
func_name.c_str(), compile_error.c_str());
THROW exception::InvalidAgentFunc("Error compiling runtime agent function (or function condition) ('%s'): function had compilation errors (see std::cout), "
"in JitifyCache::buildProgram().",
func_name.c_str());
Expand Down Expand Up @@ -497,7 +529,7 @@ void JitifyCache::getKnownHeaders(std::vector<std::string>& headers) {
headers.push_back("type_traits");
}

std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
std::unique_ptr<jitify2::KernelData> JitifyCache::loadKernel(const std::string &func_name, const std::vector<std::string> &template_args, const std::string &kernel_src, const std::string &dynamic_header) {
flamegpu::util::nvtx::Range range{"JitifyCache::loadKernel"};
std::lock_guard<std::mutex> lock(cache_mutex);
// Detect current compute capability=
Expand Down Expand Up @@ -527,14 +559,19 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
"XORWOW_" +
#endif
// Use jitify hash methods for consistent hashing between OSs
std::to_string(hash_combine(hash_larson64(kernel_src.c_str()), hash_larson64(dynamic_header.c_str())));
jitify2::detail::sha256(kernel_src + dynamic_header);/*

Check failure on line 562 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Complex multi-line /*...*/-style comment found. Lint may give bogus warnings. Consider replacing these with //-style comments, with #if 0...#endif, or with more clearly structured multi-line comments.
// Does a copy with the right reference exist in memory?
if (use_memory_cache) {
const auto it = cache.find(short_reference);
if (it != cache.end()) {
// Check long reference
if (it->second.long_reference == long_reference) {
return std::make_unique<KernelInstantiation>(KernelInstantiation::deserialize(it->second.serialised_kernelinst));
// Deserialize and return program
jitify2::Kernel prog = jitify2::Kernel::deserialize(it->second.serialised_kernel);
if (prog.ok()) {
return std::make_unique<jitify2::KernelData>(prog.value());
}
// Fail silently and try to build code
}
}
}
Expand All @@ -551,24 +588,29 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
// Add it to cache for later loads
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernelinst});
// Deserialize and return program
return std::make_unique<KernelInstantiation>(KernelInstantiation::deserialize(serialised_kernelinst));
jitify2::Kernel prog = jitify2::Kernel::deserialize(serialised_kernelinst);
if (prog.ok()) {
return std::make_unique<jitify2::KernelData>(prog.value());
}
// Fail silently and try to build code
}
}
}
}*/
// Kernel has not yet been cached
{

Check failure on line 600 in src/flamegpu/detail/JitifyCache.cu

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

{ should almost always be at the end of the previous line
// Build kernel
auto kernelinst = compileKernel(func_name, template_args, kernel_src, dynamic_header);
std::unique_ptr<jitify2::KernelData> kernel = compileKernel(func_name, template_args, kernel_src, dynamic_header);
/*
// Add it to cache for later loads
const std::string serialised_kernelinst = use_memory_cache || use_disk_cache ? kernelinst->serialize() : "";
const std::string serialised_kernel = use_memory_cache || use_disk_cache ? kernel->serialize() : "";
if (use_memory_cache) {
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernelinst});
cache.emplace(short_reference, CachedProgram{long_reference, serialised_kernel });
}
// Save it to disk
if (use_disk_cache) {
std::ofstream ofs(cache_file, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc);
if (ofs) {
ofs << serialised_kernelinst;
ofs << serialised_kernel;
ofs.close();
}
ofs = std::ofstream(reference_file, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc);
Expand All @@ -577,7 +619,8 @@ std::unique_ptr<KernelInstantiation> JitifyCache::loadKernel(const std::string &
ofs.close();
}
}
return kernelinst;
*/
return kernel;
}
}
void JitifyCache::useMemoryCache(bool yesno) {
Expand Down
22 changes: 12 additions & 10 deletions src/flamegpu/runtime/detail/curve/curve_rtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
#include "flamegpu/simulation/detail/EnvironmentManager.cuh"
#include "flamegpu/detail/cuda.cuh"

// jitify include for demangle
#ifdef _MSC_VER
#pragma warning(push, 2)
#include "jitify/jitify.hpp"
#pragma warning(pop)
#else
#include "jitify/jitify.hpp"
#endif
#include "jitify/jitify2.hpp"
//// jitify include for demangle
//#ifdef _MSC_VER

Check failure on line 11 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#pragma warning(push, 2)

Check failure on line 12 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#include "jitify/jitify.hpp"

Check failure on line 13 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#pragma warning(pop)

Check failure on line 14 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#else

Check failure on line 15 in src/flamegpu/runtime/detail/curve/curve_rtc.cpp

View workflow job for this annotation

GitHub Actions / cpplint (11.8, ubuntu-20.04)

Should have a space between // and comment
//#include "jitify/jitify.hpp"
//#endif

namespace flamegpu {
namespace detail {
Expand Down Expand Up @@ -1052,10 +1053,11 @@ void CurveRTCHost::updateEnvCache(const void *env_ptr, const size_t bufferLen) {
bufferLen, agent_data_offset);
}
}
void CurveRTCHost::updateDevice_async(const jitify::experimental::KernelInstantiation& instance, cudaStream_t stream) {
void CurveRTCHost::updateDevice_async(const jitify2::KernelData& instance, cudaStream_t stream) {
// Copies the host-side curve data buffer (h_data_buffer) into the RTC kernel's
// device-side curve symbol, so the compiled agent function sees current values.
// Async: the copy is enqueued on `stream`; the caller is responsible for
// synchronising before the data is relied upon (per the header's contract).
// The namespace is required here, but not in other uses of getVariableSymbolName.
std::string cache_var_name = std::string("flamegpu::detail::curve::") + getVariableSymbolName();
CUdeviceptr d_var_ptr;
// NOTE(review): the status returned by jitify2's get_global_ptr() is ignored;
// if the symbol lookup fails, d_var_ptr is used uninitialised by the copy
// below — confirm whether this should be checked and throw, like other paths.
instance.program().get_global_ptr(cache_var_name.c_str(), &d_var_ptr);
gpuErrchkDriverAPI(cuMemcpyHtoDAsync(d_var_ptr, h_data_buffer, data_buffer_size, stream));
}

Expand Down
26 changes: 12 additions & 14 deletions src/flamegpu/simulation/CUDASimulation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <map>
#include <numeric>

#include "jitify/jitify2.hpp"

#include "flamegpu/detail/curand.cuh"
#include "flamegpu/model/AgentFunctionData.cuh"
#include "flamegpu/model/LayerData.h"
Expand Down Expand Up @@ -736,24 +738,22 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
} else { // RTC function
std::string func_condition_identifier = func_name + "_condition";
// get instantiation
const jitify::experimental::KernelInstantiation& instance = cuda_agent.getRTCInstantiation(func_condition_identifier);
const jitify2::KernelData& instance = cuda_agent.getRTCInstantiation(func_condition_identifier);
// calculate the grid block size for main agent function
CUfunction cu_func = (CUfunction)instance;
CUfunction cu_func = instance.function();
cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cu_func, 0, 0, state_list_size);
//! Round up according to CUDAAgent state list size
gridSize = (state_list_size + blockSize - 1) / blockSize;
// launch the kernel
CUresult a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx)).launch({
jitify2::ErrorMsg a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx))->launch({
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
reinterpret_cast<void*>(&error_buffer),
#endif
const_cast<void *>(reinterpret_cast<const void*>(&state_list_size)),
reinterpret_cast<void*>(&t_rng),
reinterpret_cast<void*>(&scanFlag_agentDeath) });
if (a != CUresult::CUDA_SUCCESS) {
const char* err_str = nullptr;
cuGetErrorString(a, &err_str);
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function condition '%s': %s", func_des->rtc_func_condition_name.c_str(), err_str);
if (!a.empty()) {
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function condition '%s': %s", func_des->rtc_func_condition_name.c_str(), a.c_str());
}
gpuErrchkLaunch();
}
Expand Down Expand Up @@ -966,14 +966,14 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
gpuErrchkLaunch();
} else { // assume this is a runtime specified agent function
// get instantiation
const jitify::experimental::KernelInstantiation& instance = cuda_agent.getRTCInstantiation(func_name);
const jitify2::KernelData& instance = cuda_agent.getRTCInstantiation(func_name);
// calculate the grid block size for main agent function
CUfunction cu_func = (CUfunction)instance;
CUfunction cu_func = (CUfunction)instance.function();
cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cu_func, 0, 0, state_list_size);
//! Round up according to CUDAAgent state list size
gridSize = (state_list_size + blockSize - 1) / blockSize;
// launch the kernel
CUresult a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx)).launch({
jitify2::ErrorMsg a = instance.configure(gridSize, blockSize, 0, this->getStream(streamIdx))->launch({
#if !defined(FLAMEGPU_SEATBELTS) || FLAMEGPU_SEATBELTS
reinterpret_cast<void*>(&error_buffer),
#endif
Expand All @@ -985,10 +985,8 @@ void CUDASimulation::stepLayer(const std::shared_ptr<LayerData>& layer, const un
reinterpret_cast<void*>(&scanFlag_agentDeath),
reinterpret_cast<void*>(&scanFlag_messageOutput),
reinterpret_cast<void*>(&scanFlag_agentOutput)});
if (a != CUresult::CUDA_SUCCESS) {
const char* err_str = nullptr;
cuGetErrorString(a, &err_str);
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function '%s': %s", func_name.c_str(), err_str);
if (!a.empty()) {
THROW exception::InvalidAgentFunc("There was a problem launching the runtime agent function '%s': %s", func_name.c_str(), a.c_str());
}
gpuErrchkLaunch();
}
Expand Down
Loading

0 comments on commit 610634f

Please sign in to comment.