diff --git a/CMakeLists.txt b/CMakeLists.txt index 169dd8498..ea34c9f18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,23 @@ set(DOCS_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/agent-functions/modifying-agent-variables.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/agent-functions/random-numbers.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/creating-a-model/index.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/index.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/logging.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_cycle_stages_6(x512)_a.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_cycle_stages_6(x512)_b.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_living_count(x512)_a.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_living_count(x512)_b.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_MYCN(x512)_a.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_MYCN(x512)_b.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_telo_count(x512)_a.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/NB_telo_count(x512)_b.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/printf.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/seatbelts.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/using-a-debugger.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/visual_studio_attach_to_process_dialog.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/visual_studio_build_config.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/visual_studio_startup_project.png" + "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/debugging-models/visual_studio_start_debugger.png" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/defining-agents/index.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/defining-execution-order/dependency-graph.rst" 
"${CMAKE_CURRENT_SOURCE_DIR}/src/guide/defining-execution-order/exit-conditions.rst" @@ -74,7 +91,6 @@ set(DOCS_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/running-a-simulation/index.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/running-a-simulation/initial-state.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/running-multiple-simulations/index.rst" - "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/seatbelts/index.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/visualisation/adding-details.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/visualisation/building-with-vis.rst" "${CMAKE_CURRENT_SOURCE_DIR}/src/guide/visualisation/index.rst" @@ -105,8 +121,8 @@ foreach(SRC_FILE IN LISTS DOCS_SRC_FILES) add_custom_command( OUTPUT ${BUILD_SRC_FILE} DEPENDS ${SRC_FILE} ${CMAKE_CURRENT_BINARY_DIR}/src/index.rst - COMMAND ${CMAKE_COMMAND} -E copy ${SRC_FILE} ${BUILD_SRC_FILE} - COMMENT "Copying ${SRC_FILE} to ${BUILD_SRC_FILE}" + COMMAND ${CMAKE_COMMAND} -E copy "\"${SRC_FILE}\"" "\"${BUILD_SRC_FILE}\"" + COMMENT "Copying '${SRC_FILE}' to '${BUILD_SRC_FILE}'" ) list(APPEND BUILD_SRC_DEPENDS "${BUILD_SRC_FILE}") unset(BUILD_SRC_FILE) diff --git a/src/guide/agent-functions/defining-agent-functions.rst b/src/guide/agent-functions/defining-agent-functions.rst index b0c7f3c19..397302c03 100644 --- a/src/guide/agent-functions/defining-agent-functions.rst +++ b/src/guide/agent-functions/defining-agent-functions.rst @@ -37,6 +37,8 @@ For compile time (i.e. non-RTC functions), when using the C++ API, the :c:macro: // ... } +.. 
_Runtime Compiled Agent Functions: + C++ and Python Runtime Compiled Agent Functions ----------------------------------------------- diff --git a/src/guide/debugging-models/NB_MYCN(x512)_a.png b/src/guide/debugging-models/NB_MYCN(x512)_a.png new file mode 100644 index 000000000..dd1edaa0f Binary files /dev/null and b/src/guide/debugging-models/NB_MYCN(x512)_a.png differ diff --git a/src/guide/debugging-models/NB_MYCN(x512)_b.png b/src/guide/debugging-models/NB_MYCN(x512)_b.png new file mode 100644 index 000000000..21135009a Binary files /dev/null and b/src/guide/debugging-models/NB_MYCN(x512)_b.png differ diff --git a/src/guide/debugging-models/NB_cycle_stages_6(x512)_a.png b/src/guide/debugging-models/NB_cycle_stages_6(x512)_a.png new file mode 100644 index 000000000..78a6ce76f Binary files /dev/null and b/src/guide/debugging-models/NB_cycle_stages_6(x512)_a.png differ diff --git a/src/guide/debugging-models/NB_cycle_stages_6(x512)_b.png b/src/guide/debugging-models/NB_cycle_stages_6(x512)_b.png new file mode 100644 index 000000000..00766e194 Binary files /dev/null and b/src/guide/debugging-models/NB_cycle_stages_6(x512)_b.png differ diff --git a/src/guide/debugging-models/NB_living_count(x512)_a.png b/src/guide/debugging-models/NB_living_count(x512)_a.png new file mode 100644 index 000000000..69e621911 Binary files /dev/null and b/src/guide/debugging-models/NB_living_count(x512)_a.png differ diff --git a/src/guide/debugging-models/NB_living_count(x512)_b.png b/src/guide/debugging-models/NB_living_count(x512)_b.png new file mode 100644 index 000000000..c02758670 Binary files /dev/null and b/src/guide/debugging-models/NB_living_count(x512)_b.png differ diff --git a/src/guide/debugging-models/NB_telo_count(x512)_a.png b/src/guide/debugging-models/NB_telo_count(x512)_a.png new file mode 100644 index 000000000..a49d05e01 Binary files /dev/null and b/src/guide/debugging-models/NB_telo_count(x512)_a.png differ diff --git 
a/src/guide/debugging-models/NB_telo_count(x512)_b.png b/src/guide/debugging-models/NB_telo_count(x512)_b.png new file mode 100644 index 000000000..c133904ce Binary files /dev/null and b/src/guide/debugging-models/NB_telo_count(x512)_b.png differ diff --git a/src/guide/debugging-models/index.rst b/src/guide/debugging-models/index.rst new file mode 100644 index 000000000..5cb241f2c --- /dev/null +++ b/src/guide/debugging-models/index.rst @@ -0,0 +1,17 @@ +.. _DebuggingModels: + +Debugging Models +================ + +Implementing large models can be challenging, therefore it's likely you will need to debug your model during development to hunt down the cause of unwanted behaviours. Furthermore, once you have completed your model it's likely that you will need to validate and calibrate your model, which may follow a similar process. + +This chapter has been broken up into several sections, each detailing a different approach to debugging FLAME GPU 2 models: + + +.. toctree:: + :maxdepth: 1 + + seatbelts.rst + printf.rst + logging.rst + using-a-debugger.rst \ No newline at end of file diff --git a/src/guide/debugging-models/logging.rst b/src/guide/debugging-models/logging.rst new file mode 100644 index 000000000..74b852de6 --- /dev/null +++ b/src/guide/debugging-models/logging.rst @@ -0,0 +1,245 @@ +.. _DebuggingModelsLogging: + +Logging Timeseries Data +======================= + +:ref:`Logging` can be used to collect timeseries data, such as how the size of agent populations changes throughout a model's execution. Furthermore, with :ref:`ensembles` this data can be collected across a batch of runs, especially useful when dealing with stochastic models. + +Use of logging in this manner is particularly useful when porting a pre-existing model to FLAME GPU 2, and is an approach we have used widely to validate our FLAME GPU 2 models behaviour matches. + +1. Decide a model configuration which can be executed in both versions of the model +2.
Execute both models ~100+ times, with a variety of different random seeds. +3. From these executions, collect timeseries data for a range of important model values. These might be population sizes, environment properties or the mean of agent variables. +4. Calculate the mean and standard deviation of each data point for both data sets and graph them. +5. The graph should contain two lines, if the models are operating the same they should match closely (this will also depend on the model's stochasticity and number of runs collected). +6. Presence of differences in some graphs before others can help narrow down the source of differences assisting in them being solved. + + +Below are some example before and after graphs, demonstrating how visible small bugs can be. + +Both the Python and FLAME GPU models were executed with 512 different random seeds, for a low number of steps (60-80). + +The number of agents is a major signal for the two models being equivalent. + +**The average +- the standard deviation of the number of living NB cells at each time step.** + +.. image:: NB_living_count(x512)_a.png + :width: 400 + :alt: The graph shows the initial (bugged) case, whereby the two plots diverge early. + +.. image:: NB_living_count(x512)_b.png + :width: 400 + :alt: The graph shows the final (fixed) case, whereby the two plots match closely. + +When the number of agents is divergent, there will normally be multiple other agent variables which influence agent birth/death which diverge too. + +**The average +- the standard deviation of the average of NB agent variable MYCN at each time step.** + +.. image:: NB_MYCN(x512)_a.png + :width: 400 + :alt: The graph shows the initial (bugged) case, whereby the two plots diverge early. + +.. image:: NB_MYCN(x512)_b.png + :width: 400 + :alt: The graph shows the final (fixed) case, whereby the two plots match closely. + +**The average +- the standard deviation of the average of NB agent variable telo_count at each time step.** + +.. 
image:: NB_telo_count(x512)_a.png + :width: 400 + :alt: The graph shows the initial (bugged) case, whereby the two plots diverge early. + +.. image:: NB_telo_count(x512)_b.png + :width: 400 + :alt: The graph shows the final (fixed) case, whereby the two plots match closely. + +Often times, awareness of the order in which such agent variables diverge will narrow the source of the problem sufficiently. However, in some cases even knowing the agent function at fault is not precise enough to spot a subtle mistake. At this point we suggest adding counters, :ref:`using environment macro properties`, to track how frequently different code-paths are followed, this can lead to identifying the specific condition at fault. + +**The average +- the standard deviation of the proportion of NB agents which passed the 6th branch within the cell cycle agent function.** + +.. image:: NB_cycle_stages_6(x512)_a.png + :width: 400 + :alt: The graph shows the initial (bugged) case, whereby the two plots diverge early. + +.. image:: NB_cycle_stages_6(x512)_b.png + :width: 400 + :alt: The graph shows the final (fixed) case, whereby the two plots match closely. + +From the above graphs, it should be clear how logging can enable the tracing of differences between two models. Longer runs can be useful, as the impact of differences grows with time, however the key to identifying them tends to lie in spotting where/when they begin, or the period in which they reoccur. In this example, the Python model is not able to perform at the same scale as FLAME GPU, so runs used a small agent population of around 100 agents, for under 100 steps. The cells have a periodisation of 24 steps, so this is still sufficient, despite in practice this model executing with 100,000 or more agents for 3000+ steps. + +..
_EnvironmentMacroPropertyCounters: + +Environment Macro Property Counters +----------------------------------- + +As mentioned in the previous example, it may be necessary to add counters to trace what proportion of agents are following each code-path. + +For example, given the below simplified agent function: + +.. tabs:: + + .. code-tab:: cuda CUDA C++ + + FLAMEGPU_AGENT_FUNCTION(NB_cell_cycle, flamegpu::MessageNone, flamegpu::MessageNone) { + + unsigned int s_cycle = FLAMEGPU->getVariable("cycle"); + const int s_neighbours = FLAMEGPU->getVariable("neighbours"); + const int s_ID2 = FLAMEGPU->getVariable("ID2"); + const float s_cycdiff = FLAMEGPU->getVariable("cycdiff"); + const int s_MAPK_RAS = FLAMEGPU->getVariable("MAPK_RAS"); + const int s_MYCN = FLAMEGPU->getVariable("MYCN"); + const int s_p21 = FLAMEGPU->getVariable("p21"); + const int s_p27 = FLAMEGPU->getVariable("p27"); + const int s_CDC25C = FLAMEGPU->getVariable("CDC25C"); + + const float P_cycle_nb = FLAMEGPU->environment.getProperty("P_cycle_nb"); + const bool dummy_ncycle = FLAMEGPU->random.uniform() < P_cycle_nb ? true : false; + + if (dummy_ncycle && s_neighbours <= 3) { + if (s_cycle < 12) { + if (s_cycle == 0) { + if (FLAMEGPU->random.uniform() < s_cycdiff) { + if (((s_MAPK_RAS == 1 || s_MYCN == 1) && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) { + s_cycle += 1; + } + } + } else if (((s_MAPK_RAS == 1 || s_MYCN == 1) && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) { + s_cycle += 1; + if (s_cycle >= 12 && ((s_MAPK_RAS == 1 && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) == 0) { + s_cycle -= 1; + } + } + } else if (s_cycle < 18) { + s_cycle += 1; + if (s_cycle >= 18 && s_CDC25C == 0) { + s_cycle -= 1; + } + } + } + FLAMEGPU->setVariable("cycle", s_cycle); + } + +It contains 9 ``if`` and ``else if`` statements, where agent's may diverge. + +If we wish to explore how many agents take each path, first it's necessary to extend the model's definition. + +.. tabs:: + + .. 
code-tab:: cpp C++ + + flamegpu::ModelDescription model("Counters Logging Example"); + + ... // Existing model definition + + // New components for counting + model.Environment().newMacroProperty("nb_cycle_counter"); + model.Environment().newProperty("nb_cycle_counter"); + + .. code-tab:: py Python + + model = pyflamegpu.ModelDescription ("Counters Logging Example") + + ... # Existing model definition + + # New components for counting + model.Environment().newMacroPropertyUInt("nb_cycle_counter", 9) + model.Environment().newPropertyArrayFloat("nb_cycle_counter", 9) + +A step function must also be added, to both copy the macro property to the environment property (to be logged), and to reset the macro property before the next step. + +.. tabs:: + + .. code-tab:: cpp C++ + + FLAMEGPU_STEP_FUNCTION(reset_counters) { + // Copy the data from macro environment property to environment property + const float NB_COUNT = static_cast<float>(FLAMEGPU->agent("NB").count()); // Cast to avoid integer division + auto nb_cycle_counter = FLAMEGPU->environment.getMacroProperty("nb_cycle_counter"); + for (unsigned int i = 0; i < 9; ++i) { + // Normalise the data by dividing it by the number of agents + FLAMEGPU->environment.setProperty("nb_cycle_counter", i, nb_cycle_counter[i] / NB_COUNT); + } + // Reset the macro environment property + nb_cycle_counter.zero(); + } + + // Attach the step function to the model + model.newStepFunction(reset_counters); + + ..
code-tab:: py Python + + class reset_counters(pyflamegpu.HostFunctionCallback): + def run(self,FLAMEGPU): + # Copy the data from macro environment property to environment property + NB_COUNT = FLAMEGPU.agent("NB").count() + nb_cycle_counter = FLAMEGPU.environment.getMacroPropertyUInt("nb_cycle_counter") + for i in range(9): + FLAMEGPU.environment.setPropertyArrayFloat("nb_cycle_counter", i, nb_cycle_counter[i] / NB_COUNT) + # Reset the macro environment property + nb_cycle_counter.zero() + + # Attach the step function to the model + model.addStepFunctionCallback(reset_counters().__disown__()) + +Now the agent function can be updated to increment the counters at each branch. + +.. tabs:: + + .. code-tab:: cuda CUDA C++ + + FLAMEGPU_AGENT_FUNCTION(NB_cell_cycle, flamegpu::MessageNone, flamegpu::MessageNone) { + auto nb_cycle_counter = FLAMEGPU->environment.getMacroProperty("nb_cycle_counter"); + + unsigned int s_cycle = FLAMEGPU->getVariable("cycle"); + const int s_neighbours = FLAMEGPU->getVariable("neighbours"); + const int s_ID2 = FLAMEGPU->getVariable("ID2"); + const float s_cycdiff = FLAMEGPU->getVariable("cycdiff"); + const int s_MAPK_RAS = FLAMEGPU->getVariable("MAPK_RAS"); + const int s_MYCN = FLAMEGPU->getVariable("MYCN"); + const int s_p21 = FLAMEGPU->getVariable("p21"); + const int s_p27 = FLAMEGPU->getVariable("p27"); + const int s_CDC25C = FLAMEGPU->getVariable("CDC25C"); + + const float P_cycle_nb = FLAMEGPU->environment.getProperty("P_cycle_nb"); + const bool dummy_ncycle = FLAMEGPU->random.uniform() < P_cycle_nb ?
true : false; + + if (dummy_ncycle && s_neighbours <= 3) { + ++nb_cycle_counter[0]; + if (s_cycle < 12) { + ++nb_cycle_counter[1]; + if (s_cycle == 0) { + ++nb_cycle_counter[2]; + if (FLAMEGPU->random.uniform() < s_cycdiff) { + ++nb_cycle_counter[3]; + if (((s_MAPK_RAS == 1 || s_MYCN == 1) && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) { + ++nb_cycle_counter[4]; + s_cycle += 1; + } + } + } else if (((s_MAPK_RAS == 1 || s_MYCN == 1) && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) { + ++nb_cycle_counter[5]; + s_cycle += 1; + if (s_cycle >= 12 && ((s_MAPK_RAS == 1 && s_p21 == 0 && s_p27 == 0) || s_ID2 == 1) == 0) { + ++nb_cycle_counter[6]; + s_cycle -= 1; + } + } + } else if (s_cycle < 18) { + ++nb_cycle_counter[7]; + s_cycle += 1; + if (s_cycle >= 18 && s_CDC25C == 0) { + ++nb_cycle_counter[8]; + s_cycle -= 1; + } + } + } + FLAMEGPU->setVariable("cycle", s_cycle); + } + +With all this setup, the environment property ``nb_cycle_counter`` will contain the proportion of agents which took each branch at the end of every step. This value can be logged and graphed to compare with other implementations. + + +Related Links +------------- +* User Guide: :ref:`Configuring Data to be Logged` +* User Guide: :ref:`Defining Environment Macro Properties` \ No newline at end of file diff --git a/src/guide/debugging-models/printf.rst b/src/guide/debugging-models/printf.rst new file mode 100644 index 000000000..69f15c12f --- /dev/null +++ b/src/guide/debugging-models/printf.rst @@ -0,0 +1,76 @@ +.. _debugging_with_printf: + +printf +====== + +.. Workaround for nested markup https://docutils.sourceforge.io/FAQ.html#is-nested-inline-markup-possible +.. |printf| replace:: ``printf()`` +.. _printf: https://cplusplus.com/reference/cstdio/printf +.. |print| replace:: ``print()`` +.. _print: https://docs.python.org/3/library/functions.html#print + +Using |printf|_ (or |print|_ within python) is the usual first step towards debugging. + +C++ and Python share very similar syntax + +.. tabs:: + + .. 
code-tab:: cpp C++ + + // Note C++ does not implicitly terminate the string with a line-break + printf("%f: %d\n", foo, bar); + +.. tabs:: + + .. code-tab:: py Python + + # Note Python implicitly terminates the string with a line-break + print('%f: %d'%(foo, bar)) + +These statements can be used in C++ and Python host functions respectively to print messages to console. + +Printing From Agent Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Agent functions are written using C++, so require the ``printf()`` syntax. :ref:`Python agent functions` will not transform the python ``print()`` to the C++ equivalent. + +However, as agent functions execute for potentially millions of agents in parallel there are some additional things which should be considered: + +**Performance** + +Printing from agent functions, causes data to be copied from the GPU in order for it to be printed. Large amounts of printing can have a large performance impact + +**Order** + +Populations of CUDA threads do not execute in lockstep or necessarily order, so messages printed by different agents are likely to occur out of order, it may be useful to include an identifier of each message's source agent (e.g. using the ``%u`` returned by ``FLAMEGPU->getID()``). + +**Volume** + +Attempting to ``printf()`` from millions of agents simultaneously can lead to programs crashing. There isn't a hard rule for how much is too much, but printing from agent functions should be limited to a subset of the agent population or performed with small agent populations. + +**Environment Macro Properties** + +Due to how ``printf()`` supports generic type arguments the implicit cast, normally performed when reading macro environment properties, is not performed. As such, attempting to print an environment macro property directly will lead to an undefined value being printed. The below code provides examples that should and should not be used. + +.. tabs:: + + .. 
code-tab:: cuda CUDA C++ + + FLAMEGPU_AGENT_FUNCTION(agent_fn1, flamegpu::MessageNone, flamegpu::MessageNone) { + // Retrieve the macro property + auto foo = FLAMEGPU->environment.getMacroProperty("foo"); + + // These can be used to print a property + printf("%d\n", (int)foo[0]); + printf("%d\n", static_cast(foo[1])); + const int bar = foo[2]; + printf("%d\n", bar); + + + // This should not be used, it will compile but produce bad output + printf("%d\n", foo[3]); + } + +Related Links +------------- +* User Guide: :ref:`Python Agent Functions` \ No newline at end of file diff --git a/src/guide/seatbelts/index.rst b/src/guide/debugging-models/seatbelts.rst similarity index 58% rename from src/guide/seatbelts/index.rst rename to src/guide/debugging-models/seatbelts.rst index 60976c907..8293a3c3c 100644 --- a/src/guide/seatbelts/index.rst +++ b/src/guide/debugging-models/seatbelts.rst @@ -15,9 +15,6 @@ Most ``SEATBELTS`` checks are limited to device code within agent functions (typ CUDA does not support safely exiting device code early, via an exception or similar. All CUDA errors raised from device code execution are considered fatal, whereby the CUDA runtime is corrupted and data cannot be recovered from device memory. As such, in order to provide exceptions from agent functions, it is necessary for ``SEATBELTS`` to both log detail about the problem to device memory and return a sensible value (normally ``0``) in order to allow the agent function to complete without raising a CUDA error. Our testing has not found any cases where this currently fails, however it may be possible to structure code such that ``SEATBELTS`` does not prevent a CUDA error. - - - Enabling/Disabling SEATBELTS ---------------------------- ``SEATBELTS`` is a compile-time feature, whereby the compiler passes the C macro of the same name to all files. As such, it can only be toggled at CMake time (as mentioned above it cannot be disabled for Debug builds). 
@@ -27,6 +24,35 @@ By default when configuring CMake ``SEATBELTS`` is ``ON``. In order to disable it, for the fastest performance ``-DSEATBELTS=OFF`` must be passed to ``cmake`` at configure time. If using ``cmake-gui``, you should locate the ``SEATBELTS`` option in the central table, set it to ``OFF`` and press the ``Configure`` button followed by the ``Generate`` button. +Understanding SEATBELTS Exceptions +---------------------------------- +When ``SEATBELTS`` detects a problem in an agent function it will cause a :class:`DeviceError` to be raised. This error contains a message detailing the problem, for example: + +.. code-block:: none + + Device function 'inputdata' reported 4000 errors. + First error: + inputdata_impl_curve_rtc_dynamic.h(187)[2,0,0][64,0,0]: + Agent variable 'x33' was not found during getVariable(). + +Below this message has been broken down: + +* ``Device function 'inputdata' reported 4000 errors`` + * ``inputdata`` is the name of the agent function which raised the error. + * ``4000`` is the number of agents which reported an error during the function. +* ``First error:`` + * Only the first agent to report an error will have its location and message returned. However in most cases, all agents' errors will be caused by the same bug. +* ``inputdata_impl_curve_rtc_dynamic.h(187)[2,0,0][64,0,0]:`` + * This is the location of the error within the FLAME GPU 2 source, it will usually point to internal headers so is unlikely to be useful to you. + * ``inputdata_impl_curve_rtc_dynamic.h`` refers to the dynamically generated RTC curve header for the agent function ``inputdata``. + * ``(187)`` states that the error was thrown from line 187 of the above file. + * ``[2,0,0][64,0,0]`` Specifies the CUDA block and thread indices of the reporting thread. Non-deterministic atomics are used to decide the first agent/thread, so this value is likely to change with repeated runs.
+* ``Agent variable 'x33' was not found during getVariable().`` + * This is the bespoke message of the exception and will vary due to the cause. + * In this case it reports that the agent variable ``x33`` was requested, but does not exist. + +In most cases when FLAME GPU 2 raises an exception, the message will be printed to console by default. However, in some cases you may need to catch the exception and print its message manually (using the standard C++/Python error-handling techniques). + Related Links ------------- diff --git a/src/guide/debugging-models/using-a-debugger.rst b/src/guide/debugging-models/using-a-debugger.rst new file mode 100644 index 000000000..2170c2592 --- /dev/null +++ b/src/guide/debugging-models/using-a-debugger.rst @@ -0,0 +1,139 @@ +.. _UsingADebugger: + +Using a Debugger +================ + +FLAME GPU 2 models are executed using highly parallel GPU code, this often makes use of traditional debugging tools significantly more challenging. As such, attaching your model to a debugger should really be a last resort. + +Debugging FLAME GPU 2 models follows a fairly standard approach to CUDA debugging. However, the approaches differ slightly dependent on your operating system (Windows/Linux) and whether you are using the C++ or Python API. This section details how to attach your model to a debugger, the user-guide for specified debuggers should be referred to for details on using the debugger. + +In order to debug your model, it is necessary to produce a debug build or use a debug build of pyflamegpu. This removes optimisations, and can lead to performance that is prohibitively slow for large models. + +.. note:: + + It is not currently possible to debug :ref:`Python agent functions` using either Windows or Linux. Furthermore, we were unable to attach the Windows CUDA debugger to :ref:`RTC agent functions`. + +Windows +------- + +On Windows `Nsight Visual Studio Edition `__ is used to debug CUDA projects.
This installs as a Visual Studio extension, when installing CUDA (with Visual Studio pre-installed), and provides a graphical interface to debugging. + +C++ +~~~ + +In order to produce a debug build, it is necessary to ensure the active build configuration is set to ``Debug`` and ``x64``, prior to compilation, as shown in the image below. + +.. image:: visual_studio_build_config.png + :width: 313 + :alt: The active build configuration drop-downs in Visual Studio, setup for a Debug x64 build. + +Next, ensure your model is shown as the current startup project. The easiest way to check this is to check whether its name appears bold within the Solution Explorer (shown for boids_spatial3D in the below image). If this is not the case, it can be right-clicked and the option "Set as Startup Project" used from the context menu. + +.. image:: visual_studio_startup_project.png + :width: 328 + :alt: The solution explorer with boids_spatial3D as the startup project. + +Now if your model has been built, you can launch it with the Nsight debugger, via the ``Extensions > Nsight`` menu. + +.. image:: visual_studio_start_debugger.png + :width: 521 + :alt: The Nsight menu. + +Currently Nsight has two different debuggers, "Next-Gen" and "Legacy". Whilst they use much the same debugging GUI, they have some key differences. + +The legacy debugger only supports the GPU architectures earlier than Pascal (e.g. Kepler/Maxwell) and can only debug GPU code (the normal Visual Studio debugger must be used to debug host code independently). + +The next-gen debugger only supports the GPU architectures Pascal and later and can debug GPU and CPU code simultaneously, however this can lead to challenges when placing breakpoints in files containing both CPU and GPU sources. Additionally the next-gen debugger does not have integrated support for the CUDA memory checker, instead the standalone tool `Compute Sanitizer `__ (which installs alongside CUDA) must be used. + + +..
note:: + + Launching Nsight does not force build your project when it is out of date, unlike the regular Visual Studio debugger. If you have updated your code, you must build it again manually before restarting Nsight debugger. + +Python +~~~~~~ + +Based on our testing, it is not currently possible to debug Python's runtime compiled agent functions on Windows, all attempts to break during execution of agent functions failed. + +It is however possible to attach Visual Studio's host-code debugger to the executing Python process and breakpoint into the main FLAME GPU library's code. + +First, it is necessary to be using a debug build of pyflamegpu. It is recommended that you compile this yourself, as you will require the full Visual Studio project in order to use the debugger effectively. Ensure debug is selected as the configuration before you begin compilation. + +.. image:: visual_studio_build_config.png + :width: 313 + :alt: The active build configuration drop-downs in Visual Studio, setup for a Debug x64 build. + +Next, begin executing your code (in a Python environment with the debug copy of pyflamegpu). You may wish to add a call to Python's ``input()``, to halt execution. + +Once your code is executing, you can open the attach to process dialogue, via the ``Debug > Attach to Process...`` menu. + +.. image:: visual_studio_attach_to_process_dialog.png + :width: 840 + :alt: The Attach to Process dialog in Visual Studio. + +From here, you may need to select to attach to "Native code" (Nsight options are also available, however we were unable to get them to work). + +Finally, you can search for Python processes. If your code is executing a visualisation, the window's title will be listed making it easy to identify the correct process. Otherwise it may require some trial and error. + +After attaching to the process, you can create breakpoints within the host code of the FLAME GPU library which will halt when execution reaches them.
Likewise, this should provide additional information in the case an exception is thrown. + + +Linux +----- + +On Linux `CUDA-GDB `__ is used to debug CUDA projects. This installs alongside CUDA on Linux, and is an extended version of the command-line debugger `GDB `__. + +C++ +~~~ + +In order to produce a debug build, it is necessary to specify ``-DCMAKE_BUILD_TYPE=Debug`` when configuring CMake. When compiled this should produce a binary in the ``build/bin/Debug`` directory. + +This can then be launched with ``cuda-gdb``, e.g. ``cuda-gdb --args my_model -s 10 -r 12``. + +From here follows the normal gdb workflow, such as ``start`` (start the program executing), ``break <location>`` (specify a breakpoint using either ``<file>:<line>`` or ``<file>:<function>``, for example ``main.cu:86`` or ``main.cu:begin``), ``continue`` (continue from the implicit breakpoint at entry), ``backtrace`` (produce a stack trace after the debugger halts for a breakpoint or exception). Refer to the full `CUDA-GDB `__ and `GDB `__ guides for more detailed instructions. + +Python +~~~~~~ +In order to produce a debug build of pyflamegpu, it is necessary to specify ``-DCMAKE_BUILD_TYPE=Debug`` when configuring CMake. Additionally, you should pass ``-DEXPORT_RTC_SOURCES=ON``. When compiled this should produce a binary in the ``build/bin/Debug`` directory, and the compiled RTC sources will be exported to the working directory at runtime. + +.. note:: + + The dynamic RTC curve header generated for each agent function shares the same name ``curve_rtc_dynamic.h``, but they will be exported with the name ``_curve_rtc_dynamic.h``. The debugger is not able to differentiate between the different rtc headers for breakpointing etc. + +You can now debug your pyflamegpu model with a fairly normal GDB workflow. + +For example: + +.. tabs:: + + ..
code-tab:: bash Linux Bash/CUDA-GDB + + # Activate pyflamegpu virtual env + source build/lib/Debug/python/venv/bin/activate + # Navigate to model + cd examples/swig_boids_spatial3D + # Launch CUDA-GDB + cuda-gdb --args python boids_spatial3D.py -s 100 --verbose + # Start debugging (it will halt at entry to the program) + start + # Attach a breakpoint to an agent function + break outputdata_impl.cu:4 # Note that this is the agent function source exported by pyflamegpu + # Make breakpoint pending on future shared library load? (y or [n]) + y + # Attach a breakpoint to the core flamegpu library + break CUDASimulation.cu:simulate() + # Make breakpoint pending on future shared library load? (y or [n]) + y + # Continue from the initial breakpoint + cont + # CUDA-GDB will now trap when it reaches one of the breakpoints you specified or an exception is thrown + + +Related Links +------------- +* User Guide Page: :ref:`Building From Source` (C++) +* User Guide Page: :ref:`Building From Source` (Python) +* `Compute Sanitizer Manual `__ (CUDA MemCheck Tool) +* `Nsight Visual Studio Edition Manual `__ (Windows CUDA Debugger) +* `CUDA-GDB Manual `__ (Linux CUDA Debugger) +* `GDB Manual `__ (Linux Debugger) \ No newline at end of file diff --git a/src/guide/debugging-models/visual_studio_attach_to_process_dialog.png b/src/guide/debugging-models/visual_studio_attach_to_process_dialog.png new file mode 100644 index 000000000..91cf96db3 Binary files /dev/null and b/src/guide/debugging-models/visual_studio_attach_to_process_dialog.png differ diff --git a/src/guide/debugging-models/visual_studio_build_config.png b/src/guide/debugging-models/visual_studio_build_config.png new file mode 100644 index 000000000..de8b2fc9c Binary files /dev/null and b/src/guide/debugging-models/visual_studio_build_config.png differ diff --git a/src/guide/debugging-models/visual_studio_start_debugger.png b/src/guide/debugging-models/visual_studio_start_debugger.png new file mode 100644 index 
000000000..dab3defa3 Binary files /dev/null and b/src/guide/debugging-models/visual_studio_start_debugger.png differ diff --git a/src/guide/debugging-models/visual_studio_startup_project.png b/src/guide/debugging-models/visual_studio_startup_project.png new file mode 100644 index 000000000..f5a7875df Binary files /dev/null and b/src/guide/debugging-models/visual_studio_startup_project.png differ diff --git a/src/guide/index.rst b/src/guide/index.rst index 21a5e73d7..b374b926d 100644 --- a/src/guide/index.rst +++ b/src/guide/index.rst @@ -18,6 +18,6 @@ This user guide provides a clear introduction to the core features of FLAME GPU running-a-simulation/index.rst running-multiple-simulations/index.rst visualisation/index.rst + debugging-models/index.rst performance-troubleshooting/index.rst - flamegpu2-source/index.rst - seatbelts/index.rst \ No newline at end of file + flamegpu2-source/index.rst \ No newline at end of file diff --git a/src/guide/running-multiple-simulations/index.rst b/src/guide/running-multiple-simulations/index.rst index 015a6b50d..159a5824d 100644 --- a/src/guide/running-multiple-simulations/index.rst +++ b/src/guide/running-multiple-simulations/index.rst @@ -1,3 +1,5 @@ +.. _ensembles: + Running Multiple Simulations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^