From b90b87eb47b82333ad2f1797b267e7986741b27a Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 06:14:09 -0400 Subject: [PATCH 01/10] add instrumentation header and set up CMake. Also tidy up the cmake options for the project a bit --- .github/workflows/CI-build-and-test.yml | 2 +- CMakeLists.txt | 24 +++++++++++++++----- nuTens/CMakeLists.txt | 30 +++++++++++++++++-------- nuTens/instrumentation.hpp | 0 nuTens/nuTens-pch.hpp | 1 + 5 files changed, 42 insertions(+), 15 deletions(-) create mode 100644 nuTens/instrumentation.hpp diff --git a/.github/workflows/CI-build-and-test.yml b/.github/workflows/CI-build-and-test.yml index 48b0086..31f6d4e 100644 --- a/.github/workflows/CI-build-and-test.yml +++ b/.github/workflows/CI-build-and-test.yml @@ -38,7 +38,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -DTEST_COVERAGE=ON -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -DNT_TEST_COVERAGE=ON -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build # Build your program with the given configuration diff --git a/CMakeLists.txt b/CMakeLists.txt index 45050d6..d42f97d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,9 +3,9 @@ set(CMAKE_CXX_STANDARD 17) project(nuTens) -OPTION(TEST_COVERAGE "TEST_COVERAGE" OFF) +OPTION(NT_TEST_COVERAGE "produce code coverage reports when running tests" OFF) -IF(TEST_COVERAGE) +IF(NT_TEST_COVERAGE) message("Adding flags to check test coverage") add_compile_options("--coverage") add_link_options("--coverage") @@ -22,8 +22,8 @@ 
CPMAddPackage("gh:gabime/spdlog@1.8.2") ## check build times ## have this optional as it's not supported on all CMake platforms -OPTION(BUILD_TIMING "output time to build each target" OFF) -IF(BUILD_TIMING) +OPTION(NT_BUILD_TIMING "output time to build each target" OFF) +IF(NT_BUILD_TIMING) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_COMMAND} -E time") ENDIF() @@ -33,4 +33,18 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") add_subdirectory(nuTens) -add_subdirectory(tests) \ No newline at end of file +add_subdirectory(tests) + + +message( STATUS "The following variables have been used to configure the build: " ) +get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + unset(MATCHED) + string(REGEX MATCH "^NT_" MATCHED "${_variableName}") + if (NOT MATCHED) + continue() + endif() + + message(STATUS " ${_variableName}=${${_variableName}}") +endforeach() \ No newline at end of file diff --git a/nuTens/CMakeLists.txt b/nuTens/CMakeLists.txt index 24c32cf..30f8314 100644 --- a/nuTens/CMakeLists.txt +++ b/nuTens/CMakeLists.txt @@ -1,14 +1,15 @@ - -## set up logging +######################## +#### set up logging #### +######################## add_library(logging logging.hpp) target_link_libraries(logging spdlog::spdlog) set_target_properties(logging PROPERTIES LINKER_LANGUAGE CXX) -set( LOG_LEVEL "INFO" CACHE STRING "the level of detail to log to the console" ) +set( NT_LOG_LEVEL "INFO" CACHE STRING "the level of detail to log to the console" ) -## Convert LOG_LEVEL to all upper case so that we aren't case sensitive to user input -string( TOUPPER "${LOG_LEVEL}" LOG_LEVEL_UPPER ) +## Convert NT_LOG_LEVEL to all upper case so that we aren't case sensitive to user input +string( TOUPPER "${NT_LOG_LEVEL}" LOG_LEVEL_UPPER ) ## Check the specified log level is valid set(VALID_LOG_OPTIONS SILENT ERROR WARNING INFO DEBUG TRACE) @@ -24,17 +25,28 @@ target_compile_definitions(logging PUBLIC 
NT_LOG_LEVEL=NT_LOG_LEVEL_${LOG_LEVEL_ +################################ +#### set up instrumentation #### +################################ +add_library(instrumentation instrumentation.hpp) +set_target_properties(instrumentation PROPERTIES LINKER_LANGUAGE CXX) + +option( NT_PROFILING "enable profiling of the code" OFF ) +if( NT_PROFILING ) + target_compile_definitions( instrumentation PUBLIC USE_PROFILING ) +endif() + ## if user wants to use pch then we use the pch ## people, especially if developing, might want to use this as including tensor related things ## can be excruciatingly sloow when building -OPTION(USE_PCH "USE_PCH" OFF) -IF(USE_PCH) +OPTION(NT_USE_PCH "NT_USE_PCH" OFF) +IF(NT_USE_PCH) message("Using precompiled header") add_library(nuTens-pch nuTens-pch.hpp) - SET(PCH_LIBS "${PCH_LIBS};logging") + SET(PCH_LIBS "${PCH_LIBS};logging;instrumentation") ## the headers included in the PCH will (at some point) depend on which tensor library is being used IF(TORCH_FOUND) @@ -50,7 +62,7 @@ IF(USE_PCH) target_precompile_headers(nuTens-pch PUBLIC nuTens-pch.hpp) set_target_properties(nuTens-pch PROPERTIES LINKER_LANGUAGE CXX) -ENDIF() ## end USE_PCH block +ENDIF() ## end NT_USE_PCH block diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp new file mode 100644 index 0000000..e69de29 diff --git a/nuTens/nuTens-pch.hpp b/nuTens/nuTens-pch.hpp index d9fcf0b..973c409 100644 --- a/nuTens/nuTens-pch.hpp +++ b/nuTens/nuTens-pch.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include From a8862b7eec59abfc0b2b8bc2b4d696de96cfb833 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 10:00:16 -0400 Subject: [PATCH 02/10] add functionality to do instrumented profiling --- nuTens/instrumentation.hpp | 197 +++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp index e69de29..dbc070c 100644 --- a/nuTens/instrumentation.hpp +++ 
b/nuTens/instrumentation.hpp @@ -0,0 +1,197 @@ +#pragma once + +#include +#include +#include +#include +#include + +/*! \file instrumentation.hpp + \brief Define utilities for instrumentation of the code + + This is the home of anything that gets placed inside other classes or functions in order to instrument the code. + e.g. for profiling or debugging. Everything should ideally be macro-fied so it can be included only for certain + builds, or specified by build time options. +*/ + +struct ProfileResult +{ + /// @brief Hold the results of a profiled function to be written out. + + std::string name; + long long start; + long long end; + uint32_t threadID; +}; + +class ProfileWriter +{ + /*! @class ProfileWriter + * @brief Singleton class to collect timing information for functions and write out to a file that can be inspected + * later with visual profiling tool + * + * Writes out profiling information in a json format readable by chrome tracing + * (https://www.chromium.org/developers/how-tos/trace-event-profiling-tool/) Use the macros provided to instrument + * the code like: \code{.cpp} + * + * \endcode + * + * Then open up your favourite chromium based browser and go to chrome://tracing. You can then just drag and drop + * the profiling json file and should see a lovely display of the collected profile information. + */ + + /// \todo currently only support the format used by chrome tracing. Would be nice to support other formats too. 
+ /// Should just be a case of adding additional option for writeProfile and header and footer + + public: + /// @brief Constructor + ProfileWriter() : _name(""), _profileCount(0) + { + } + + /// @brief Set up the session + /// @param[in] name The name of the timer + /// @param[in] filePath The destination of the output file + void beginSession(const std::string &name, const std::string &filePath = "results.json") + { + _outputStream.open(filePath); + writeHeader(); + _name = name; + } + + /// @brief Close the session and clean up + void endSession() + { + writeFooter(); + _outputStream.close(); + _name = ""; + _profileCount = 0; + } + + /// @brief Write out the results of a profiled function + /// @param[in] result The result to write + void writeProfile(const ProfileResult &result) + { + if (_profileCount++ > 0) + { + _outputStream << ","; + } + + std::string name = result.name; + std::replace(name.begin(), name.end(), '"', '\''); + + _outputStream << "{"; + _outputStream << "\"cat\":\"function\","; + _outputStream << "\"dur\":" << (result.end - result.start) << ','; + _outputStream << "\"name\":\"" << name << "\","; + _outputStream << "\"ph\":\"X\","; + _outputStream << "\"pid\":0,"; + _outputStream << "\"tid\":" << result.threadID << ","; + _outputStream << "\"ts\":" << result.start; + _outputStream << "}"; + + _outputStream.flush(); + } + + /// @brief Write the file header + void writeHeader() + { + _outputStream << "{\"otherData\": {},\"traceEvents\":["; + _outputStream.flush(); + } + + /// @brief Write the file footer + void writeFooter() + { + _outputStream << "]}"; + _outputStream.flush(); + } + + /// @brief Get a reference to the ProfileWriter, if it has not yet been instantiated, this will do so + static ProfileWriter &get() + { + static ProfileWriter instance; // this will be instantiated the first time ProfileWriter::get() is called and + // killed at the end of the program + return instance; + } + + private: + std::string _name; + std::ofstream 
_outputStream; + unsigned int _profileCount; +}; + +class InstrumentationTimer +/*! + * @class InstrumentationTimer + * @brief Class to perform the actual timing + * + * + * + */ +{ + public: + /// @brief Construct an InstrumentationTimer object and start the clock + /// @param[in] name The name of the profile. Typically use __FUNCSIG__ so it's clear which part of the code is being + /// profiled. + InstrumentationTimer(const std::string &name) : _name(name), _stopped(false) + { + _startTimepoint = std::chrono::high_resolution_clock::now(); + } + + /// @brief Destroy the timer object and stop the timer by calling stop() + ~InstrumentationTimer() + { + if (!_stopped) + stop(); + } + + /// @brief Stop the timer and write out the profile result using the ProfileWriter + void stop() + { + auto endTimepoint = std::chrono::high_resolution_clock::now(); + + long long start = + std::chrono::time_point_cast(_startTimepoint).time_since_epoch().count(); + long long end = + std::chrono::time_point_cast(endTimepoint).time_since_epoch().count(); + + uint32_t threadID = std::hash{}(std::this_thread::get_id()); + ProfileWriter::get().writeProfile({_name, start, end, threadID}); + + _stopped = true; + } + + private: + std::string _name; + std::chrono::time_point _startTimepoint; + bool _stopped; +}; + +/// @brief Begin a profiling session +/// Will open up the results json file and set things up. 
+/// If USE_PROFILING not defined will be empty so that it can be stripped from non-debug builds +/// @param[in] sessionName The name of the session +#ifdef USE_PROFILING +// NOLINTNEXTLINE +#define NT_PROFILE_BEGINSESSION(sessionName) \ + ProfileWriter::get().beginSession(sessionName, std::string(sessionName) + "-results.json") +#else +#define NT_PROFILE_BEGINSESSION(sessionName) +#endif + +/// @brief Profile the current scope +#ifdef USE_PROFILING +// NOLINTNEXTLINE +#define NT_PROFILE() InstrumentationTimer timer##__LINE__(std::string(__PRETTY_FUNCTION__)) +#else +#define NT_PROFILE() +#endif + +/// @brief End the profiling session +#ifdef USE_PROFILING +// NOLINTNEXTLINE +#define NT_PROFILE_ENDSESSION() ProfileWriter::get().endSession() +#else +#define NT_PROFILE_ENDSESSION() +#endif \ No newline at end of file From 8733c8bf8498927500c60cf0ec101a993c91425b Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 10:02:10 -0400 Subject: [PATCH 03/10] integrate profiling into Tensor --- nuTens/tensors/CMakeLists.txt | 1 + nuTens/tensors/tensor.hpp | 9 +++ nuTens/tensors/torch-tensor.cpp | 104 ++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/nuTens/tensors/CMakeLists.txt b/nuTens/tensors/CMakeLists.txt index 949918b..4ba9c23 100644 --- a/nuTens/tensors/CMakeLists.txt +++ b/nuTens/tensors/CMakeLists.txt @@ -18,5 +18,6 @@ ELSE() ENDIF() ENDIF() +target_link_libraries(tensor PUBLIC instrumentation) target_include_directories(tensor PUBLIC "${CMAKE_SOURCE_DIR}") set_target_properties(tensor PROPERTIES LINKER_LANGUAGE CXX) \ No newline at end of file diff --git a/nuTens/tensors/tensor.hpp b/nuTens/tensors/tensor.hpp index 31f2a2d..ae1e4ae 100644 --- a/nuTens/tensors/tensor.hpp +++ b/nuTens/tensors/tensor.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -275,6 +276,8 @@ class Tensor /// @arg indices The indices of the value to set template inline T getValue(const std::vector &indices) { + 
NT_PROFILE(); + std::vector indicesVec; indicesVec.reserve(indices.size()); for (const int &i : indices) @@ -288,6 +291,9 @@ class Tensor /// Get the value of a size 0 tensor (scalar) template inline T getValue() { + + NT_PROFILE(); + return _tensor.item(); } #endif @@ -299,6 +305,9 @@ class Tensor public: [[nodiscard]] inline const torch::Tensor &getTensor() const { + + NT_PROFILE(); + return _tensor; } diff --git a/nuTens/tensors/torch-tensor.cpp b/nuTens/tensors/torch-tensor.cpp index fab7b13..203f5e0 100644 --- a/nuTens/tensors/torch-tensor.cpp +++ b/nuTens/tensors/torch-tensor.cpp @@ -19,6 +19,8 @@ std::string Tensor::getTensorLibrary() Tensor &Tensor::ones(int length, NTdtypes::scalarType type, NTdtypes::deviceType device, bool requiresGrad) { + NT_PROFILE(); + _tensor = torch::ones(length, torch::TensorOptions() .dtype(scalarTypeMap.at(type)) .device(deviceTypeMap.at(device)) @@ -30,6 +32,8 @@ Tensor &Tensor::ones(int length, NTdtypes::scalarType type, NTdtypes::deviceType Tensor &Tensor::ones(const std::vector &shape, NTdtypes::scalarType type, NTdtypes::deviceType device, bool requiresGrad) { + NT_PROFILE(); + _tensor = torch::ones(c10::IntArrayRef(shape), torch::TensorOptions() .dtype(scalarTypeMap.at(type)) .device(deviceTypeMap.at(device)) @@ -39,6 +43,8 @@ Tensor &Tensor::ones(const std::vector &shape, NTdtypes::scalarType ty Tensor &Tensor::zeros(int length, NTdtypes::scalarType type, NTdtypes::deviceType device, bool requiresGrad) { + NT_PROFILE(); + _tensor = torch::zeros(length, scalarTypeMap.at(type)); return *this; } @@ -46,30 +52,40 @@ Tensor &Tensor::zeros(int length, NTdtypes::scalarType type, NTdtypes::deviceTyp Tensor &Tensor::zeros(const std::vector &shape, NTdtypes::scalarType type, NTdtypes::deviceType device, bool requiresGrad) { + NT_PROFILE(); + _tensor = torch::zeros(c10::IntArrayRef(shape), scalarTypeMap.at(type)); return *this; } Tensor &Tensor::dType(NTdtypes::scalarType type) { + NT_PROFILE(); + _tensor = 
_tensor.to(scalarTypeMap.at(type)); return *this; } Tensor &Tensor::device(NTdtypes::deviceType device) { + NT_PROFILE(); + _tensor = _tensor.to(deviceTypeMap.at(device)); return *this; } Tensor &Tensor::requiresGrad(bool reqGrad) { + NT_PROFILE(); + _tensor = _tensor.set_requires_grad(reqGrad); return *this; } Tensor Tensor::getValue(const std::vector &indices) const { + NT_PROFILE(); + std::vector indicesVec; for (const Tensor::indexType &i : indices) { @@ -95,11 +111,15 @@ Tensor Tensor::getValue(const std::vector &indices) const void Tensor::setValue(const Tensor &indices, const Tensor &value) { + NT_PROFILE(); + _tensor.index_put_({indices._tensor}, value._tensor); } void Tensor::setValue(const std::vector &indices, const Tensor &value) { + NT_PROFILE(); + std::vector indicesVec; for (const Tensor::indexType &i : indices) { @@ -123,6 +143,8 @@ void Tensor::setValue(const std::vector &indices, const Tenso void Tensor::setValue(const std::vector &indices, float value) { + NT_PROFILE(); + std::vector indicesVec; indicesVec.reserve(indices.size()); for (const int &i : indices) @@ -135,6 +157,8 @@ void Tensor::setValue(const std::vector &indices, float value) void Tensor::setValue(const std::vector &indices, std::complex value) { + NT_PROFILE(); + std::vector indicesVec; indicesVec.reserve(indices.size()); for (const int &i : indices) @@ -147,16 +171,22 @@ void Tensor::setValue(const std::vector &indices, std::complex value size_t Tensor::getNdim() const { + NT_PROFILE(); + return _tensor._dimI(); } int Tensor::getBatchDim() const { + NT_PROFILE(); + return _tensor.sizes()[0]; } std::vector Tensor::getShape() const { + NT_PROFILE(); + std::vector ret(getNdim()); for (size_t i = 0; i < getNdim(); i++) { @@ -167,6 +197,8 @@ std::vector Tensor::getShape() const Tensor Tensor::matmul(const Tensor &t1, const Tensor &t2) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::matmul(t1._tensor, t2._tensor); return ret; @@ -174,6 +206,8 @@ Tensor Tensor::matmul(const Tensor 
&t1, const Tensor &t2) Tensor Tensor::outer(const Tensor &t1, const Tensor &t2) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::outer(t1._tensor, t2._tensor); return ret; @@ -181,6 +215,8 @@ Tensor Tensor::outer(const Tensor &t1, const Tensor &t2) Tensor Tensor::mul(const Tensor &t1, const Tensor &t2) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::mul(t1._tensor, t2._tensor); return ret; @@ -188,6 +224,8 @@ Tensor Tensor::mul(const Tensor &t1, const Tensor &t2) Tensor Tensor::div(const Tensor &t1, const Tensor &t2) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::div(t1._tensor, t2._tensor); return ret; @@ -195,6 +233,8 @@ Tensor Tensor::div(const Tensor &t1, const Tensor &t2) Tensor Tensor::pow(const Tensor &t, float s) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::pow(t._tensor, s); return ret; @@ -202,6 +242,8 @@ Tensor Tensor::pow(const Tensor &t, float s) Tensor Tensor::pow(const Tensor &t, std::complex s) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::pow(t._tensor, c10::complex(s.real(), s.imag())); return ret; @@ -209,6 +251,8 @@ Tensor Tensor::pow(const Tensor &t, std::complex s) Tensor Tensor::exp(const Tensor &t) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::exp(t._tensor); return ret; @@ -216,6 +260,8 @@ Tensor Tensor::exp(const Tensor &t) Tensor Tensor::transpose(const Tensor &t, int dim1, int dim2) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::transpose(t._tensor, dim1, dim2); return ret; @@ -223,6 +269,8 @@ Tensor Tensor::transpose(const Tensor &t, int dim1, int dim2) Tensor Tensor::scale(const Tensor &t, float s) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::multiply(t._tensor, s); return ret; @@ -230,6 +278,8 @@ Tensor Tensor::scale(const Tensor &t, float s) Tensor Tensor::scale(const Tensor &t, std::complex s) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::multiply(t._tensor, c10::complex(s.real(), s.imag())); return ret; @@ -237,51 +287,71 @@ Tensor Tensor::scale(const Tensor &t, std::complex 
s) void Tensor::matmul_(const Tensor &t2) { + NT_PROFILE(); + _tensor = torch::matmul(_tensor, t2._tensor); } void Tensor::mul_(const Tensor &t2) { + NT_PROFILE(); + _tensor = torch::mul(_tensor, t2._tensor); } void Tensor::div_(const Tensor &t2) { + NT_PROFILE(); + _tensor = torch::div(_tensor, t2._tensor); } void Tensor::scale_(float s) { + NT_PROFILE(); + _tensor = torch::multiply(_tensor, s); } void Tensor::scale_(std::complex s) { + NT_PROFILE(); + _tensor = torch::multiply(_tensor, c10::complex(s.real(), s.imag())); } void Tensor::pow_(float s) { + NT_PROFILE(); + _tensor = torch::pow(_tensor, s); } void Tensor::pow_(std::complex s) { + NT_PROFILE(); + _tensor = torch::pow(_tensor, c10::complex(s.real(), s.imag())); } void Tensor::exp_() { + NT_PROFILE(); + _tensor = torch::exp(_tensor); } void Tensor::transpose_(int dim1, int dim2) { + NT_PROFILE(); + _tensor = torch::transpose(_tensor, dim1, dim2); } void Tensor::eig(const Tensor &t, Tensor &eVals, Tensor &eVecs) { + NT_PROFILE(); + auto ret = torch::linalg_eig(t._tensor); eVals._tensor = std::get<1>(ret); eVecs._tensor = std::get<0>(ret); @@ -289,6 +359,8 @@ void Tensor::eig(const Tensor &t, Tensor &eVals, Tensor &eVecs) Tensor Tensor::real() const { + NT_PROFILE(); + Tensor ret; ret._tensor = at::real(_tensor); return ret; @@ -296,6 +368,8 @@ Tensor Tensor::real() const Tensor Tensor::imag() const { + NT_PROFILE(); + Tensor ret; ret._tensor = at::imag(_tensor); return ret; @@ -303,6 +377,8 @@ Tensor Tensor::imag() const Tensor Tensor::conj() const { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::conj(_tensor); // torch::conj() returns a view of the original tensor @@ -313,6 +389,8 @@ Tensor Tensor::conj() const Tensor Tensor::abs() const { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::abs(_tensor); return ret; @@ -320,6 +398,8 @@ Tensor Tensor::abs() const Tensor Tensor::angle() const { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::angle(_tensor); return ret; @@ -327,16 +407,22 @@ Tensor 
Tensor::angle() const bool Tensor::operator==(const Tensor &rhs) const { + NT_PROFILE(); + return at::equal(_tensor, rhs._tensor); } bool Tensor::operator!=(const Tensor &rhs) const { + NT_PROFILE(); + return !at::equal(_tensor, rhs._tensor); } Tensor Tensor::operator+(const Tensor &rhs) const { + NT_PROFILE(); + Tensor ret; ret._tensor = _tensor + rhs._tensor; return ret; @@ -344,6 +430,8 @@ Tensor Tensor::operator+(const Tensor &rhs) const Tensor Tensor::operator-(const Tensor &rhs) const { + NT_PROFILE(); + Tensor ret; ret._tensor = _tensor - rhs._tensor; return ret; @@ -351,6 +439,8 @@ Tensor Tensor::operator-(const Tensor &rhs) const Tensor Tensor::operator-() const { + NT_PROFILE(); + Tensor ret; ret._tensor = -_tensor; return ret; @@ -358,6 +448,8 @@ Tensor Tensor::operator-() const Tensor Tensor::cumsum(int dim) const { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::cumsum(_tensor, dim); return ret; @@ -365,6 +457,8 @@ Tensor Tensor::cumsum(int dim) const Tensor Tensor::sum() const { + NT_PROFILE(); + Tensor ret; ret._tensor = _tensor.sum(); return ret; @@ -372,11 +466,15 @@ Tensor Tensor::sum() const void Tensor::backward() const { + NT_PROFILE(); + _tensor.backward(); } Tensor Tensor::grad() const { + NT_PROFILE(); + Tensor ret; ret._tensor = _tensor.grad(); return ret; @@ -384,6 +482,8 @@ Tensor Tensor::grad() const Tensor Tensor::sin(const Tensor &t) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::sin(t._tensor); return ret; @@ -391,6 +491,8 @@ Tensor Tensor::sin(const Tensor &t) Tensor Tensor::cos(const Tensor &t) { + NT_PROFILE(); + Tensor ret; ret._tensor = torch::cos(t._tensor); return ret; @@ -398,6 +500,8 @@ Tensor Tensor::cos(const Tensor &t) std::string Tensor::toString() const { + NT_PROFILE(); + std::ostringstream stream; stream << _tensor; return stream.str(); From 96d76856dbafe86a6c736c3a88944fe97c982ea4 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 10:04:31 -0400 Subject: [PATCH 04/10] add instrumentation to 
propagators --- nuTens/propagator/CMakeLists.txt | 2 +- nuTens/propagator/base-matter-solver.hpp | 1 + nuTens/propagator/const-density-solver.cpp | 1 + nuTens/propagator/const-density-solver.hpp | 2 ++ nuTens/propagator/propagator.cpp | 4 ++++ nuTens/propagator/propagator.hpp | 4 ++++ 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nuTens/propagator/CMakeLists.txt b/nuTens/propagator/CMakeLists.txt index 4661f76..ad37e47 100644 --- a/nuTens/propagator/CMakeLists.txt +++ b/nuTens/propagator/CMakeLists.txt @@ -13,7 +13,7 @@ IF(USE_PCH) target_precompile_headers(propagator REUSE_FROM tensor) ENDIF() -target_link_libraries(propagator PUBLIC tensor constants) +target_link_libraries(propagator PUBLIC tensor constants instrumentation) target_include_directories(propagator PUBLIC "${CMAKE_SOURCE_DIR}") set_target_properties(propagator PROPERTIES LINKER_LANGUAGE CXX) diff --git a/nuTens/propagator/base-matter-solver.hpp b/nuTens/propagator/base-matter-solver.hpp index 045af13..c7fd5e7 100644 --- a/nuTens/propagator/base-matter-solver.hpp +++ b/nuTens/propagator/base-matter-solver.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include class BaseMatterSolver diff --git a/nuTens/propagator/const-density-solver.cpp b/nuTens/propagator/const-density-solver.cpp index a409528..a6fa482 100644 --- a/nuTens/propagator/const-density-solver.cpp +++ b/nuTens/propagator/const-density-solver.cpp @@ -2,6 +2,7 @@ void ConstDensityMatterSolver::calculateEigenvalues(const Tensor &energies, Tensor &eigenvectors, Tensor &eigenvalues) { + NT_PROFILE(); Tensor hamiltonian; hamiltonian.zeros({energies.getBatchDim(), nGenerations, nGenerations}, NTdtypes::kComplexFloat); diff --git a/nuTens/propagator/const-density-solver.hpp b/nuTens/propagator/const-density-solver.hpp index c5c4a9c..26c87ee 100644 --- a/nuTens/propagator/const-density-solver.hpp +++ b/nuTens/propagator/const-density-solver.hpp @@ -44,6 +44,7 @@ class ConstDensityMatterSolver : public BaseMatterSolver /// @param newPMNS 
The new matrix to set inline void setPMNS(const Tensor &newPMNS) { + NT_PROFILE(); PMNS = newPMNS; // construct the outer product of the electron neutrino row of the PMNS @@ -56,6 +57,7 @@ class ConstDensityMatterSolver : public BaseMatterSolver /// @param newMasses The new masses inline void setMasses(const Tensor &newMasses) { + NT_PROFILE(); masses = newMasses; // construct the diagonal mass^2 matrix used in the hamiltonian diff --git a/nuTens/propagator/propagator.cpp b/nuTens/propagator/propagator.cpp index 648fb0a..e1dd4b1 100644 --- a/nuTens/propagator/propagator.cpp +++ b/nuTens/propagator/propagator.cpp @@ -2,6 +2,8 @@ Tensor Propagator::calculateProbs(const Tensor &energies) const { + NT_PROFILE(); + Tensor ret; // if a matter solver was specified, use effective values for masses and PMNS @@ -27,6 +29,8 @@ Tensor Propagator::calculateProbs(const Tensor &energies) const Tensor Propagator::_calculateProbs(const Tensor &energies, const Tensor &massesSq, const Tensor &PMNS) const { + NT_PROFILE(); + Tensor weightMatrix; weightMatrix.ones({energies.getBatchDim(), _nGenerations, _nGenerations}, NTdtypes::kComplexFloat) .requiresGrad(false); diff --git a/nuTens/propagator/propagator.hpp b/nuTens/propagator/propagator.hpp index 890f414..f949b76 100644 --- a/nuTens/propagator/propagator.hpp +++ b/nuTens/propagator/propagator.hpp @@ -40,6 +40,7 @@ class Propagator /// @param newSolver A derivative of BaseMatterSolver inline void setMatterSolver(std::unique_ptr &newSolver) { + NT_PROFILE(); _matterSolver = std::move(newSolver); _matterSolver->setMasses(_masses); _matterSolver->setPMNS(_pmnsMatrix); @@ -67,6 +68,7 @@ class Propagator /// @param newPMNS The new matrix to use inline void setPMNS(Tensor &newPMNS) { + NT_PROFILE(); _pmnsMatrix = newPMNS; if (_matterSolver != nullptr) { @@ -83,6 +85,7 @@ class Propagator /// @param value The new value inline void setPMNS(const std::vector &indices, float value) { + NT_PROFILE(); _pmnsMatrix.setValue(indices, value); } @@ 
-91,6 +94,7 @@ class Propagator /// @param value The new value inline void setPMNS(const std::vector &indices, std::complex value) { + NT_PROFILE(); _pmnsMatrix.setValue(indices, value); } From 4f6da3dbd57f020f09bee209ec5aa3dd3c9bcef1 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 10:10:23 -0400 Subject: [PATCH 05/10] add start and stop for instrumentation sessions to the test apps --- tests/CMakeLists.txt | 2 +- tests/tensor-basic.cpp | 6 ++++++ tests/two-flavour-const-matter.cpp | 6 ++++++ tests/two-flavour-vacuum.cpp | 7 +++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a5c0e1a..2820247 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -13,7 +13,7 @@ foreach(TESTNAME target_precompile_headers("${TESTNAME}" REUSE_FROM tensor) ENDIF() - target_link_libraries("${TESTNAME}" PUBLIC test-utils tensor propagator) + target_link_libraries("${TESTNAME}" PUBLIC test-utils tensor propagator instrumentation) target_include_directories("${TESTNAME}" PUBLIC "${CMAKE_SOURCE_DIR}") add_test(NAME "${TESTNAME}-test" COMMAND "${TESTNAME}") diff --git a/tests/tensor-basic.cpp b/tests/tensor-basic.cpp index db26407..514d070 100644 --- a/tests/tensor-basic.cpp +++ b/tests/tensor-basic.cpp @@ -9,6 +9,10 @@ int main() { + NT_PROFILE_BEGINSESSION("tensor-basic-test"); + + NT_PROFILE(); + std::cout << "Tensor library: " << Tensor::getTensorLibrary() << std::endl; std::cout << "########################################" << std::endl; @@ -135,4 +139,6 @@ int main() std::cout << complexGradTest.grad().real() << std::endl; std::cout << " Imag: " << std::endl; std::cout << complexGradTest.grad().imag() << std::endl << std::endl; + + NT_PROFILE_ENDSESSION(); } \ No newline at end of file diff --git a/tests/two-flavour-const-matter.cpp b/tests/two-flavour-const-matter.cpp index 4a8c577..2f93bee 100644 --- a/tests/two-flavour-const-matter.cpp +++ b/tests/two-flavour-const-matter.cpp @@ -6,6 +6,10 
@@ using namespace Testing; int main() { + NT_PROFILE_BEGINSESSION("two-flavour-const-matter-test"); + + NT_PROFILE(); + float m1 = 1.0; float m2 = 2.0; float energy = 100.0; @@ -87,4 +91,6 @@ int main() std::cout << "###############################" << std::endl << std::endl; } + + NT_PROFILE_ENDSESSION(); } \ No newline at end of file diff --git a/tests/two-flavour-vacuum.cpp b/tests/two-flavour-vacuum.cpp index 3126004..e17887b 100644 --- a/tests/two-flavour-vacuum.cpp +++ b/tests/two-flavour-vacuum.cpp @@ -6,6 +6,11 @@ using namespace Testing; int main() { + + NT_PROFILE_BEGINSESSION("two-flavour-vacuum-test"); + + NT_PROFILE(); + float m1 = 0.1; float m2 = 0.5; float energy = 1.0; @@ -61,4 +66,6 @@ int main() TEST_EXPECTED(probabilities.getValue({0, 1, 0}), bargerProp.calculateProb(energy, 1, 0), "probability for alpha == 1, beta == 0", 0.00001) } + + NT_PROFILE_ENDSESSION(); } \ No newline at end of file From b3cb6b8f2be46d5c99435b53936b8db90416f15f Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 15:11:38 +0100 Subject: [PATCH 06/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 396ba17..47f54a5 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ make test - [x] Doxygen documentation with automatic deployment - [x] Add test coverage checks into CI - [x] Integrate linting ( [cpp-linter](https://github.com/cpp-linter)? ) -- [ ] Add instrumentation library for benchmarking and profiling +- [x] Add instrumentation library for benchmarking and profiling - [ ] Add suite of benchmarking tests - [ ] Integrate benchmarks into CI ( maybe use [hyperfine](https://github.com/sharkdp/hyperfine) and [bencher](https://bencher.dev/) for this? 
) - [ ] Add proper unit tests From 336f4801c7cb81cb3cac4eb48d00928124a21c09 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 10:16:02 -0400 Subject: [PATCH 07/10] better name for profile results --- nuTens/instrumentation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp index dbc070c..f302e6c 100644 --- a/nuTens/instrumentation.hpp +++ b/nuTens/instrumentation.hpp @@ -175,7 +175,7 @@ class InstrumentationTimer #ifdef USE_PROFILING // NOLINTNEXTLINE #define NT_PROFILE_BEGINSESSION(sessionName) \ - ProfileWriter::get().beginSession(sessionName, std::string(sessionName) + "-results.json") + ProfileWriter::get().beginSession(sessionName, std::string(sessionName) + "-profile.json") #else #define NT_PROFILE_BEGINSESSION(sessionName) #endif From 7cb2f90cfac5b684ccfe0c10fc6effb890606c40 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 15:52:05 +0100 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- nuTens/instrumentation.hpp | 20 ++++++++++---------- nuTens/propagator/const-density-solver.hpp | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp index f302e6c..b9127e3 100644 --- a/nuTens/instrumentation.hpp +++ b/nuTens/instrumentation.hpp @@ -5,7 +5,7 @@ #include #include #include - +#include /*! 
\file instrumentation.hpp \brief Define utilities for instrumentation of the code @@ -45,7 +45,7 @@ class ProfileWriter public: /// @brief Constructor - ProfileWriter() : _name(""), _profileCount(0) + ProfileWriter() { } @@ -81,10 +81,10 @@ class ProfileWriter std::replace(name.begin(), name.end(), '"', '\''); _outputStream << "{"; - _outputStream << "\"cat\":\"function\","; + _outputStream << R"("cat":"function",)"; _outputStream << "\"dur\":" << (result.end - result.start) << ','; - _outputStream << "\"name\":\"" << name << "\","; - _outputStream << "\"ph\":\"X\","; + _outputStream << R"("name":")" << name << "\","; + _outputStream << R"("ph":"X",)"; _outputStream << "\"pid\":0,"; _outputStream << "\"tid\":" << result.threadID << ","; _outputStream << "\"ts\":" << result.start; @@ -96,7 +96,7 @@ class ProfileWriter /// @brief Write the file header void writeHeader() { - _outputStream << "{\"otherData\": {},\"traceEvents\":["; + _outputStream << R"({"otherData": {},"traceEvents":[)"; _outputStream.flush(); } @@ -118,7 +118,7 @@ class ProfileWriter private: std::string _name; std::ofstream _outputStream; - uint _profileCount; + uint _profileCount{0}; }; class InstrumentationTimer @@ -134,7 +134,7 @@ class InstrumentationTimer /// @brief Construct an InstrumentationTimer object and start the clock /// @param[in] name The name of the profile. Typically use __FUNCSIG__ so it's clear which part of the code is being /// profiled. 
- InstrumentationTimer(const std::string &name) : _name(name), _stopped(false) + InstrumentationTimer(std::string name) : _name(std::move(name)), _stopped(false) { _startTimepoint = std::chrono::high_resolution_clock::now(); } @@ -142,10 +142,10 @@ class InstrumentationTimer /// @brief Destroy the timer object and stop the timer by calling stop() ~InstrumentationTimer() { - if (!_stopped) + if (!_stopped) { stop(); } - +} /// @brief Stop the timer and write out the profile result using the ProfileWriter void stop() { diff --git a/nuTens/propagator/const-density-solver.hpp b/nuTens/propagator/const-density-solver.hpp index 26c87ee..3ff1e48 100644 --- a/nuTens/propagator/const-density-solver.hpp +++ b/nuTens/propagator/const-density-solver.hpp @@ -42,7 +42,7 @@ class ConstDensityMatterSolver : public BaseMatterSolver /// @brief Set a new PMNS matrix for this solver /// @param newPMNS The new matrix to set - inline void setPMNS(const Tensor &newPMNS) + inline void setPMNS(const Tensor &newPMNS) override { NT_PROFILE(); PMNS = newPMNS; @@ -55,7 +55,7 @@ class ConstDensityMatterSolver : public BaseMatterSolver /// @brief Set new mass eigenvalues for this solver /// @param newMasses The new masses - inline void setMasses(const Tensor &newMasses) + inline void setMasses(const Tensor &newMasses) override { NT_PROFILE(); masses = newMasses; From 5f978a6f975ba138a6975ccdea1f7d7de0a9cdac Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 16:06:51 +0100 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- nuTens/instrumentation.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp index b9127e3..a42c9ac 100644 --- a/nuTens/instrumentation.hpp +++ b/nuTens/instrumentation.hpp @@ -45,9 +45,8 @@ class ProfileWriter public: /// @brief Constructor - ProfileWriter() - { - } + 
ProfileWriter() + = default; /// @brief Set up the session /// @param[in] name The name of the timer @@ -142,10 +141,11 @@ class InstrumentationTimer /// @brief Destroy the timer object and stop the timer by calling stop() ~InstrumentationTimer() { - if (!_stopped) { + if (!_stopped) + { stop(); } -} + } /// @brief Stop the timer and write out the profile result using the ProfileWriter void stop() { From 26b3dd6d7b82e03580575bcb6b87a2b31c7f8a06 Mon Sep 17 00:00:00 2001 From: Ewan Miller Date: Thu, 1 Aug 2024 11:42:59 -0400 Subject: [PATCH 10/10] delete the copy constructor for InstrumentationTimer --- nuTens/instrumentation.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nuTens/instrumentation.hpp b/nuTens/instrumentation.hpp index a42c9ac..7c6a4f8 100644 --- a/nuTens/instrumentation.hpp +++ b/nuTens/instrumentation.hpp @@ -45,8 +45,7 @@ class ProfileWriter public: /// @brief Constructor - ProfileWriter() - = default; + ProfileWriter() = default; /// @brief Set up the session /// @param[in] name The name of the timer @@ -144,8 +143,10 @@ class InstrumentationTimer if (!_stopped) { stop(); - } } + } + + InstrumentationTimer(const InstrumentationTimer &) = delete; /// @brief Stop the timer and write out the profile result using the ProfileWriter void stop() {