Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce subsystems for memory and loop #590

Merged
merged 45 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a79efab
c1
May 7, 2024
a458b71
Move nextFreeMemoryRegionId
May 7, 2024
ca4fd1b
Other files
May 7, 2024
0466e44
Restore previous functionality
May 8, 2024
397427e
Merge allocations
May 8, 2024
de1d888
Move MAKRO for timers to the callsite
May 8, 2024
10ee010
Move memory-related files
May 8, 2024
c631fd8
Move functionality related to loops
May 8, 2024
9503531
Minor cleanup
May 8, 2024
a6b0550
Use RAII for timing and verbose output
May 8, 2024
2494de5
Move function-related globals
May 8, 2024
27a48ac
Small reorganisation
May 8, 2024
e0b5494
Split code in rtlib/loop/ and add tests
May 15, 2024
fee4814
Clean loops
May 16, 2024
c4bc590
Change debug output
May 16, 2024
bd570eb
Merge pull request #4 from discopop-project/master
Sonnexo May 16, 2024
09c2b05
Merge branch 'master' into memory-tests
Sonnexo May 17, 2024
8e4d999
Merge with discopop
May 17, 2024
ffeb8c2
Merge pull request #6 from Sonnexo/memory-tests
Sonnexo May 17, 2024
3da5efe
Delete unnecessary file
May 17, 2024
0bc08e3
Add tests for MRTNode
May 17, 2024
2f76f95
Add tests for MemoryRegionTree
May 17, 2024
dbfd795
Fix stack accesses
May 17, 2024
d5630eb
Fix name of dp_loop_X
May 17, 2024
980460a
Add MemoryRegionTree2 with tests
May 21, 2024
e6a3803
Add free to MemoryRegionTree2 with tests
May 22, 2024
bb74ba9
Add benchmarks for MemeoryRegionTree2
May 22, 2024
91ce47e
Add tests for MemoryManager
May 22, 2024
ce8cc4c
Fix some loose ends 1
May 22, 2024
054dc19
Fix some loose ends 2
May 22, 2024
f04399f
Fix some loose ends
May 22, 2024
e707ad5
More tests for scope
May 22, 2024
a0271c8
Move two injected functions
May 22, 2024
5c3269f
Take first three commits
May 22, 2024
ba6aff8
Take fourth and fifth commits
May 22, 2024
a1ebf8a
Take sixth commit
May 22, 2024
129d357
Merge pull request #8 from Sonnexo/memory-tests
Sonnexo May 22, 2024
9ad9302
Merge branch 'dp-master' into fc-master
Sonnexo May 22, 2024
943ea0c
Polish merge
May 22, 2024
28ec1e2
Merge pull request #11 from Sonnexo/fc-master
Sonnexo May 22, 2024
918965e
Change line endings
May 22, 2024
abb5775
Change capitalization
May 22, 2024
f3f769e
Change capitalization
May 22, 2024
d4fd240
Add operator!= for old gcc versions
May 22, 2024
a7d38d0
Merge branch 'master' into master
lukasrothenberger Jun 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions DiscoPoP/DiscoPoP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2004,7 +2004,7 @@ void DiscoPoP::dp_reduction_insert_functions() {
llvm::FunctionType *loop_incr_fn_type = llvm::FunctionType::get(
llvm::Type::getVoidTy(*ctx_), loop_incr_fn_args, false);
FunctionCallee incr_loop_counter_callee =
module_->getOrInsertFunction("incr_loop_counter", loop_incr_fn_type);
module_->getOrInsertFunction("__dp_loop_incr", loop_incr_fn_type);

for (auto const &loop_info : loops_) {
llvm::Value *val =
Expand All @@ -2019,11 +2019,11 @@ void DiscoPoP::dp_reduction_insert_functions() {
loop_metadata_file.close();

// add a function to output the final data
// loop_counter_output
// dp_loop_output
llvm::FunctionType *output_fn_type =
llvm::FunctionType::get(llvm::Type::getVoidTy(*ctx_), false);
FunctionCallee loop_counter_output_callee =
module_->getOrInsertFunction("loop_counter_output", output_fn_type);
module_->getOrInsertFunction("__dp_loop_output", output_fn_type);
FunctionCallee cu_taken_branch_counter_output_callee =
module_->getOrInsertFunction("__dp_taken_branch_counter_output",
output_fn_type);
Expand Down
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ target_sources(
DiscoPoP_BM
PRIVATE
benchmarks.cpp
memory_region_tree/benchmark_memory_region_tree.cpp
perfect_shadow/benchmark_perfect_shadow.cpp
scope/benchmark_scope.cpp)

Expand Down
193 changes: 193 additions & 0 deletions benchmark/memory_region_tree/benchmark_memory_region_tree.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* This file is part of the DiscoPoP software
* (http://www.discopop.tu-darmstadt.de)
*
* Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
*
* This software may be modified and distributed under the terms of
* the 3-Clause BSD License. See the LICENSE file in the package base
* directory for details.
*
*/

#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

#include "../../rtlib/memory/MemoryRegionTree.hpp"

// General functions

static std::vector<ADDR> convert_to_address(const std::int64_t number_iterations) {
auto mt = std::mt19937{0};
auto uid = std::uniform_int_distribution<ADDR>{0, 0x7FFFFFFFFFFFFFFF};

auto addresses = std::vector<ADDR>{};
addresses.resize(number_iterations);

for (auto i = std::int64_t(0); i < number_iterations; i++) {
addresses[i] = uid(mt);
}

std::sort(addresses.begin(), addresses.end());

return addresses;
}

// Benchmarks for the old version (i.e., establishing a baseline)

// Times the insertion of `state.range(0)` regions into a freshly created tree.
//
// Region k is allocated from the address pair (addresses[2k], addresses[2k + 1])
// and receives the id 2k + 1.
static void benchmark_mrt_allocate_region(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto addresses = convert_to_address(region_count * 2);

  // Filled trees are parked here so that they are not destroyed while the
  // timer is running.
  std::vector<__dp::MemoryRegionTree> finished_trees{};

  for (auto _ : state) {
    // Creating the empty tree is not part of the measurement.
    state.PauseTiming();
    __dp::MemoryRegionTree tree{};
    state.ResumeTiming();

    for (int region = 0; region < region_count; ++region) {
      const int first = 2 * region;
      tree.allocate_region(addresses[first], addresses[first + 1], first + 1);
    }

    // Hand the tree over with the timer stopped.
    state.PauseTiming();
    finished_trees.emplace_back(std::move(tree));
    state.ResumeTiming();
  }
}

// Times get_memory_region_id for every address that was used to allocate a
// region (both entries of each pair).
//
// The tree is filled once, outside the measured loop; each benchmark
// iteration performs 2 * state.range(0) lookups.
static void benchmark_mrt_get_memory_region_id(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto addresses = convert_to_address(region_count * 2);

  __dp::MemoryRegionTree tree{};
  for (int region = 0; region < region_count; ++region) {
    const int first = 2 * region;
    tree.allocate_region(addresses[first], addresses[first + 1], first + 1);
  }

  for (auto _ : state) {
    for (const auto& address : addresses) {
      // DoNotOptimize keeps the compiler from discarding the unused result.
      benchmark::DoNotOptimize(tree.get_memory_region_id(address));
    }
  }
}

// Times get_memory_region_id_string for every address that was used to
// allocate a region (both entries of each pair). Per the benchmark name these
// lookups are expected to hit an existing region rather than the fallback.
//
// The tree is filled once, outside the measured loop; each benchmark
// iteration performs 2 * state.range(0) lookups.
static void benchmark_mrt_get_memory_region_id_string_found(benchmark::State& state) {
  const auto number_iterations = state.range(0);
  const auto number_addresses = number_iterations * 2;

  const auto addresses = convert_to_address(number_addresses);

  auto tree = __dp::MemoryRegionTree{};

  for (auto i = 0; i < number_addresses; i += 2) {
    tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
  }

  // The returned strings are parked here so that their destructors do not run
  // inside the timed section. One benchmark iteration stores one string per
  // address, so reserve exactly that many slots up front.
  auto dumping_ground = std::vector<std::string>{};
  dumping_ground.reserve(number_addresses);

  for (auto _ : state) {
    // Drop the previous iteration's strings with the timer stopped. clear()
    // keeps the capacity, so emplace_back below never reallocates while the
    // timer is running and the vector no longer grows across iterations.
    state.PauseTiming();
    dumping_ground.clear();
    state.ResumeTiming();

    for (auto i = 0; i < number_addresses; i++) {
      dumping_ground.emplace_back(tree.get_memory_region_id_string(addresses[i], "fallback"));
    }
  }
}

// Times get_memory_region_id_string for addresses next to the allocated pairs:
// one below each even-indexed pool entry and one above each odd-indexed entry.
// NOTE(review): per the benchmark name these lookups are presumably meant to
// miss every region and return the "fallback" string - confirm against
// MemoryRegionTree.
//
// The tree is filled once, outside the measured loop; each benchmark
// iteration performs 2 * state.range(0) lookups.
static void benchmark_mrt_get_memory_region_id_string_fallback(benchmark::State& state) {
  const auto number_iterations = state.range(0);
  const auto number_addresses = number_iterations * 2;

  const auto addresses = convert_to_address(number_addresses);

  auto tree = __dp::MemoryRegionTree{};

  for (auto i = 0; i < number_addresses; i += 2) {
    tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
  }

  // The returned strings are parked here so that their destructors do not run
  // inside the timed section. One benchmark iteration stores one string per
  // address, so reserve exactly that many slots up front.
  auto dumping_ground = std::vector<std::string>{};
  dumping_ground.reserve(number_addresses);

  for (auto _ : state) {
    // Drop the previous iteration's strings with the timer stopped. clear()
    // keeps the capacity, so emplace_back below never reallocates while the
    // timer is running and the vector no longer grows across iterations.
    state.PauseTiming();
    dumping_ground.clear();
    state.ResumeTiming();

    for (auto i = 0; i < number_addresses; i++) {
      const auto base_address = addresses[i];
      // Even index: first address of a pair, step one below it.
      // Odd index: second address of a pair, step one above it.
      const auto address = (i % 2 == 0) ? base_address - 1 : base_address + 1;

      dumping_ground.emplace_back(tree.get_memory_region_id_string(address, "fallback"));
    }
  }
}

// Times the destruction of a tree that holds `state.range(0)` regions.
//
// The tree is built and filled with the timer paused; once the timer has been
// resumed nothing else happens before the loop body ends, so leaving the
// scope (and thereby destroying the tree) is what gets measured.
static void benchmark_mrt_destructor(benchmark::State& state) {
  const auto region_count = state.range(0);
  const auto addresses = convert_to_address(region_count * 2);

  for (auto _ : state) {
    state.PauseTiming();
    __dp::MemoryRegionTree tree{};
    for (int region = 0; region < region_count; ++region) {
      const int first = 2 * region;
      tree.allocate_region(addresses[first], addresses[first + 1], first + 1);
    }
    state.ResumeTiming();
    // `tree` goes out of scope here, with the timer running.
  }
}

// Times free_region for every region of a tree that holds `state.range(0)`
// regions.
//
// Each benchmark iteration builds and fills its own tree with the timer
// paused, then frees the regions one by one (each identified by the first
// address of its pair) with the timer running.
static void benchmark_mrt_free_region(benchmark::State& state) {
  const auto number_iterations = state.range(0);
  const auto number_addresses = number_iterations * 2;

  const auto addresses = convert_to_address(number_addresses);

  // This exists so that the destructor call does not interfere with the timing
  auto dumping_ground = std::vector<__dp::MemoryRegionTree>{};

  for (auto _ : state) {
    // Set-up is excluded from the measurement.
    state.PauseTiming();
    auto tree = __dp::MemoryRegionTree{};
    for (auto i = 0; i < number_addresses; i += 2) {
      tree.allocate_region(addresses[i], addresses[i + 1], i + 1);
    }
    state.ResumeTiming();

    for (auto i = 0; i < number_addresses; i += 2) {
      tree.free_region(addresses[i]);
    }

    // Hand the emptied tree over with the timer stopped.
    state.PauseTiming();
    dumping_ground.emplace_back(std::move(tree));
    state.ResumeTiming();
  }
}

// Benchmark registration.
//
// Every benchmark is registered twice: once with Arg(32) and once with
// Arg(1024). The argument is what the benchmark functions read back through
// state.range(0). All results are reported in milliseconds, and each
// registration runs a fixed 100 iterations.
BENCHMARK(benchmark_mrt_allocate_region)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_allocate_region)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

BENCHMARK(benchmark_mrt_get_memory_region_id)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

BENCHMARK(benchmark_mrt_get_memory_region_id_string_found)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id_string_found)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

BENCHMARK(benchmark_mrt_get_memory_region_id_string_fallback)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_get_memory_region_id_string_fallback)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

BENCHMARK(benchmark_mrt_destructor)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_destructor)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);

BENCHMARK(benchmark_mrt_free_region)->Unit(benchmark::kMillisecond)->Arg(32)->Iterations(100);
BENCHMARK(benchmark_mrt_free_region)->Unit(benchmark::kMillisecond)->Arg(1024)->Iterations(100);
2 changes: 1 addition & 1 deletion benchmark/perfect_shadow/benchmark_perfect_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <cstdint>
#include <vector>

#include "../../rtlib/perfect_shadow.hpp"
#include "../../rtlib/memory/PerfectShadow.hpp"

// General functions

Expand Down
2 changes: 1 addition & 1 deletion benchmark/scope/benchmark_scope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <cstdint>
#include <vector>

#include "../../rtlib/scope.hpp"
#include "../../rtlib/memory/Scope.hpp"

// General functions

Expand Down
78 changes: 39 additions & 39 deletions docs/setup/discopop.md
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
---
layout: default
title: DiscoPoP
parent: Setup
nav_order: 1
---

# DiscoPoP Setup
## Prerequisites
- LLVM/clang version 11
- Python version 3.6 or greater

## Setup
```
git clone git@github.com:discopop-project/discopop.git
cd discopop
mkdir build
```

## Build libraries and install Python modules
```
cd build
cmake .. <CMAKE_FLAGS>
make
cd ..
```

where `<CMAKE_FLAGS>` can consist of any combination of the following flags and commonly used CMAKE_FLAGS:
- In case you want to use a specific Version of LLVM, it is possible to specify the `-DUSE_LLVM_VERSION=<version>` flag.
- In case you want to use a specific LLVM installation, specify the location via the `-DLLVM_DIST_PATH=<llvm_base_dir>` flag.
- In case your application uses PThreads, please specify `-DDP_PTHREAD_COMPATIBILITY_MODE=[0|1]`. Note, however, that this can influence the runtime of the profiling.
- In case you require a more verbose output of the runtime library, specify the `-DDP_RTLIB_VERBOSE=[0|1]` flag.
- In case you want to specify the number of Workers available for the profiling step, specify the `-DDP_NUM_WORKERS=<int>` flag. By default, `3` worker threads are used to analyze the observed memory accesses. `0` might be used to disable the creation of additional threads for the analysis.

## Testing the installation
To test the installation, it is possible to execute the provided set of unit tests.
```
python -m unittest -v
```
---
layout: default
title: DiscoPoP
parent: Setup
nav_order: 1
---
# DiscoPoP Setup
## Prerequisites
- LLVM/clang version 11
- Python version 3.6 or greater
## Setup
```
git clone git@github.com:discopop-project/discopop.git
cd discopop
mkdir build
```
## Build libraries and install Python modules
```
cd build
cmake .. <CMAKE_FLAGS>
make
cd ..
```
where `<CMAKE_FLAGS>` can consist of any combination of the following flags and commonly used CMAKE_FLAGS:
- In case you want to use a specific Version of LLVM, it is possible to specify the `-DUSE_LLVM_VERSION=<version>` flag.
- In case you want to use a specific LLVM installation, specify the location via the `-DLLVM_DIST_PATH=<llvm_base_dir>` flag.
- In case your application uses PThreads, please specify `-DDP_PTHREAD_COMPATIBILITY_MODE=[0|1]`. Note, however, that this can influence the runtime of the profiling.
- In case you require a more verbose output of the runtime library, specify the `-DDP_RTLIB_VERBOSE=[0|1]` flag.
- In case you want to specify the number of Workers available for the profiling step, specify the `-DDP_NUM_WORKERS=<int>` flag. By default, `3` worker threads are used to analyze the observed memory accesses. `0` might be used to disable the creation of additional threads for the analysis.
## Testing the installation
To test the installation, it is possible to execute the provided set of unit tests.
```
python -m unittest -v
```
45 changes: 23 additions & 22 deletions rtlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,29 @@
set(DiscoPoP_SOURCES
iFunctions.cpp
iFunctionsGlobals.cpp
iFunctionsTypes.cpp
signature.cpp
loop_counter.cpp
cu_taken_branch_counter.cpp
../share/lib/timer.cpp
MemoryRegionTree.cpp

memory/MemoryManager.cpp
memory/Signature.cpp

functions/dp_add_bb_deps.cpp
functions/dp_alloca.cpp
functions/dp_call.cpp
functions/dp_decl.cpp
functions/dp_delete.cpp
functions/dp_finalize.cpp
functions/dp_func_entry.cpp
functions/dp_func_exit.cpp
functions/dp_loop_entry.cpp
functions/dp_loop_exit.cpp
functions/dp_new.cpp
functions/dp_read.cpp
functions/dp_report_bb.cpp
functions/dp_report_bb_pair.cpp
functions/dp_write.cpp
injected_functions/dp_add_bb_deps.cpp
injected_functions/dp_alloca.cpp
injected_functions/dp_call.cpp
injected_functions/dp_decl.cpp
injected_functions/dp_delete.cpp
injected_functions/dp_finalize.cpp
injected_functions/dp_func_entry.cpp
injected_functions/dp_func_exit.cpp
injected_functions/dp_incr_taken_branch_counter.cpp
injected_functions/dp_loop_entry.cpp
injected_functions/dp_loop_exit.cpp
injected_functions/dp_loop_incr.cpp
injected_functions/dp_loop_output.cpp
injected_functions/dp_new.cpp
injected_functions/dp_read.cpp
injected_functions/dp_report_bb.cpp
injected_functions/dp_report_bb_pair.cpp
injected_functions/dp_taken_branch_counter_output.cpp
injected_functions/dp_write.cpp
)

set(CMAKE_CXX_FLAGS
Expand Down Expand Up @@ -70,4 +71,4 @@ install(TARGETS DiscoPoP_RT ARCHIVE DESTINATION lib)
# compile simple-alias-detection
#exec_program(${CMAKE_CURRENT_SOURCE_DIR}/simple-alias-detection/compile.sh
# ARGS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
#)
#)
6 changes: 6 additions & 0 deletions rtlib/DPTypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@

// #define SKIP_DUP_INSTR 1

// To manually enable/disable internal timing
// #define DP_SKIP_INTERNAL_TIMER
#ifndef DP_SKIP_INTERNAL_TIMER
#define DP_INTERNAL_TIMER
#endif

typedef std::int64_t LID;
typedef std::int64_t ADDR;
typedef std::int64_t sigElement;
Expand Down
Loading
Loading