diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/sanitizers.yml index d92e1e44..acaafa87 100644 --- a/.github/workflows/sanitizers.yml +++ b/.github/workflows/sanitizers.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - sanitizer: [ address, undefined ] + sanitizer: [ address, undefined, thread ] include: - cxx: clang++-15 pkgs: clang-15 llvm-15 libtbb-dev diff --git a/.tsan-supressions b/.tsan-supressions index f312e500..1a873fc1 100644 --- a/.tsan-supressions +++ b/.tsan-supressions @@ -1,7 +1,3 @@ -# Suppress warnings from Intel TBB -# (libstdc++ uses TBB to implement the parallel std algorithms) - -# clang race:^tbb::detail::d1::node::node race:^tbb::detail::d1::tree_node::tree_node @@ -14,10 +10,5 @@ race:tbb::detail::d1::small_object_allocator::delete_object race:tbb::detail::d1::dynamic_grainsize_mode<*>::check_being_stolen -race:^tbb::detail::d1::start_for<*>::finalize - -# This supression is just supposed to be the first one, but the function name isnt always displayed properly in the stack trace -race:^tbb::detail::d1::start_for<*>::offer_work -race:tbb::detail::d1::auto_partitioner const>::offer_work(tbb::detail::d0::split&, tbb::detail::d1::execution_data&) - -race:^tbb::detail::r1::isolate_within_arena \ No newline at end of file +race:tbb::detail::d1::start_for<*>::finalize +race:tbb::detail::d1::start_for<*>::offer_work_impl diff --git a/CMakeLists.txt b/CMakeLists.txt index 758f993e..dabcb6e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,6 +185,7 @@ if(GAPP_BUILD_TESTS AND BUILD_TESTING AND PROJECT_IS_TOP_LEVEL) add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/test/unit") add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/test/integration") add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/test/misc") + add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/test/tsan") endif() if(GAPP_BUILD_BENCHMARKS AND BUILD_TESTING AND PROJECT_IS_TOP_LEVEL) diff --git a/src/algorithm/nd_sort.cpp b/src/algorithm/nd_sort.cpp index 3f23dea0..d84b7efb 100644 --- a/src/algorithm/nd_sort.cpp +++ b/src/algorithm/nd_sort.cpp @@ -5,6 +5,7 @@ #include "../utility/algorithm.hpp" #include "../utility/functional.hpp" #include "../utility/iterators.hpp" +#include "../utility/parallel_for.hpp" #include "../utility/math.hpp" #include "../utility/utility.hpp" #include "../utility/matrix.hpp" @@ -90,7 +91,7 @@ namespace gapp::algorithm::dtl { const size_t popsize = std::distance(first, last); - thread_local DominanceLists dom_lists; + static DominanceLists dom_lists; if (dom_lists.size() != popsize) { @@ -188,7 +189,7 @@ namespace gapp::algorithm::dtl const size_t popsize = std::distance(first, last); DominanceMatrix dmat(popsize, popsize /*, MAXIMAL */); - std::for_each(GAPP_EXEC_UNSEQ, detail::iota_iterator(0_sz), detail::iota_iterator(first->size()), [&](size_t obj) + detail::parallel_for(detail::iota_iterator(0_sz), detail::iota_iterator(first->size()), [&](size_t obj) { FitnessVector fvec(popsize); std::transform(first, last, fvec.begin(), detail::element_at(obj)); @@ -212,7 +213,7 @@ namespace gapp::algorithm::dtl }); }); - std::for_each(GAPP_EXEC_UNSEQ, detail::iota_iterator(0_sz), detail::iota_iterator(popsize), [&](size_t row) noexcept + detail::parallel_for(detail::iota_iterator(0_sz), detail::iota_iterator(popsize), [&](size_t row) noexcept { dmat(row, row).store(NONMAXIMAL, std::memory_order_relaxed); // diagonal is all nonmax diff --git a/src/algorithm/nsga3.cpp b/src/algorithm/nsga3.cpp index d1f35e03..21cb640e 100644 --- a/src/algorithm/nsga3.cpp +++ b/src/algorithm/nsga3.cpp @@ -8,6 +8,7 @@ #include "../metrics/pop_stats.hpp" #include "../utility/algorithm.hpp" #include "../utility/functional.hpp" +#include "../utility/parallel_for.hpp" #include "../utility/math.hpp" #include "../utility/rng.hpp" #include "../utility/utility.hpp" @@ -239,7 +240,7 @@ namespace gapp::algorithm sol_info_.resize(last - first); - std::for_each(GAPP_EXEC_UNSEQ, pfirst, plast, [&](const FrontInfo& sol) + detail::parallel_for(pfirst, plast, [&](const FrontInfo& sol) { const FitnessVector fnorm = normalizeFitnessVec(first[sol.idx], ideal_point_, nadir_point_); diff --git a/src/algorithm/reference_lines.cpp b/src/algorithm/reference_lines.cpp index d4e571c4..43f2ef93 100644 --- a/src/algorithm/reference_lines.cpp +++ b/src/algorithm/reference_lines.cpp @@ -183,12 +183,11 @@ namespace gapp::algorithm::reflines min_distances.pop_back(); /* Calc the distance of each candidate to the closest ref point. */ - std::transform(GAPP_EXEC_UNSEQ, candidate_points.begin(), candidate_points.end(), min_distances.begin(), min_distances.begin(), - [&](const Point& candidate, double current_min) noexcept + for (size_t i = 0; i < candidate_points.size(); i++) { - const double dist = math::euclideanDistanceSq(candidate, points.back()); - return std::min(current_min, dist); - }); + double dist = math::euclideanDistanceSq(candidate_points[i], points.back()); + min_distances[i] = std::min(min_distances[i], dist); + } } return points; diff --git a/src/core/ga_base.impl.hpp b/src/core/ga_base.impl.hpp index 2ea0dc53..d480f62f 100644 --- a/src/core/ga_base.impl.hpp +++ b/src/core/ga_base.impl.hpp @@ -17,6 +17,7 @@ #include "../stop_condition/stop_condition_base.hpp" #include "../utility/algorithm.hpp" #include "../utility/functional.hpp" +#include "../utility/parallel_for.hpp" #include "../utility/scope_exit.hpp" #include "../utility/utility.hpp" #include @@ -327,7 +328,7 @@ namespace gapp /* Reset state in case solve() has already been called before. */ generation_cntr_ = 0; - num_fitness_evals_ = 0; + num_fitness_evals_->store(0, std::memory_order_relaxed); solutions_.clear(); population_.clear(); @@ -339,7 +340,7 @@ namespace gapp /* Create and evaluate the initial population of the algorithm. */ num_objectives_ = findNumberOfObjectives(); population_ = generatePopulation(population_size_, std::move(initial_population)); - std::for_each(GAPP_EXEC_UNSEQ, population_.begin(), population_.end(), [this](Candidate& sol) { evaluate(sol); }); + detail::parallel_for(population_.begin(), population_.end(), [this](Candidate& sol) { evaluate(sol); }); fitness_matrix_ = detail::toFitnessMatrix(population_); if (keep_all_optimal_sols_) solutions_ = detail::findParetoFront(population_); @@ -465,11 +466,10 @@ namespace gapp * is no point doing it again. */ if (!sol.is_evaluated || fitness_function_->dynamic()) { + num_fitness_evals_->fetch_add(1, std::memory_order_release); + sol.fitness = (*fitness_function_)(sol.chromosome); sol.is_evaluated = true; - - std::atomic_ref num_evals{ num_fitness_evals_ }; - num_evals.fetch_add(1_sz, std::memory_order_acq_rel); } GAPP_ASSERT(hasValidFitness(sol)); @@ -504,13 +504,12 @@ namespace gapp std::vector> child_pairs(num_children / 2); prepareSelections(); - std::generate(GAPP_EXEC_UNSEQ, child_pairs.begin(), child_pairs.end(), - [this] + detail::parallel_for(child_pairs.begin(), child_pairs.end(), + [this](CandidatePair& children) { const auto& parent1 = select(); const auto& parent2 = select(); - - return crossover(parent1, parent2); + children = crossover(parent1, parent2); }); auto children = detail::flatten(std::move(child_pairs)); @@ -518,7 +517,7 @@ namespace gapp /* If the population size is odd, one too many child candidates were generated by the crossovers. */ if (children.size() > population_size_) children.pop_back(); - std::for_each(GAPP_EXEC_UNSEQ, children.begin(), children.end(), + detail::parallel_for(children.begin(), children.end(), [this](Candidate& child) { mutate(child); diff --git a/src/core/ga_info.cpp b/src/core/ga_info.cpp index 0f40a3e9..8bf57252 100644 --- a/src/core/ga_info.cpp +++ b/src/core/ga_info.cpp @@ -4,7 +4,6 @@ #include "../algorithm/single_objective.hpp" #include "../stop_condition/stop_condition.hpp" #include "../utility/utility.hpp" -#include #include #include @@ -13,7 +12,7 @@ namespace gapp GaInfo::GaInfo(GaInfo&&) noexcept = default; GaInfo& GaInfo::operator=(GaInfo&&) noexcept = default; - GaInfo::~GaInfo() = default; + GaInfo::~GaInfo() noexcept = default; GaInfo::GaInfo(Positive population_size, std::unique_ptr algorithm, std::unique_ptr stop_condition) noexcept : @@ -26,8 +25,7 @@ namespace gapp size_t GaInfo::num_fitness_evals() const noexcept { - std::atomic_ref num_fitness_evals{ num_fitness_evals_ }; - return num_fitness_evals.load(std::memory_order_acquire); + return num_fitness_evals_->load(std::memory_order_acquire); } void GaInfo::algorithm(std::unique_ptr f) diff --git a/src/core/ga_info.hpp b/src/core/ga_info.hpp index 6d059f2b..8708c1e5 100644 --- a/src/core/ga_info.hpp +++ b/src/core/ga_info.hpp @@ -5,6 +5,7 @@ #include "../population/population.hpp" #include "../utility/bounded_value.hpp" +#include "../utility/atomic.hpp" #include "../utility/utility.hpp" #include "../metrics/metric_set.hpp" #include @@ -341,7 +342,7 @@ namespace gapp GaInfo& operator=(const GaInfo&) = delete; /** Destructor. */ - virtual ~GaInfo(); + virtual ~GaInfo() noexcept; protected: @@ -359,7 +360,7 @@ namespace gapp Positive max_gen_ = 500; size_t num_objectives_ = 0; size_t generation_cntr_ = 0; - size_t num_fitness_evals_ = 0; + detail::atomic num_fitness_evals_ = 0; bool keep_all_optimal_sols_ = false; bool use_default_algorithm_ = false; diff --git a/src/metrics/pop_stats.cpp b/src/metrics/pop_stats.cpp index 284a3e3e..d5894cae 100644 --- a/src/metrics/pop_stats.cpp +++ b/src/metrics/pop_stats.cpp @@ -5,6 +5,7 @@ #include "../population/population.hpp" #include "../utility/algorithm.hpp" #include "../utility/iterators.hpp" +#include "../utility/parallel_for.hpp" #include "../utility/utility.hpp" #include #include @@ -154,17 +155,18 @@ namespace gapp::detail const FitnessMatrix front = uniqueSortedParetoFront(fmat); std::atomic hypervolume = 0.0; - std::for_each(GAPP_EXEC_UNSEQ, detail::iota_iterator(0_sz), detail::iota_iterator(front.size()), [&](size_t idx) + + detail::parallel_for(detail::iota_iterator(0_sz), detail::iota_iterator(front.size()), [&](size_t idx) { const auto point = front[idx]; const FitnessMatrix rest = { front.begin() + idx + 1, front.end() }; const double exclusive_hypervolume = exclusiveHypervolume(point, rest, ref_point); - hypervolume.fetch_add(exclusive_hypervolume, std::memory_order_acq_rel); + hypervolume.fetch_add(exclusive_hypervolume, std::memory_order_relaxed); }); - return hypervolume.load(std::memory_order_acquire); + return hypervolume.load(std::memory_order_relaxed); } static inline double hypervolume(seq, const FitnessMatrix& fmat, std::span ref_point) diff --git a/src/population/population.hpp b/src/population/population.hpp index 6d0ccf4b..9377cef2 100644 --- a/src/population/population.hpp +++ b/src/population/population.hpp @@ -54,6 +54,7 @@ namespace gapp::detail #include "../utility/algorithm.hpp" #include "../utility/functional.hpp" #include "../utility/iterators.hpp" +#include "../utility/parallel_for.hpp" #include "../utility/utility.hpp" #include "../utility/math.hpp" #include @@ -87,7 +88,6 @@ namespace gapp::detail GAPP_ASSERT(std::all_of(pop.begin(), pop.end(), [&](const Candidate& sol) { return sol.fitness.size() == pop[0].fitness.size(); })); auto fitness_matrix = detail::toFitnessMatrix(pop); - auto optimal_indices = detail::findParetoFront(fitness_matrix); return detail::select(pop, optimal_indices); @@ -106,7 +106,7 @@ namespace gapp::detail std::vector lhs_state(lhs.size()); std::vector> rhs_state(rhs.size()); - std::for_each(GAPP_EXEC_UNSEQ, iota_iterator(0_sz), iota_iterator(lhs.size()), [&](size_t i) noexcept + detail::parallel_for(iota_iterator(0_sz), iota_iterator(lhs.size()), [&](size_t i) noexcept { for (size_t j = 0; j < rhs.size(); j++) { @@ -152,11 +152,13 @@ namespace gapp::detail for (size_t i = 0; i < rhs.size(); i++) { - if (rhs_state[i].load(std::memory_order_relaxed) != DOMINATED) optimal_solutions.push_back(std::move(rhs[i])); + if (rhs_state[i].load(std::memory_order_relaxed) != DOMINATED) + optimal_solutions.push_back(std::move(rhs[i])); } for (size_t i = 0; i < lhs.size(); i++) { - if (lhs_state[i] != DOMINATED) optimal_solutions.push_back(std::move(lhs[i])); + if (lhs_state[i] != DOMINATED) + optimal_solutions.push_back(std::move(lhs[i])); } return optimal_solutions; diff --git a/src/utility/atomic.hpp b/src/utility/atomic.hpp new file mode 100644 index 00000000..b3881d9d --- /dev/null +++ b/src/utility/atomic.hpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2023 Krisztián Rugási. Subject to the MIT License. */ + +#ifndef GA_UTILITY_ATOMIC_HPP +#define GA_UTILITY_ATOMIC_HPP + +#include +#include +#include + +namespace gapp::detail +{ + /* + * This is a simple wrapper class around std::atomic with atomic + * initialization, in order to prevent data races between the + * initialization of the variable and later accesses to it. + * + * The wrapper also adds move operators for convenience. + */ + template + class atomic + { + public: + atomic(std::memory_order order = std::memory_order_seq_cst) noexcept + { + data_.store(T{}, order); + } + + atomic(T value, std::memory_order order = std::memory_order_seq_cst) noexcept + { + data_.store(std::move(value), order); + } + + atomic(atomic&& other) noexcept + { + data_.store(other->load()); + } + + atomic& operator=(T value) noexcept + { + data_.store(std::move(value)); + return *this; + } + + atomic& operator=(atomic&& other) noexcept + { + data_.store(other->load()); + return *this; + } + + ~atomic() noexcept = default; // maybe needs release? + + std::atomic& operator*() noexcept { return data_; } + std::atomic* operator->() noexcept { return std::addressof(data_); } + const std::atomic& operator*() const noexcept { return data_; } + const std::atomic* operator->() const noexcept { return std::addressof(data_); } + + private: + std::atomic data_; + }; + + +} // namespace gapp::detail + +#endif // !GA_UTILITY_ATOMIC_HPP diff --git a/src/utility/indestructible.hpp b/src/utility/indestructible.hpp new file mode 100644 index 00000000..95fda4c1 --- /dev/null +++ b/src/utility/indestructible.hpp @@ -0,0 +1,52 @@ +/* Copyright (c) 2023 Krisztián Rugási. Subject to the MIT License. */ + +#ifndef GA_UTILITY_INDESTRUCTIBLE_HPP +#define GA_UTILITY_INDESTRUCTIBLE_HPP + +#include +#include +#include +#include +#include + +namespace gapp::detail +{ + template + class Indestructible + { + public: + template + constexpr Indestructible(Args&&... args) + noexcept(std::is_nothrow_constructible_v) + { + ::new(std::addressof(data_)) T(std::forward(args)...); + } + + Indestructible(const Indestructible&) = delete; + Indestructible(Indestructible&&) = delete; + Indestructible& operator=(const Indestructible&) = delete; + Indestructible& operator=(Indestructible&&) = delete; + + ~Indestructible() = default; + + // These can't be constexpr because bit_cast between ptr types isn't constexpr + T& get() noexcept { return *std::bit_cast(std::addressof(data_)); } + const T& get() const noexcept { return *std::bit_cast(std::addressof(data_)); } + + T& operator*() noexcept { return get(); } + const T& operator*() const noexcept { return get(); } + + T* operator->() noexcept { return std::addressof(get()); } + const T* operator->() const noexcept { return std::addressof(get()); } + + /* implicit */ operator T&() & noexcept { return get(); } + /* implicit */ operator const T&() const& noexcept { return get(); } + + private: + using storage_type = unsigned char[sizeof(T)]; // NOLINT(modernize-avoid-c-arrays) + alignas(T) storage_type data_; + }; + +} // namespace gapp::detail + +#endif // !GA_UTILITY_INDESTRUCTIBLE_HPP diff --git a/src/utility/parallel_for.hpp b/src/utility/parallel_for.hpp new file mode 100644 index 00000000..d13d809f --- /dev/null +++ b/src/utility/parallel_for.hpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2023 Krisztián Rugási. Subject to the MIT License. */ + +#ifndef GA_UTILITY_PARALLEL_FOR_HPP +#define GA_UTILITY_PARALLEL_FOR_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace gapp::detail +{ + template + requires std::invocable> + void parallel_for(Iter first, Iter last, F&& f) + { + constinit static std::atomic barrier; + auto tsan_memory_barrier = [](std::memory_order order) { std::ignore = barrier.exchange(false, order); }; + + std::atomic has_exception; + std::exception_ptr exception; + + auto thread_guard = [&]() noexcept + { + if (!has_exception.exchange(true, std::memory_order_relaxed)) + exception = std::current_exception(); + }; + + auto thread_func = [&](auto&& elem) noexcept + { + tsan_memory_barrier(std::memory_order_acquire); + try { std::invoke(f, std::forward(elem)); } + catch (...) { std::invoke(thread_guard); } + tsan_memory_barrier(std::memory_order_release); + }; + + tsan_memory_barrier(std::memory_order_release); + std::for_each(std::execution::par, first, last, std::move(thread_func)); + tsan_memory_barrier(std::memory_order_acquire); + + if (has_exception.load(std::memory_order_relaxed)) + { + std::rethrow_exception(exception); + } + } + +} // namespace gapp::detail + +#endif // !GA_UTILITY_PARALLEL_FOR_HPP diff --git a/src/utility/rcu.hpp b/src/utility/rcu.hpp index 1dc64130..37b047fa 100644 --- a/src/utility/rcu.hpp +++ b/src/utility/rcu.hpp @@ -4,11 +4,13 @@ #define GA_UTILITY_RCU_HPP #include "utility.hpp" +#include "shared_spinlock.hpp" +#include "indestructible.hpp" #include +#include #include #include #include -#include #include #include @@ -37,11 +39,11 @@ namespace gapp::detail uint64_t target = current + 1; writer_epoch.compare_exchange_strong(current, target, std::memory_order_acq_rel); - std::shared_lock _{ reader_list_mtx }; + std::shared_lock _{ tls_readers->lock }; - for (const registered_reader* reader_ : reader_list) + for (const registered_reader* tls_reader : tls_readers->list) { - while (reader_->epoch.load(std::memory_order_acquire) < target) { GAPP_PAUSE(); } + while (tls_reader->epoch.load(std::memory_order_acquire) < target) GAPP_PAUSE(); } } @@ -50,25 +52,29 @@ namespace gapp::detail { registered_reader() noexcept { - std::unique_lock _{ reader_list_mtx }; - reader_list.push_back(this); + std::unique_lock _{ tls_readers->lock }; + tls_readers->list.push_back(this); } ~registered_reader() noexcept { - std::unique_lock _{ reader_list_mtx }; - std::erase(reader_list, this); + std::unique_lock _{ tls_readers->lock }; + std::erase(tls_readers->list, this); } std::atomic epoch = NOT_READING; }; - inline static constexpr uint64_t NOT_READING = std::numeric_limits::max(); + struct tls_reader_list + { + detail::shared_spinlock lock; + std::vector list; + }; - GAPP_API inline static std::vector reader_list; - GAPP_API inline static std::shared_mutex reader_list_mtx; + inline static constexpr uint64_t NOT_READING = std::numeric_limits::max(); - alignas(128) GAPP_API inline static constinit std::atomic writer_epoch = 0; + GAPP_API inline static detail::Indestructible tls_readers; + GAPP_API inline static constinit std::atomic writer_epoch = 0; alignas(128) inline static thread_local registered_reader reader; }; @@ -84,9 +90,7 @@ namespace gapp::detail ~rcu_obj() noexcept { - T* ptr = data_.load(std::memory_order_consume); - rcu_domain::synchronize(); - delete ptr; + delete data_.load(std::memory_order_consume); } template @@ -100,16 +104,20 @@ namespace gapp::detail return *this; } - T& get() const noexcept - { - return *data_.load(std::memory_order_consume); - } + T& get() noexcept { return *data_.load(std::memory_order_consume); } + const T& get() const noexcept { return *data_.load(std::memory_order_consume); } + + T& operator*() noexcept { return get(); } + const T& operator*() const noexcept { return get(); } + + T* operator->() noexcept { return std::addressof(get()); } + const T* operator->() const noexcept { return std::addressof(get()); } - T& operator*() const noexcept { return get(); } - T* operator->() const noexcept { return std::addressof(get()); } + void lock_shared() const noexcept { rcu_domain::read_lock(); } + void unlock_shared() const noexcept { rcu_domain::read_unlock(); } + bool try_lock_shared() const noexcept { rcu_domain::read_lock(); return true; } - void lock() const noexcept { rcu_domain::read_lock(); } - void unlock() const noexcept { rcu_domain::read_unlock(); } + void wait_for_readers() const noexcept { rcu_domain::synchronize(); } private: std::atomic data_; diff --git a/src/utility/rng.hpp b/src/utility/rng.hpp index dabcf5e9..04287cfa 100644 --- a/src/utility/rng.hpp +++ b/src/utility/rng.hpp @@ -3,11 +3,12 @@ #ifndef GA_UTILITY_RNG_HPP #define GA_UTILITY_RNG_HPP -#include "utility.hpp" #include "type_traits.hpp" #include "concepts.hpp" #include "bit.hpp" #include "rcu.hpp" +#include "indestructible.hpp" +#include "utility.hpp" #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -186,18 +188,18 @@ namespace gapp::rng /** @return The next number of the sequence. Thread-safe. */ result_type operator()() const noexcept { - std::scoped_lock _{ generator_.instance }; + std::shared_lock _{ generator_.instance }; return std::invoke(*generator_.instance); } /** Set a new seed for the generator. Thread-safe. */ static void seed(std::uint64_t seed) { - std::scoped_lock _{ generator_list_mtx_ }; - global_generator.seed(seed); - for (Generator* generator : generator_list) + std::scoped_lock _{ tls_generators().lock }; + global_generator().seed(seed); + for (Generator* generator : tls_generators().list) { - *generator = global_generator.jump(); + *generator = global_generator().jump(); } } @@ -212,25 +214,40 @@ namespace gapp::rng struct RegisteredGenerator { - RegisteredGenerator() + RegisteredGenerator() noexcept { - std::scoped_lock _{ generator_list_mtx_ }; - instance = global_generator.jump(); - generator_list.push_back(std::addressof(instance)); + std::scoped_lock _{ tls_generators().lock }; + instance = global_generator().jump(); + tls_generators().list.push_back(std::addressof(instance)); } ~RegisteredGenerator() noexcept { - std::scoped_lock _{ generator_list_mtx_ }; - std::erase(generator_list, std::addressof(instance)); + std::scoped_lock _{ tls_generators().lock }; + std::erase(tls_generators().list, std::addressof(instance)); } Generator instance{ 0 }; }; - GAPP_API inline static constinit Xoroshiro128p global_generator{ GAPP_SEED }; - GAPP_API inline static std::vector generator_list; - GAPP_API inline static std::mutex generator_list_mtx_; + struct GeneratorList + { + detail::spinlock lock; + std::vector list; + }; + + static GeneratorList& tls_generators() noexcept + { + static detail::Indestructible tls_generators; + return tls_generators; + } + + static Xoroshiro128p& global_generator() noexcept + { + static Xoroshiro128p global_generator{ GAPP_SEED }; + return global_generator; + } + alignas(128) inline static thread_local RegisteredGenerator generator_; }; diff --git a/src/utility/shared_spinlock.hpp b/src/utility/shared_spinlock.hpp new file mode 100644 index 00000000..3de6063a --- /dev/null +++ b/src/utility/shared_spinlock.hpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2023 Krisztián Rugási. Subject to the MIT License. */ + +#ifndef GA_UTILITY_SHARED_SPINLOCK_HPP +#define GA_UTILITY_SHARED_SPINLOCK_HPP + +#include "utility.hpp" +#include "spinlock.hpp" +#include +#include + +namespace gapp::detail +{ + class shared_spinlock + { + public: + void lock() noexcept + { + lock_.lock(); + while (read_cnt_.load(std::memory_order_relaxed)) GAPP_PAUSE(); + std::atomic_thread_fence(std::memory_order_acquire); + } + + bool try_lock() noexcept + { + return lock_.try_lock() && !read_cnt_.load(std::memory_order_acquire); + } + + void unlock() noexcept + { + lock_.unlock(); + } + + void lock_shared() noexcept + { + lock_.lock(); + read_cnt_.fetch_add(1, std::memory_order_relaxed); + lock_.unlock(); + } + + bool try_lock_shared() noexcept + { + if (lock_.try_lock()) + { + read_cnt_.fetch_add(1, std::memory_order_relaxed); + lock_.unlock(); + return true; + } + return false; + } + + void unlock_shared() noexcept + { + read_cnt_.fetch_sub(1, std::memory_order_release); + } + + private: + std::atomic read_cnt_; + detail::spinlock lock_; + }; + +} // namespace gapp::detail + +#endif // !GA_UTILITY_SHARED_SPINLOCK_HPP diff --git a/src/utility/spinlock.hpp b/src/utility/spinlock.hpp new file mode 100644 index 00000000..4c7e58dd --- /dev/null +++ b/src/utility/spinlock.hpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2023 Krisztián Rugási. Subject to the MIT License. */ + +#ifndef GA_UTILITY_SPINLOCK_HPP +#define GA_UTILITY_SPINLOCK_HPP + +#include "utility.hpp" +#include + +namespace gapp::detail +{ + class spinlock + { + public: + void lock() noexcept + { + while (true) + { + if (!locked_.test_and_set(std::memory_order_acquire)) break; + while (locked_.test(std::memory_order_relaxed)) GAPP_PAUSE(); + } + } + + bool try_lock() noexcept + { + return !locked_.test(std::memory_order_relaxed) && + !locked_.test_and_set(std::memory_order_acquire); + } + + void unlock() noexcept + { + locked_.clear(std::memory_order_release); + } + + private: + std::atomic_flag locked_; + }; + +} // namespace gapp::detail + +#endif // !GA_UTILITY_SPINLOCK_HPP diff --git a/src/utility/utility.hpp b/src/utility/utility.hpp index fc0f1662..10e53ee8 100644 --- a/src/utility/utility.hpp +++ b/src/utility/utility.hpp @@ -11,12 +11,10 @@ #include -#if __has_cpp_attribute(assume) -# define GAPP_ASSUME(expr) [[assume(expr)]] -#elif defined(_MSC_VER) && !defined(__clang__) -# define GAPP_ASSUME(expr) //__assume(expr) +#if defined(_MSC_VER) && !defined(__clang__) +# define GAPP_ASSUME(expr) __assume(expr) #elif defined(__clang__) -# define GAPP_ASSUME(expr) //__builtin_assume(expr) +# define GAPP_ASSUME(expr) __builtin_assume(expr) #else # define GAPP_ASSUME(expr) #endif @@ -95,7 +93,7 @@ #ifndef NDEBUG # define GAPP_ASSERT(condition, ...) assert( (condition) __VA_OPT__(&& (__VA_ARGS__)) ) #else -# define GAPP_ASSERT(condition, ...) GAPP_ASSUME(condition) +# define GAPP_ASSERT(condition, ...) #endif @@ -106,15 +104,6 @@ #endif -#ifndef GAPP_EXCUTION_UNSEQ -# define GAPP_EXEC_UNSEQ std::execution::par_unseq -#endif - -#ifndef GAPP_EXEC_SEQ -# define GAPP_EXEC_SEQ std::execution::par -#endif - - namespace gapp { constexpr std::size_t operator ""_sz(unsigned long long arg) noexcept diff --git a/test/benchmark/rcu.cpp b/test/benchmark/rcu.cpp index 4490fbb1..4c8565b5 100644 --- a/test/benchmark/rcu.cpp +++ b/test/benchmark/rcu.cpp @@ -20,5 +20,5 @@ TEST_CASE("rcu_lock", "[benchmark]") BENCHMARK("read") { return number; }; BENCHMARK("atomic_fetch_add") { return atomic_number.fetch_add(1); }; BENCHMARK("rwlock_read") { std::shared_lock _{ rwlock }; return number; }; - BENCHMARK("rcu_read") { std::scoped_lock _{ rcu_number }; return rcu_number.get(); }; + BENCHMARK("rcu_read") { std::shared_lock _{ rcu_number }; return rcu_number.get(); }; } diff --git a/test/misc/rcu.cpp b/test/misc/rcu.cpp index 13c3d6ef..c543f307 100644 --- a/test/misc/rcu.cpp +++ b/test/misc/rcu.cpp @@ -2,7 +2,7 @@ #include "utility/rcu.hpp" #include -#include +#include #include #include @@ -16,7 +16,7 @@ static const auto reader_func = [] { while (true) { - std::scoped_lock _{ number }; + std::shared_lock _{ number }; [[maybe_unused]] const int& n = number.get(); std::this_thread::sleep_for(2ms); assert(0 <= n && n <= 100); diff --git a/test/tsan/CMakeLists.txt b/test/tsan/CMakeLists.txt new file mode 100644 index 00000000..f20746d8 --- /dev/null +++ b/test/tsan/CMakeLists.txt @@ -0,0 +1,15 @@ +find_package(Catch2 3 REQUIRED) + +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -wd4388") +endif() + +file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp") +file(GLOB_RECURSE TEST_HEADERS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp") + +add_executable(tsan_tests ${TEST_SOURCES} ${TEST_HEADERS}) + +target_link_libraries(tsan_tests PRIVATE Catch2::Catch2WithMain gapp) + +include(Catch) +catch_discover_tests(tsan_tests) diff --git a/test/tsan/tsan.cpp b/test/tsan/tsan.cpp new file mode 100644 index 00000000..c7a8ebd7 --- /dev/null +++ b/test/tsan/tsan.cpp @@ -0,0 +1,36 @@ +#include +#include +#include + +std::vector numbers = { 0, 1, 2, 3 }; +std::mutex mtx; + +TEST_CASE("empty", "[tsan]") +{ + std::for_each(std::execution::par, numbers.begin(), numbers.end(), [](const auto&) {}); +} + +TEST_CASE("simple", "[tsan]") +{ + { std::scoped_lock _{ mtx }; } + std::for_each(std::execution::par, numbers.begin(), numbers.end(), [](auto& n) { + std::scoped_lock _{ mtx }; ++n; + }); + { std::scoped_lock _{ mtx }; } // TSAN: race with numbers vector destructor +} + +//TEST_CASE("race1", "[tsan]") +//{ +// std::vector numbers2{ 0, 1, 2, 3 }; +// std::for_each(std::execution::par, numbers2.begin(), numbers2.end(), [](auto& n) { +// ++n; +// }); +//} + +//TEST_CASE("race2", "[tsan]") +//{ +// std::vector numbers2{ 0, 1, 2, 3 }; +// std::for_each(std::execution::par, numbers2.begin(), numbers2.end(), [](auto& n) { +// numbers[0] += n; +// }); +//}