diff --git a/CMakeLists.txt b/CMakeLists.txt index fadc37f0..5bc2d860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,7 +63,8 @@ generate_export_header(Pipe pipe_target_define_platform(Pipe) target_include_directories(Pipe PUBLIC $) -file(GLOB_RECURSE PIPE_SOURCE_FILES CONFIGURE_DEPENDS Src/*.cpp Src/*.h) +target_include_directories(Pipe PRIVATE $) +file(GLOB_RECURSE PIPE_SOURCE_FILES CONFIGURE_DEPENDS Src/*.cpp Src/*.c) target_sources(Pipe PRIVATE ${PIPE_SOURCE_FILES}) target_compile_definitions(Pipe PRIVATE NOMINMAX) diff --git a/Include/Pipe/Core/String.h b/Include/Pipe/Core/String.h index 01666852..39ac5de0 100644 --- a/Include/Pipe/Core/String.h +++ b/Include/Pipe/Core/String.h @@ -7,7 +7,7 @@ #include "Pipe/Core/STDFormat.h" #include "Pipe/Core/StringView.h" #include "Pipe/Core/Utility.h" -#include "Pipe/Extern/utf8.h" +#include "Pipe/Extern/utf8/unchecked.h" #include "Pipe/Memory/STLAllocator.h" #include "PipeArrays.h" #include "PipeSerializeFwd.h" @@ -120,19 +120,19 @@ namespace p } else if constexpr (sizeof(FromChar) == 1 && sizeof(ToChar) == 2) { - utf8::utf8to16(source.begin(), source.end(), std::back_inserter(dest)); + utf8::unchecked::utf8to16(source.begin(), source.end(), std::back_inserter(dest)); } else if constexpr (sizeof(FromChar) == 2 && sizeof(ToChar) == 1) { - utf8::utf16to8(source.begin(), source.end(), std::back_inserter(dest)); + utf8::unchecked::utf16to8(source.begin(), source.end(), std::back_inserter(dest)); } else if constexpr (sizeof(FromChar) == 1 && sizeof(ToChar) == 4) { - utf8::utf8to32(source.begin(), source.end(), std::back_inserter(dest)); + utf8::unchecked::utf8to32(source.begin(), source.end(), std::back_inserter(dest)); } else if constexpr (sizeof(FromChar) == 4 && sizeof(ToChar) == 1) { - utf8::utf32to8(source.begin(), source.end(), std::back_inserter(dest)); + utf8::unchecked::utf32to8(source.begin(), source.end(), std::back_inserter(dest)); } else { diff --git a/Include/Pipe/Extern/sparse_hash.h b/Include/Pipe/Extern/sparse_hash.h index 90a7fc60..23871daf 100644 --- a/Include/Pipe/Extern/sparse_hash.h +++ b/Include/Pipe/Extern/sparse_hash.h @@ -24,6 +24,8 @@ #ifndef TSL_SPARSE_HASH_H #define TSL_SPARSE_HASH_H +#include "Pipe/Extern/sparse_growth_policy.h" + #include #include #include @@ -38,2199 +40,2493 @@ #include #include -#include "Pipe/Extern/sparse_growth_policy.h" #ifdef __INTEL_COMPILER -#include // For _popcnt32 and _popcnt64 + #include // For _popcnt32 and _popcnt64 #endif #ifdef _MSC_VER -#include // For __cpuid, __popcnt and __popcnt64 + #include // For __cpuid, __popcnt and __popcnt64 #endif #ifdef TSL_DEBUG -#define tsl_sh_assert(expr) assert(expr) + #define tsl_sh_assert(expr) assert(expr) #else -#define tsl_sh_assert(expr) (static_cast(0)) + #define tsl_sh_assert(expr) (static_cast(0)) #endif -namespace tsl { - -namespace sh { -enum class probing { linear, quadratic }; - -enum class exception_safety { basic, strong }; - -enum class sparsity { high, medium, low }; -} // namespace sh - -namespace detail_popcount { -/** - * Define the popcount(ll) methods and pick-up the best depending on the - * compiler. - */ - -// From Wikipedia: https://en.wikipedia.org/wiki/Hamming_weight -inline int fallback_popcountll(unsigned long long int x) { - static_assert( - sizeof(unsigned long long int) == sizeof(std::uint64_t), - "sizeof(unsigned long long int) must be equal to sizeof(std::uint64_t). 
" - "Open a feature request if you need support for a platform where it " - "isn't the case."); - - const std::uint64_t m1 = 0x5555555555555555ull; - const std::uint64_t m2 = 0x3333333333333333ull; - const std::uint64_t m4 = 0x0f0f0f0f0f0f0f0full; - const std::uint64_t h01 = 0x0101010101010101ull; - - x -= (x >> 1ull) & m1; - x = (x & m2) + ((x >> 2ull) & m2); - x = (x + (x >> 4ull)) & m4; - return static_cast((x * h01) >> (64ull - 8ull)); -} - -inline int fallback_popcount(unsigned int x) { - static_assert(sizeof(unsigned int) == sizeof(std::uint32_t) || - sizeof(unsigned int) == sizeof(std::uint64_t), - "sizeof(unsigned int) must be equal to sizeof(std::uint32_t) " - "or sizeof(std::uint64_t). " - "Open a feature request if you need support for a platform " - "where it isn't the case."); - - if (sizeof(unsigned int) == sizeof(std::uint32_t)) { - const std::uint32_t m1 = 0x55555555; - const std::uint32_t m2 = 0x33333333; - const std::uint32_t m4 = 0x0f0f0f0f; - const std::uint32_t h01 = 0x01010101; - - x -= (x >> 1) & m1; - x = (x & m2) + ((x >> 2) & m2); - x = (x + (x >> 4)) & m4; - return static_cast((x * h01) >> (32 - 8)); - } else { - return fallback_popcountll(x); - } -} +namespace tsl +{ + + namespace sh + { + enum class probing + { + linear, + quadratic + }; + + enum class exception_safety + { + basic, + strong + }; + + enum class sparsity + { + high, + medium, + low + }; + } // namespace sh + + namespace detail_popcount + { + /** + * Define the popcount(ll) methods and pick-up the best depending on the + * compiler. + */ + + // From Wikipedia: https://en.wikipedia.org/wiki/Hamming_weight + inline int fallback_popcountll(unsigned long long int x) + { + static_assert(sizeof(unsigned long long int) == sizeof(std::uint64_t), + "sizeof(unsigned long long int) must be equal to sizeof(std::uint64_t). " + "Open a feature request if you need support for a platform where it " + "isn't the case."); + + const std::uint64_t m1 = 0x5555555555555555ull; + const std::uint64_t m2 = 0x3333333333333333ull; + const std::uint64_t m4 = 0x0f0f0f0f0f0f0f0full; + const std::uint64_t h01 = 0x0101010101010101ull; + + x -= (x >> 1ull) & m1; + x = (x & m2) + ((x >> 2ull) & m2); + x = (x + (x >> 4ull)) & m4; + return static_cast((x * h01) >> (64ull - 8ull)); + } + + inline int fallback_popcount(unsigned int x) + { + static_assert(sizeof(unsigned int) == sizeof(std::uint32_t) + || sizeof(unsigned int) == sizeof(std::uint64_t), + "sizeof(unsigned int) must be equal to sizeof(std::uint32_t) " + "or sizeof(std::uint64_t). 
" + "Open a feature request if you need support for a platform " + "where it isn't the case."); + + if (sizeof(unsigned int) == sizeof(std::uint32_t)) + { + const std::uint32_t m1 = 0x55555555; + const std::uint32_t m2 = 0x33333333; + const std::uint32_t m4 = 0x0f0f0f0f; + const std::uint32_t h01 = 0x01010101; + + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return static_cast((x * h01) >> (32 - 8)); + } + else + { + return fallback_popcountll(x); + } + } #if defined(__clang__) || defined(__GNUC__) -inline int popcountll(unsigned long long int value) { - return __builtin_popcountll(value); -} + inline int popcountll(unsigned long long int value) + { + return __builtin_popcountll(value); + } -inline int popcount(unsigned int value) { return __builtin_popcount(value); } + inline int popcount(unsigned int value) + { + return __builtin_popcount(value); + } #elif defined(_MSC_VER) -/** - * We need to check for popcount support at runtime on Windows with __cpuid - * See https://msdn.microsoft.com/en-us/library/bb385231.aspx - */ -inline bool has_popcount_support() { - int cpu_infos[4]; - __cpuid(cpu_infos, 1); - return (cpu_infos[2] & (1 << 23)) != 0; -} - -inline int popcountll(unsigned long long int value) { -#ifdef _WIN64 - static_assert( - sizeof(unsigned long long int) == sizeof(std::int64_t), - "sizeof(unsigned long long int) must be equal to sizeof(std::int64_t). "); - - static const bool has_popcount = has_popcount_support(); - return has_popcount - ? static_cast(__popcnt64(static_cast(value))) - : fallback_popcountll(value); -#else - return fallback_popcountll(value); -#endif -} - -inline int popcount(unsigned int value) { - static_assert(sizeof(unsigned int) == sizeof(std::int32_t), - "sizeof(unsigned int) must be equal to sizeof(std::int32_t). "); - - static const bool has_popcount = has_popcount_support(); - return has_popcount - ? static_cast(__popcnt(static_cast(value))) - : fallback_popcount(value); -} + /** + * We need to check for popcount support at runtime on Windows with __cpuid + * See https://msdn.microsoft.com/en-us/library/bb385231.aspx + */ + inline bool has_popcount_support() + { + int cpu_infos[4]; + __cpuid(cpu_infos, 1); + return (cpu_infos[2] & (1 << 23)) != 0; + } + + inline int popcountll(unsigned long long int value) + { + #ifdef _WIN64 + static_assert(sizeof(unsigned long long int) == sizeof(std::int64_t), + "sizeof(unsigned long long int) must be equal to sizeof(std::int64_t). "); + + static const bool has_popcount = has_popcount_support(); + return has_popcount ? static_cast(__popcnt64(static_cast(value))) + : fallback_popcountll(value); + #else + return fallback_popcountll(value); + #endif + } + + inline int popcount(unsigned int value) + { + static_assert(sizeof(unsigned int) == sizeof(std::int32_t), + "sizeof(unsigned int) must be equal to sizeof(std::int32_t). "); + + static const bool has_popcount = has_popcount_support(); + return has_popcount ? 
static_cast(__popcnt(static_cast(value))) + : fallback_popcount(value); + } #elif defined(__INTEL_COMPILER) -inline int popcountll(unsigned long long int value) { - static_assert(sizeof(unsigned long long int) == sizeof(__int64), ""); - return _popcnt64(static_cast<__int64>(value)); -} + inline int popcountll(unsigned long long int value) + { + static_assert(sizeof(unsigned long long int) == sizeof(__int64), ""); + return _popcnt64(static_cast<__int64>(value)); + } -inline int popcount(unsigned int value) { - return _popcnt32(static_cast(value)); -} + inline int popcount(unsigned int value) + { + return _popcnt32(static_cast(value)); + } #else -inline int popcountll(unsigned long long int x) { - return fallback_popcountll(x); -} + inline int popcountll(unsigned long long int x) + { + return fallback_popcountll(x); + } -inline int popcount(unsigned int x) { return fallback_popcount(x); } + inline int popcount(unsigned int x) + { + return fallback_popcount(x); + } #endif -} // namespace detail_popcount - -namespace detail_sparse_hash { - -template -struct make_void { - using type = void; -}; - -template -struct has_is_transparent : std::false_type {}; - -template -struct has_is_transparent::type> - : std::true_type {}; - -template -struct is_power_of_two_policy : std::false_type {}; - -template -struct is_power_of_two_policy> - : std::true_type {}; - -inline constexpr bool is_power_of_two(std::size_t value) { - return value != 0 && (value & (value - 1)) == 0; -} - -inline std::size_t round_up_to_power_of_two(std::size_t value) { - if (is_power_of_two(value)) { - return value; - } - - if (value == 0) { - return 1; - } - - --value; - for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { - value |= value >> i; - } - - return value + 1; -} - -template -static T numeric_cast(U value, - const char *error_message = "numeric_cast() failed.") { - T ret = static_cast(value); - if (static_cast(ret) != value) { - TSL_SH_THROW_OR_ABORT(std::runtime_error, error_message); - } - - const bool is_same_signedness = - (std::is_unsigned::value && std::is_unsigned::value) || - (std::is_signed::value && std::is_signed::value); - if (!is_same_signedness && (ret < T{}) != (value < U{})) { - TSL_SH_THROW_OR_ABORT(std::runtime_error, error_message); - } - - return ret; -} - -/** - * Fixed size type used to represent size_type values on serialization. Need to - * be big enough to represent a std::size_t on 32 and 64 bits platforms, and - * must be the same size on both platforms. 
- */ -using slz_size_type = std::uint64_t; -static_assert(std::numeric_limits::max() >= - std::numeric_limits::max(), - "slz_size_type must be >= std::size_t"); - -template -static T deserialize_value(Deserializer &deserializer) { - // MSVC < 2017 is not conformant, circumvent the problem by removing the - // template keyword + } // namespace detail_popcount + + namespace detail_sparse_hash + { + + template + struct make_void + { + using type = void; + }; + + template + struct has_is_transparent : std::false_type + {}; + + template + struct has_is_transparent::type> + : std::true_type + {}; + + template + struct is_power_of_two_policy : std::false_type + {}; + + template + struct is_power_of_two_policy> + : std::true_type + {}; + + inline constexpr bool is_power_of_two(std::size_t value) + { + return value != 0 && (value & (value - 1)) == 0; + } + + inline std::size_t round_up_to_power_of_two(std::size_t value) + { + if (is_power_of_two(value)) + { + return value; + } + + if (value == 0) + { + return 1; + } + + --value; + for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) + { + value |= value >> i; + } + + return value + 1; + } + + template + static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") + { + T ret = static_cast(value); + if (static_cast(ret) != value) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, error_message); + } + + const bool is_same_signedness = + (std::is_unsigned::value && std::is_unsigned::value) + || (std::is_signed::value && std::is_signed::value); + if (!is_same_signedness && (ret < T{}) != (value < U{})) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, error_message); + } + + return ret; + } + + /** + * Fixed size type used to represent size_type values on serialization. Need to + * be big enough to represent a std::size_t on 32 and 64 bits platforms, and + * must be the same size on both platforms. + */ + using slz_size_type = std::uint64_t; + static_assert( + std::numeric_limits::max() >= std::numeric_limits::max(), + "slz_size_type must be >= std::size_t"); + + template + static T deserialize_value(Deserializer& deserializer) + { + // MSVC < 2017 is not conformant, circumvent the problem by removing the + // template keyword #if defined(_MSC_VER) && _MSC_VER < 1910 - return deserializer.Deserializer::operator()(); + return deserializer.Deserializer::operator()(); #else - return deserializer.Deserializer::template operator()(); + return deserializer.Deserializer::template operator()(); #endif -} - -/** - * WARNING: the sparse_array class doesn't free the ressources allocated through - * the allocator passed in parameter in each method. You have to manually call - * `clear(Allocator&)` when you don't need a sparse_array object anymore. - * - * The reason is that the sparse_array doesn't store the allocator to avoid - * wasting space in each sparse_array when the allocator has a size > 0. It only - * allocates/deallocates objects with the allocator that is passed in parameter. - * - * - * - * Index denotes a value between [0, BITMAP_NB_BITS), it is an index similar to - * std::vector. Offset denotes the real position in `m_values` corresponding to - * an index. - * - * We are using raw pointers instead of std::vector to avoid loosing - * 2*sizeof(size_t) bytes to store the capacity and size of the vector in each - * sparse_array. We know we can only store up to BITMAP_NB_BITS elements in the - * array, we don't need such big types. - * - * - * T must be nothrow move constructible and/or copy constructible. 
- * Behaviour is undefined if the destructor of T throws an exception. - * - * See https://smerity.com/articles/2015/google_sparsehash.html for details on - * the idea behinds the implementation. - * - * TODO Check to use std::realloc and std::memmove when possible - */ -template -class sparse_array { - public: - using value_type = T; - using size_type = std::uint_least8_t; - using allocator_type = Allocator; - using iterator = value_type *; - using const_iterator = const value_type *; - - private: - static const size_type CAPACITY_GROWTH_STEP = - (Sparsity == tsl::sh::sparsity::high) ? 2 - : (Sparsity == tsl::sh::sparsity::medium) - ? 4 - : 8; // (Sparsity == tsl::sh::sparsity::low) - - /** - * Bitmap size configuration. - * Use 32 bits for the bitmap on 32-bits or less environnement as popcount on - * 64 bits numbers is slow on these environnement. Use 64 bits bitmap - * otherwise. - */ + } + + /** + * WARNING: the sparse_array class doesn't free the ressources allocated through + * the allocator passed in parameter in each method. You have to manually call + * `clear(Allocator&)` when you don't need a sparse_array object anymore. + * + * The reason is that the sparse_array doesn't store the allocator to avoid + * wasting space in each sparse_array when the allocator has a size > 0. It only + * allocates/deallocates objects with the allocator that is passed in parameter. + * + * + * + * Index denotes a value between [0, BITMAP_NB_BITS), it is an index similar to + * std::vector. Offset denotes the real position in `m_values` corresponding to + * an index. + * + * We are using raw pointers instead of std::vector to avoid loosing + * 2*sizeof(size_t) bytes to store the capacity and size of the vector in each + * sparse_array. We know we can only store up to BITMAP_NB_BITS elements in the + * array, we don't need such big types. + * + * + * T must be nothrow move constructible and/or copy constructible. + * Behaviour is undefined if the destructor of T throws an exception. + * + * See https://smerity.com/articles/2015/google_sparsehash.html for details on + * the idea behinds the implementation. + * + * TODO Check to use std::realloc and std::memmove when possible + */ + template + class sparse_array + { + public: + using value_type = T; + using size_type = std::uint_least8_t; + using allocator_type = Allocator; + using iterator = value_type*; + using const_iterator = const value_type*; + + private: + static const size_type CAPACITY_GROWTH_STEP = + (Sparsity == tsl::sh::sparsity::high) ? 2 + : (Sparsity == tsl::sh::sparsity::medium) + ? 4 + : 8; // (Sparsity == tsl::sh::sparsity::low) + + /** + * Bitmap size configuration. + * Use 32 bits for the bitmap on 32-bits or less environnement as popcount on + * 64 bits numbers is slow on these environnement. Use 64 bits bitmap + * otherwise. 
+ */ #if SIZE_MAX <= UINT32_MAX - using bitmap_type = std::uint_least32_t; - static const std::size_t BITMAP_NB_BITS = 32; - static const std::size_t BUCKET_SHIFT = 5; + using bitmap_type = std::uint_least32_t; + static const std::size_t BITMAP_NB_BITS = 32; + static const std::size_t BUCKET_SHIFT = 5; #else - using bitmap_type = std::uint_least64_t; - static const std::size_t BITMAP_NB_BITS = 64; - static const std::size_t BUCKET_SHIFT = 6; + using bitmap_type = std::uint_least64_t; + static const std::size_t BITMAP_NB_BITS = 64; + static const std::size_t BUCKET_SHIFT = 6; #endif - static const std::size_t BUCKET_MASK = BITMAP_NB_BITS - 1; - - static_assert(is_power_of_two(BITMAP_NB_BITS), - "BITMAP_NB_BITS must be a power of two."); - static_assert(std::numeric_limits::digits >= BITMAP_NB_BITS, - "bitmap_type must be able to hold at least BITMAP_NB_BITS."); - static_assert((std::size_t(1) << BUCKET_SHIFT) == BITMAP_NB_BITS, - "(1 << BUCKET_SHIFT) must be equal to BITMAP_NB_BITS."); - static_assert(std::numeric_limits::max() >= BITMAP_NB_BITS, - "size_type must be big enough to hold BITMAP_NB_BITS."); - static_assert(std::is_unsigned::value, - "bitmap_type must be unsigned."); - static_assert((std::numeric_limits::max() & BUCKET_MASK) == - BITMAP_NB_BITS - 1, - ""); - - public: - /** - * Map an ibucket [0, bucket_count) in the hash table to a sparse_ibucket - * (a sparse_array holds multiple buckets, so there is less sparse_array than - * bucket_count). - * - * The bucket ibucket is in - * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] - * instead of something like m_buckets[ibucket] in a classical hash table. - */ - static std::size_t sparse_ibucket(std::size_t ibucket) { - return ibucket >> BUCKET_SHIFT; - } - - /** - * Map an ibucket [0, bucket_count) in the hash table to an index in the - * sparse_array which corresponds to the bucket. - * - * The bucket ibucket is in - * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] - * instead of something like m_buckets[ibucket] in a classical hash table. 
- */ - static typename sparse_array::size_type index_in_sparse_bucket( - std::size_t ibucket) { - return static_cast( - ibucket & sparse_array::BUCKET_MASK); - } - - static std::size_t nb_sparse_buckets(std::size_t bucket_count) noexcept { - if (bucket_count == 0) { - return 0; - } - - return std::max( - 1, sparse_ibucket(tsl::detail_sparse_hash::round_up_to_power_of_two( - bucket_count))); - } - - public: - sparse_array() noexcept - : m_values(nullptr), - m_bitmap_vals(0), - m_bitmap_deleted_vals(0), - m_nb_elements(0), - m_capacity(0), - m_last_array(false) {} - - explicit sparse_array(bool last_bucket) noexcept - : m_values(nullptr), - m_bitmap_vals(0), - m_bitmap_deleted_vals(0), - m_nb_elements(0), - m_capacity(0), - m_last_array(last_bucket) {} - - sparse_array(size_type capacity, Allocator &alloc) - : m_values(nullptr), - m_bitmap_vals(0), - m_bitmap_deleted_vals(0), - m_nb_elements(0), - m_capacity(capacity), - m_last_array(false) { - if (m_capacity > 0) { - m_values = alloc.allocate(m_capacity); - tsl_sh_assert(m_values != - nullptr); // allocate should throw if there is a failure - } - } - - sparse_array(const sparse_array &other, Allocator &alloc) - : m_values(nullptr), - m_bitmap_vals(other.m_bitmap_vals), - m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), - m_nb_elements(0), - m_capacity(other.m_capacity), - m_last_array(other.m_last_array) { - tsl_sh_assert(other.m_capacity >= other.m_nb_elements); - if (m_capacity == 0) { - return; - } - - m_values = alloc.allocate(m_capacity); - tsl_sh_assert(m_values != - nullptr); // allocate should throw if there is a failure - TSL_SH_TRY { - for (size_type i = 0; i < other.m_nb_elements; i++) { - construct_value(alloc, m_values + i, other.m_values[i]); - m_nb_elements++; - } - } - TSL_SH_CATCH(...) { - clear(alloc); - TSL_SH_RETRHOW; - } - } - - sparse_array(sparse_array &&other) noexcept - : m_values(other.m_values), - m_bitmap_vals(other.m_bitmap_vals), - m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), - m_nb_elements(other.m_nb_elements), - m_capacity(other.m_capacity), - m_last_array(other.m_last_array) { - other.m_values = nullptr; - other.m_bitmap_vals = 0; - other.m_bitmap_deleted_vals = 0; - other.m_nb_elements = 0; - other.m_capacity = 0; - } - - sparse_array(sparse_array &&other, Allocator &alloc) - : m_values(nullptr), - m_bitmap_vals(other.m_bitmap_vals), - m_bitmap_deleted_vals(other.m_bitmap_deleted_vals), - m_nb_elements(0), - m_capacity(other.m_capacity), - m_last_array(other.m_last_array) { - tsl_sh_assert(other.m_capacity >= other.m_nb_elements); - if (m_capacity == 0) { - return; - } - - m_values = alloc.allocate(m_capacity); - tsl_sh_assert(m_values != - nullptr); // allocate should throw if there is a failure - TSL_SH_TRY { - for (size_type i = 0; i < other.m_nb_elements; i++) { - construct_value(alloc, m_values + i, std::move(other.m_values[i])); - m_nb_elements++; - } - } - TSL_SH_CATCH(...) { - clear(alloc); - TSL_SH_RETRHOW; - } - } - - sparse_array &operator=(const sparse_array &) = delete; - sparse_array &operator=(sparse_array &&) = delete; - - ~sparse_array() noexcept { - // The code that manages the sparse_array must have called clear before - // destruction. See documentation of sparse_array for more details. 
- tsl_sh_assert(m_capacity == 0 && m_nb_elements == 0 && m_values == nullptr); - } - - iterator begin() noexcept { return m_values; } - iterator end() noexcept { return m_values + m_nb_elements; } - const_iterator begin() const noexcept { return cbegin(); } - const_iterator end() const noexcept { return cend(); } - const_iterator cbegin() const noexcept { return m_values; } - const_iterator cend() const noexcept { return m_values + m_nb_elements; } - - bool empty() const noexcept { return m_nb_elements == 0; } - - size_type size() const noexcept { return m_nb_elements; } - - void clear(allocator_type &alloc) noexcept { - destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); - - m_values = nullptr; - m_bitmap_vals = 0; - m_bitmap_deleted_vals = 0; - m_nb_elements = 0; - m_capacity = 0; - } - - bool last() const noexcept { return m_last_array; } - - void set_as_last() noexcept { m_last_array = true; } - - bool has_value(size_type index) const noexcept { - tsl_sh_assert(index < BITMAP_NB_BITS); - return (m_bitmap_vals & (bitmap_type(1) << index)) != 0; - } - - bool has_deleted_value(size_type index) const noexcept { - tsl_sh_assert(index < BITMAP_NB_BITS); - return (m_bitmap_deleted_vals & (bitmap_type(1) << index)) != 0; - } - - iterator value(size_type index) noexcept { - tsl_sh_assert(has_value(index)); - return m_values + index_to_offset(index); - } - - const_iterator value(size_type index) const noexcept { - tsl_sh_assert(has_value(index)); - return m_values + index_to_offset(index); - } - - /** - * Return iterator to set value. - */ - template - iterator set(allocator_type &alloc, size_type index, Args &&...value_args) { - tsl_sh_assert(!has_value(index)); - - const size_type offset = index_to_offset(index); - insert_at_offset(alloc, offset, std::forward(value_args)...); - - m_bitmap_vals = (m_bitmap_vals | (bitmap_type(1) << index)); - m_bitmap_deleted_vals = - (m_bitmap_deleted_vals & ~(bitmap_type(1) << index)); - - m_nb_elements++; - - tsl_sh_assert(has_value(index)); - tsl_sh_assert(!has_deleted_value(index)); - - return m_values + offset; - } - - iterator erase(allocator_type &alloc, iterator position) { - const size_type offset = - static_cast(std::distance(begin(), position)); - return erase(alloc, position, offset_to_index(offset)); - } - - // Return the next value or end if no next value - iterator erase(allocator_type &alloc, iterator position, size_type index) { - tsl_sh_assert(has_value(index)); - tsl_sh_assert(!has_deleted_value(index)); - - const size_type offset = - static_cast(std::distance(begin(), position)); - erase_at_offset(alloc, offset); - - m_bitmap_vals = (m_bitmap_vals & ~(bitmap_type(1) << index)); - m_bitmap_deleted_vals = (m_bitmap_deleted_vals | (bitmap_type(1) << index)); - - m_nb_elements--; - - tsl_sh_assert(!has_value(index)); - tsl_sh_assert(has_deleted_value(index)); - - return m_values + offset; - } - - void swap(sparse_array &other) { - using std::swap; - - swap(m_values, other.m_values); - swap(m_bitmap_vals, other.m_bitmap_vals); - swap(m_bitmap_deleted_vals, other.m_bitmap_deleted_vals); - swap(m_nb_elements, other.m_nb_elements); - swap(m_capacity, other.m_capacity); - swap(m_last_array, other.m_last_array); - } - - static iterator mutable_iterator(const_iterator pos) { - return const_cast(pos); - } - - template - void serialize(Serializer &serializer) const { - const slz_size_type sparse_bucket_size = m_nb_elements; - serializer(sparse_bucket_size); - - const slz_size_type bitmap_vals = m_bitmap_vals; - 
serializer(bitmap_vals); - - const slz_size_type bitmap_deleted_vals = m_bitmap_deleted_vals; - serializer(bitmap_deleted_vals); - - for (const value_type &value : *this) { - serializer(value); - } - } - - template - static sparse_array deserialize_hash_compatible(Deserializer &deserializer, - Allocator &alloc) { - const slz_size_type sparse_bucket_size = - deserialize_value(deserializer); - const slz_size_type bitmap_vals = - deserialize_value(deserializer); - const slz_size_type bitmap_deleted_vals = - deserialize_value(deserializer); - - if (sparse_bucket_size > BITMAP_NB_BITS) { - TSL_SH_THROW_OR_ABORT( - std::runtime_error, - "Deserialized sparse_bucket_size is too big for the platform. " - "Maximum should be BITMAP_NB_BITS."); - } - - sparse_array sarray; - if (sparse_bucket_size == 0) { - return sarray; - } - - sarray.m_bitmap_vals = numeric_cast( - bitmap_vals, "Deserialized bitmap_vals is too big."); - sarray.m_bitmap_deleted_vals = numeric_cast( - bitmap_deleted_vals, "Deserialized bitmap_deleted_vals is too big."); - - sarray.m_capacity = numeric_cast( - sparse_bucket_size, "Deserialized sparse_bucket_size is too big."); - sarray.m_values = alloc.allocate(sarray.m_capacity); - - TSL_SH_TRY { - for (size_type ivalue = 0; ivalue < sarray.m_capacity; ivalue++) { - construct_value(alloc, sarray.m_values + ivalue, - deserialize_value(deserializer)); - sarray.m_nb_elements++; - } - } - TSL_SH_CATCH(...) { - sarray.clear(alloc); - TSL_SH_RETRHOW; - } - - return sarray; - } - - /** - * Deserialize the values of the bucket and insert them all in sparse_hash - * through sparse_hash.insert(...). - */ - template - static void deserialize_values_into_sparse_hash(Deserializer &deserializer, - SparseHash &sparse_hash) { - const slz_size_type sparse_bucket_size = - deserialize_value(deserializer); - - const slz_size_type bitmap_vals = - deserialize_value(deserializer); - static_cast(bitmap_vals); // Ignore, not needed - - const slz_size_type bitmap_deleted_vals = - deserialize_value(deserializer); - static_cast(bitmap_deleted_vals); // Ignore, not needed - - for (slz_size_type ivalue = 0; ivalue < sparse_bucket_size; ivalue++) { - sparse_hash.insert(deserialize_value(deserializer)); - } - } - - private: - template - static void construct_value(allocator_type &alloc, value_type *value, - Args &&...value_args) { - std::allocator_traits::construct( - alloc, value, std::forward(value_args)...); - } - - static void destroy_value(allocator_type &alloc, value_type *value) noexcept { - std::allocator_traits::destroy(alloc, value); - } - - static void destroy_and_deallocate_values( - allocator_type &alloc, value_type *values, size_type nb_values, - size_type capacity_values) noexcept { - for (size_type i = 0; i < nb_values; i++) { - destroy_value(alloc, values + i); - } - - alloc.deallocate(values, capacity_values); - } - - static size_type popcount(bitmap_type val) noexcept { - if (sizeof(bitmap_type) <= sizeof(unsigned int)) { - return static_cast( - tsl::detail_popcount::popcount(static_cast(val))); - } else { - return static_cast(tsl::detail_popcount::popcountll(val)); - } - } - - size_type index_to_offset(size_type index) const noexcept { - tsl_sh_assert(index < BITMAP_NB_BITS); - return popcount(m_bitmap_vals & - ((bitmap_type(1) << index) - bitmap_type(1))); - } - - // TODO optimize - size_type offset_to_index(size_type offset) const noexcept { - tsl_sh_assert(offset < m_nb_elements); - - bitmap_type bitmap_vals = m_bitmap_vals; - size_type index = 0; - size_type nb_ones = 0; - - while 
(bitmap_vals != 0) { - if ((bitmap_vals & 0x1) == 1) { - if (nb_ones == offset) { - break; - } - - nb_ones++; - } - - index++; - bitmap_vals = bitmap_vals >> 1; - } - - return index; - } - - size_type next_capacity() const noexcept { - return static_cast(m_capacity + CAPACITY_GROWTH_STEP); - } - - /** - * Insertion - * - * Two situations: - * - Either we are in a situation where - * std::is_nothrow_move_constructible::value is true. In this - * case, on insertion we just reallocate m_values when we reach its capacity - * (i.e. m_nb_elements == m_capacity), otherwise we just put the new value at - * its appropriate place. We can easily keep the strong exception guarantee as - * moving the values around is safe. - * - Otherwise we are in a situation where - * std::is_nothrow_move_constructible::value is false. In this - * case on EACH insertion we allocate a new area of m_nb_elements + 1 where we - * copy the values of m_values into it and put the new value there. On - * success, we set m_values to this new area. Even if slower, it's the only - * way to preserve to strong exception guarantee. - */ - template ::value>::type * = nullptr> - void insert_at_offset(allocator_type &alloc, size_type offset, - Args &&...value_args) { - if (m_nb_elements < m_capacity) { - insert_at_offset_no_realloc(alloc, offset, - std::forward(value_args)...); - } else { - insert_at_offset_realloc(alloc, offset, next_capacity(), - std::forward(value_args)...); - } - } - - template ::value>::type * = nullptr> - void insert_at_offset(allocator_type &alloc, size_type offset, - Args &&...value_args) { - insert_at_offset_realloc(alloc, offset, m_nb_elements + 1, - std::forward(value_args)...); - } - - template ::value>::type * = nullptr> - void insert_at_offset_no_realloc(allocator_type &alloc, size_type offset, - Args &&...value_args) { - tsl_sh_assert(offset <= m_nb_elements); - tsl_sh_assert(m_nb_elements < m_capacity); - - for (size_type i = m_nb_elements; i > offset; i--) { - construct_value(alloc, m_values + i, std::move(m_values[i - 1])); - destroy_value(alloc, m_values + i - 1); - } - - TSL_SH_TRY { - construct_value(alloc, m_values + offset, - std::forward(value_args)...); - } - TSL_SH_CATCH(...) { - for (size_type i = offset; i < m_nb_elements; i++) { - construct_value(alloc, m_values + i, std::move(m_values[i + 1])); - destroy_value(alloc, m_values + i + 1); - } - TSL_SH_RETRHOW; - } - } - - template ::value>::type * = nullptr> - void insert_at_offset_realloc(allocator_type &alloc, size_type offset, - size_type new_capacity, Args &&...value_args) { - tsl_sh_assert(new_capacity > m_nb_elements); - - value_type *new_values = alloc.allocate(new_capacity); - // Allocate should throw if there is a failure - tsl_sh_assert(new_values != nullptr); - - TSL_SH_TRY { - construct_value(alloc, new_values + offset, - std::forward(value_args)...); - } - TSL_SH_CATCH(...) 
{ - alloc.deallocate(new_values, new_capacity); - TSL_SH_RETRHOW; - } - - // Should not throw from here - for (size_type i = 0; i < offset; i++) { - construct_value(alloc, new_values + i, std::move(m_values[i])); - } - - for (size_type i = offset; i < m_nb_elements; i++) { - construct_value(alloc, new_values + i + 1, std::move(m_values[i])); - } - - destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); - - m_values = new_values; - m_capacity = new_capacity; - } - - template ::value>::type * = nullptr> - void insert_at_offset_realloc(allocator_type &alloc, size_type offset, - size_type new_capacity, Args &&...value_args) { - tsl_sh_assert(new_capacity > m_nb_elements); - - value_type *new_values = alloc.allocate(new_capacity); - // Allocate should throw if there is a failure - tsl_sh_assert(new_values != nullptr); - - size_type nb_new_values = 0; - TSL_SH_TRY { - for (size_type i = 0; i < offset; i++) { - construct_value(alloc, new_values + i, m_values[i]); - nb_new_values++; - } - - construct_value(alloc, new_values + offset, - std::forward(value_args)...); - nb_new_values++; - - for (size_type i = offset; i < m_nb_elements; i++) { - construct_value(alloc, new_values + i + 1, m_values[i]); - nb_new_values++; - } - } - TSL_SH_CATCH(...) { - destroy_and_deallocate_values(alloc, new_values, nb_new_values, - new_capacity); - TSL_SH_RETRHOW; - } - - tsl_sh_assert(nb_new_values == m_nb_elements + 1); - - destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); - - m_values = new_values; - m_capacity = new_capacity; - } - - /** - * Erasure - * - * Two situations: - * - Either we are in a situation where - * std::is_nothrow_move_constructible::value is true. Simply - * destroy the value and left-shift move the value on the right of offset. - * - Otherwise we are in a situation where - * std::is_nothrow_move_constructible::value is false. Copy all - * the values except the one at offset into a new heap area. On success, we - * set m_values to this new area. Even if slower, it's the only way to - * preserve to strong exception guarantee. - */ - template ::value>::type * = nullptr> - void erase_at_offset(allocator_type &alloc, size_type offset) noexcept { - tsl_sh_assert(offset < m_nb_elements); - - destroy_value(alloc, m_values + offset); - - for (size_type i = offset + 1; i < m_nb_elements; i++) { - construct_value(alloc, m_values + i - 1, std::move(m_values[i])); - destroy_value(alloc, m_values + i); - } - } - - template ::value>::type * = nullptr> - void erase_at_offset(allocator_type &alloc, size_type offset) { - tsl_sh_assert(offset < m_nb_elements); - - // Erasing the last element, don't need to reallocate. We keep the capacity. - if (offset + 1 == m_nb_elements) { - destroy_value(alloc, m_values + offset); - return; - } - - tsl_sh_assert(m_nb_elements > 1); - const size_type new_capacity = m_nb_elements - 1; - - value_type *new_values = alloc.allocate(new_capacity); - // Allocate should throw if there is a failure - tsl_sh_assert(new_values != nullptr); - - size_type nb_new_values = 0; - TSL_SH_TRY { - for (size_type i = 0; i < m_nb_elements; i++) { - if (i != offset) { - construct_value(alloc, new_values + nb_new_values, m_values[i]); - nb_new_values++; - } - } - } - TSL_SH_CATCH(...) 
{ - destroy_and_deallocate_values(alloc, new_values, nb_new_values, - new_capacity); - TSL_SH_RETRHOW; - } - - tsl_sh_assert(nb_new_values == m_nb_elements - 1); - - destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); - - m_values = new_values; - m_capacity = new_capacity; - } - - private: - value_type *m_values; - - bitmap_type m_bitmap_vals; - bitmap_type m_bitmap_deleted_vals; - - size_type m_nb_elements; - size_type m_capacity; - bool m_last_array; -}; - -/** - * Internal common class used by `sparse_map` and `sparse_set`. - * - * `ValueType` is what will be stored by `sparse_hash` (usually `std::pair` for map and `Key` for set). - * - * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in - * parameter and returns a reference to the key. - * - * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in - * parameter and returns a reference to the value. `ValueSelect` should be void - * if there is no value (in a set for example). - * - * The strong exception guarantee only holds if `ExceptionSafety` is set to - * `tsl::sh::exception_safety::strong`. - * - * `ValueType` must be nothrow move constructible and/or copy constructible. - * Behaviour is undefined if the destructor of `ValueType` throws. - * - * - * The class holds its buckets in a 2-dimensional fashion. Instead of having a - * linear `std::vector` for [0, bucket_count) where each bucket stores - * one value, we have a `std::vector` (m_sparse_buckets_data) - * where each `sparse_array` stores multiple values (up to - * `sparse_array::BITMAP_NB_BITS`). To convert a one dimensional `ibucket` - * position to a position in `std::vector` and a position in - * `sparse_array`, use respectively the methods - * `sparse_array::sparse_ibucket(ibucket)` and - * `sparse_array::index_in_sparse_bucket(ibucket)`. - */ -template -class sparse_hash : private Allocator, - private Hash, - private KeyEqual, - private GrowthPolicy { - private: - template - using has_mapped_type = - typename std::integral_constant::value>; - - static_assert( - noexcept(std::declval().bucket_for_hash(std::size_t(0))), - "GrowthPolicy::bucket_for_hash must be noexcept."); - static_assert(noexcept(std::declval().clear()), - "GrowthPolicy::clear must be noexcept."); - - public: - template - class sparse_iterator; - - using key_type = typename KeySelect::key_type; - using value_type = ValueType; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using hasher = Hash; - using key_equal = KeyEqual; - using allocator_type = Allocator; - using reference = value_type &; - using const_reference = const value_type &; - using pointer = value_type *; - using const_pointer = const value_type *; - using iterator = sparse_iterator; - using const_iterator = sparse_iterator; - - private: - using sparse_array = - tsl::detail_sparse_hash::sparse_array; - - using sparse_buckets_allocator = typename std::allocator_traits< - allocator_type>::template rebind_alloc; - using sparse_buckets_container = - std::vector; - - public: - /** - * The `operator*()` and `operator->()` methods return a const reference and - * const pointer respectively to the stored value type (`Key` for a set, - * `std::pair` for a map). - * - * In case of a map, to get a mutable reference to the value `T` associated to - * a key (the `.second` in the stored pair), you have to call `value()`. 
- */ - template - class sparse_iterator { - friend class sparse_hash; - - private: - using sparse_bucket_iterator = typename std::conditional< - IsConst, typename sparse_buckets_container::const_iterator, - typename sparse_buckets_container::iterator>::type; - - using sparse_array_iterator = - typename std::conditional::type; - - /** - * sparse_array_it should be nullptr if sparse_bucket_it == - * m_sparse_buckets_data.end(). (TODO better way?) - */ - sparse_iterator(sparse_bucket_iterator sparse_bucket_it, - sparse_array_iterator sparse_array_it) - : m_sparse_buckets_it(sparse_bucket_it), - m_sparse_array_it(sparse_array_it) {} - - public: - using iterator_category = std::forward_iterator_tag; - using value_type = const typename sparse_hash::value_type; - using difference_type = std::ptrdiff_t; - using reference = value_type &; - using pointer = value_type *; - - sparse_iterator() noexcept {} - - // Copy constructor from iterator to const_iterator. - template ::type * = nullptr> - sparse_iterator(const sparse_iterator &other) noexcept - : m_sparse_buckets_it(other.m_sparse_buckets_it), - m_sparse_array_it(other.m_sparse_array_it) {} - - sparse_iterator(const sparse_iterator &other) = default; - sparse_iterator(sparse_iterator &&other) = default; - sparse_iterator &operator=(const sparse_iterator &other) = default; - sparse_iterator &operator=(sparse_iterator &&other) = default; - - const typename sparse_hash::key_type &key() const { - return KeySelect()(*m_sparse_array_it); - } - - template ::value && - IsConst>::type * = nullptr> - const typename U::value_type &value() const { - return U()(*m_sparse_array_it); - } - - template ::value && - !IsConst>::type * = nullptr> - typename U::value_type &value() { - return U()(*m_sparse_array_it); - } - - reference operator*() const { return *m_sparse_array_it; } - - pointer operator->() const { return std::addressof(*m_sparse_array_it); } - - sparse_iterator &operator++() { - tsl_sh_assert(m_sparse_array_it != nullptr); - ++m_sparse_array_it; - - if (m_sparse_array_it == m_sparse_buckets_it->end()) { - do { - if (m_sparse_buckets_it->last()) { - ++m_sparse_buckets_it; - m_sparse_array_it = nullptr; - return *this; - } - - ++m_sparse_buckets_it; - } while (m_sparse_buckets_it->empty()); - - m_sparse_array_it = m_sparse_buckets_it->begin(); - } - - return *this; - } - - sparse_iterator operator++(int) { - sparse_iterator tmp(*this); - ++*this; - - return tmp; - } - - friend bool operator==(const sparse_iterator &lhs, - const sparse_iterator &rhs) { - return lhs.m_sparse_buckets_it == rhs.m_sparse_buckets_it && - lhs.m_sparse_array_it == rhs.m_sparse_array_it; - } - - friend bool operator!=(const sparse_iterator &lhs, - const sparse_iterator &rhs) { - return !(lhs == rhs); - } - - private: - sparse_bucket_iterator m_sparse_buckets_it; - sparse_array_iterator m_sparse_array_it; - }; - - public: - sparse_hash(size_type bucket_count, const Hash &hash, const KeyEqual &equal, - const Allocator &alloc, float max_load_factor) - : Allocator(alloc), - Hash(hash), - KeyEqual(equal), - GrowthPolicy(bucket_count), - m_sparse_buckets_data(alloc), - m_sparse_buckets(static_empty_sparse_bucket_ptr()), - m_bucket_count(bucket_count), - m_nb_elements(0), - m_nb_deleted_buckets(0) { - if (m_bucket_count > max_bucket_count()) { - TSL_SH_THROW_OR_ABORT(std::length_error, - "The map exceeds its maximum size."); - } - - if (m_bucket_count > 0) { - /* - * We can't use the `vector(size_type count, const Allocator& alloc)` - * constructor as it's only available in C++14 and 
we need to support - * C++11. We thus must resize after using the `vector(const Allocator& - * alloc)` constructor. - * - * We can't use `vector(size_type count, const T& value, const Allocator& - * alloc)` as it requires the value T to be copyable. - */ - m_sparse_buckets_data.resize( - sparse_array::nb_sparse_buckets(bucket_count)); - m_sparse_buckets = m_sparse_buckets_data.data(); - - tsl_sh_assert(!m_sparse_buckets_data.empty()); - m_sparse_buckets_data.back().set_as_last(); - } - - this->max_load_factor(max_load_factor); - - // Check in the constructor instead of outside of a function to avoid - // compilation issues when value_type is not complete. - static_assert(std::is_nothrow_move_constructible::value || - std::is_copy_constructible::value, - "Key, and T if present, must be nothrow move constructible " - "and/or copy constructible."); - } - - ~sparse_hash() { clear(); } - - sparse_hash(const sparse_hash &other) - : Allocator(std::allocator_traits< - Allocator>::select_on_container_copy_construction(other)), - Hash(other), - KeyEqual(other), - GrowthPolicy(other), - m_sparse_buckets_data( - std::allocator_traits< - Allocator>::select_on_container_copy_construction(other)), - m_bucket_count(other.m_bucket_count), - m_nb_elements(other.m_nb_elements), - m_nb_deleted_buckets(other.m_nb_deleted_buckets), - m_load_threshold_rehash(other.m_load_threshold_rehash), - m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted), - m_max_load_factor(other.m_max_load_factor) { - copy_buckets_from(other), - m_sparse_buckets = m_sparse_buckets_data.empty() - ? static_empty_sparse_bucket_ptr() - : m_sparse_buckets_data.data(); - } - - sparse_hash(sparse_hash &&other) noexcept( - std::is_nothrow_move_constructible::value - &&std::is_nothrow_move_constructible::value - &&std::is_nothrow_move_constructible::value - &&std::is_nothrow_move_constructible::value - &&std::is_nothrow_move_constructible< - sparse_buckets_container>::value) - : Allocator(std::move(other)), - Hash(std::move(other)), - KeyEqual(std::move(other)), - GrowthPolicy(std::move(other)), - m_sparse_buckets_data(std::move(other.m_sparse_buckets_data)), - m_sparse_buckets(m_sparse_buckets_data.empty() - ? 
static_empty_sparse_bucket_ptr() - : m_sparse_buckets_data.data()), - m_bucket_count(other.m_bucket_count), - m_nb_elements(other.m_nb_elements), - m_nb_deleted_buckets(other.m_nb_deleted_buckets), - m_load_threshold_rehash(other.m_load_threshold_rehash), - m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted), - m_max_load_factor(other.m_max_load_factor) { - other.GrowthPolicy::clear(); - other.m_sparse_buckets_data.clear(); - other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); - other.m_bucket_count = 0; - other.m_nb_elements = 0; - other.m_nb_deleted_buckets = 0; - other.m_load_threshold_rehash = 0; - other.m_load_threshold_clear_deleted = 0; - } - - sparse_hash &operator=(const sparse_hash &other) { - if (this != &other) { - clear(); - - if (std::allocator_traits< - Allocator>::propagate_on_container_copy_assignment::value) { - Allocator::operator=(other); - } - - Hash::operator=(other); - KeyEqual::operator=(other); - GrowthPolicy::operator=(other); - - if (std::allocator_traits< - Allocator>::propagate_on_container_copy_assignment::value) { - m_sparse_buckets_data = - sparse_buckets_container(static_cast(other)); - } else { - if (m_sparse_buckets_data.size() != - other.m_sparse_buckets_data.size()) { - m_sparse_buckets_data = - sparse_buckets_container(static_cast(*this)); - } else { - m_sparse_buckets_data.clear(); - } - } - - copy_buckets_from(other); - m_sparse_buckets = m_sparse_buckets_data.empty() - ? static_empty_sparse_bucket_ptr() - : m_sparse_buckets_data.data(); - - m_bucket_count = other.m_bucket_count; - m_nb_elements = other.m_nb_elements; - m_nb_deleted_buckets = other.m_nb_deleted_buckets; - m_load_threshold_rehash = other.m_load_threshold_rehash; - m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; - m_max_load_factor = other.m_max_load_factor; - } - - return *this; - } - - sparse_hash &operator=(sparse_hash &&other) { - clear(); - - if (std::allocator_traits< - Allocator>::propagate_on_container_move_assignment::value) { - static_cast(*this) = - std::move(static_cast(other)); - m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); - } else if (static_cast(*this) != - static_cast(other)) { - move_buckets_from(std::move(other)); - } else { - static_cast(*this) = - std::move(static_cast(other)); - m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); - } - - m_sparse_buckets = m_sparse_buckets_data.empty() - ? 
static_empty_sparse_bucket_ptr() - : m_sparse_buckets_data.data(); - - static_cast(*this) = std::move(static_cast(other)); - static_cast(*this) = std::move(static_cast(other)); - static_cast(*this) = - std::move(static_cast(other)); - m_bucket_count = other.m_bucket_count; - m_nb_elements = other.m_nb_elements; - m_nb_deleted_buckets = other.m_nb_deleted_buckets; - m_load_threshold_rehash = other.m_load_threshold_rehash; - m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; - m_max_load_factor = other.m_max_load_factor; - - other.GrowthPolicy::clear(); - other.m_sparse_buckets_data.clear(); - other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); - other.m_bucket_count = 0; - other.m_nb_elements = 0; - other.m_nb_deleted_buckets = 0; - other.m_load_threshold_rehash = 0; - other.m_load_threshold_clear_deleted = 0; - - return *this; - } - - allocator_type get_allocator() const { - return static_cast(*this); - } - - /* - * Iterators - */ - iterator begin() noexcept { - auto begin = m_sparse_buckets_data.begin(); - while (begin != m_sparse_buckets_data.end() && begin->empty()) { - ++begin; - } - - return iterator(begin, (begin != m_sparse_buckets_data.end()) - ? begin->begin() - : nullptr); - } - - const_iterator begin() const noexcept { return cbegin(); } - - const_iterator cbegin() const noexcept { - auto begin = m_sparse_buckets_data.cbegin(); - while (begin != m_sparse_buckets_data.cend() && begin->empty()) { - ++begin; - } - - return const_iterator(begin, (begin != m_sparse_buckets_data.cend()) - ? begin->cbegin() - : nullptr); - } - - iterator end() noexcept { - return iterator(m_sparse_buckets_data.end(), nullptr); - } - - const_iterator end() const noexcept { return cend(); } - - const_iterator cend() const noexcept { - return const_iterator(m_sparse_buckets_data.cend(), nullptr); - } - - /* - * Capacity - */ - bool empty() const noexcept { return m_nb_elements == 0; } - - size_type size() const noexcept { return m_nb_elements; } - - size_type max_size() const noexcept { - return std::min(std::allocator_traits::max_size(), - m_sparse_buckets_data.max_size()); - } - - /* - * Modifiers - */ - void clear() noexcept { - for (auto &bucket : m_sparse_buckets_data) { - bucket.clear(*this); - } - - m_nb_elements = 0; - m_nb_deleted_buckets = 0; - } - - template - std::pair insert(P &&value) { - return insert_impl(KeySelect()(value), std::forward
(value)); - } - - template - iterator insert_hint(const_iterator hint, P &&value) { - if (hint != cend() && - compare_keys(KeySelect()(*hint), KeySelect()(value))) { - return mutable_iterator(hint); - } - - return insert(std::forward
(value)).first; - } - - template - void insert(InputIt first, InputIt last) { - if (std::is_base_of< - std::forward_iterator_tag, - typename std::iterator_traits::iterator_category>::value) { - const auto nb_elements_insert = std::distance(first, last); - const size_type nb_free_buckets = m_load_threshold_rehash - size(); - tsl_sh_assert(m_load_threshold_rehash >= size()); - - if (nb_elements_insert > 0 && - nb_free_buckets < size_type(nb_elements_insert)) { - reserve(size() + size_type(nb_elements_insert)); - } - } - - for (; first != last; ++first) { - insert(*first); - } - } - - template - std::pair insert_or_assign(K &&key, M &&obj) { - auto it = try_emplace(std::forward(key), std::forward(obj)); - if (!it.second) { - it.first.value() = std::forward(obj); - } - - return it; - } - - template - iterator insert_or_assign(const_iterator hint, K &&key, M &&obj) { - if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { - auto it = mutable_iterator(hint); - it.value() = std::forward(obj); - - return it; - } - - return insert_or_assign(std::forward(key), std::forward(obj)).first; - } - - template - std::pair emplace(Args &&...args) { - return insert(value_type(std::forward(args)...)); - } - - template - iterator emplace_hint(const_iterator hint, Args &&...args) { - return insert_hint(hint, value_type(std::forward(args)...)); - } - - template - std::pair try_emplace(K &&key, Args &&...args) { - return insert_impl(key, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple(std::forward(args)...)); - } - - template - iterator try_emplace_hint(const_iterator hint, K &&key, Args &&...args) { - if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { - return mutable_iterator(hint); - } - - return try_emplace(std::forward(key), std::forward(args)...).first; - } - - /** - * Here to avoid `template size_type erase(const K& key)` being used - * when we use an iterator instead of a const_iterator. - */ - iterator erase(iterator pos) { - tsl_sh_assert(pos != end() && m_nb_elements > 0); - auto it_sparse_array_next = - pos.m_sparse_buckets_it->erase(*this, pos.m_sparse_array_it); - m_nb_elements--; - m_nb_deleted_buckets++; - - if (it_sparse_array_next == pos.m_sparse_buckets_it->end()) { - auto it_sparse_buckets_next = pos.m_sparse_buckets_it; - do { - ++it_sparse_buckets_next; - } while (it_sparse_buckets_next != m_sparse_buckets_data.end() && - it_sparse_buckets_next->empty()); - - if (it_sparse_buckets_next == m_sparse_buckets_data.end()) { - return end(); - } else { - return iterator(it_sparse_buckets_next, - it_sparse_buckets_next->begin()); - } - } else { - return iterator(pos.m_sparse_buckets_it, it_sparse_array_next); - } - } - - iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); } - - iterator erase(const_iterator first, const_iterator last) { - if (first == last) { - return mutable_iterator(first); - } - - // TODO Optimize, could avoid the call to std::distance. 
- const size_type nb_elements_to_erase = - static_cast(std::distance(first, last)); - auto to_delete = mutable_iterator(first); - for (size_type i = 0; i < nb_elements_to_erase; i++) { - to_delete = erase(to_delete); - } - - return to_delete; - } - - template - size_type erase(const K &key) { - return erase(key, hash_key(key)); - } - - template - size_type erase(const K &key, std::size_t hash) { - return erase_impl(key, hash); - } - - void swap(sparse_hash &other) { - using std::swap; - - if (std::allocator_traits::propagate_on_container_swap::value) { - swap(static_cast(*this), static_cast(other)); - } else { - tsl_sh_assert(static_cast(*this) == - static_cast(other)); - } - - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), - static_cast(other)); - swap(m_sparse_buckets_data, other.m_sparse_buckets_data); - swap(m_sparse_buckets, other.m_sparse_buckets); - swap(m_bucket_count, other.m_bucket_count); - swap(m_nb_elements, other.m_nb_elements); - swap(m_nb_deleted_buckets, other.m_nb_deleted_buckets); - swap(m_load_threshold_rehash, other.m_load_threshold_rehash); - swap(m_load_threshold_clear_deleted, other.m_load_threshold_clear_deleted); - swap(m_max_load_factor, other.m_max_load_factor); - } - - /* - * Lookup - */ - template < - class K, class U = ValueSelect, - typename std::enable_if::value>::type * = nullptr> - typename U::value_type &at(const K &key) { - return at(key, hash_key(key)); - } - - template < - class K, class U = ValueSelect, - typename std::enable_if::value>::type * = nullptr> - typename U::value_type &at(const K &key, std::size_t hash) { - return const_cast( - static_cast(this)->at(key, hash)); - } - - template < - class K, class U = ValueSelect, - typename std::enable_if::value>::type * = nullptr> - const typename U::value_type &at(const K &key) const { - return at(key, hash_key(key)); - } - - template < - class K, class U = ValueSelect, - typename std::enable_if::value>::type * = nullptr> - const typename U::value_type &at(const K &key, std::size_t hash) const { - auto it = find(key, hash); - if (it != cend()) { - return it.value(); - } else { - TSL_SH_THROW_OR_ABORT(std::out_of_range, "Couldn't find key."); - } - } - - template < - class K, class U = ValueSelect, - typename std::enable_if::value>::type * = nullptr> - typename U::value_type &operator[](K &&key) { - return try_emplace(std::forward(key)).first.value(); - } - - template - bool contains(const K &key) const { - return contains(key, hash_key(key)); - } - - template - bool contains(const K &key, std::size_t hash) const { - return count(key, hash) != 0; - } - - template - size_type count(const K &key) const { - return count(key, hash_key(key)); - } - - template - size_type count(const K &key, std::size_t hash) const { - if (find(key, hash) != cend()) { - return 1; - } else { - return 0; - } - } - - template - iterator find(const K &key) { - return find_impl(key, hash_key(key)); - } - - template - iterator find(const K &key, std::size_t hash) { - return find_impl(key, hash); - } - - template - const_iterator find(const K &key) const { - return find_impl(key, hash_key(key)); - } - - template - const_iterator find(const K &key, std::size_t hash) const { - return find_impl(key, hash); - } - - template - std::pair equal_range(const K &key) { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range(const K &key, std::size_t hash) { - iterator it = find(key, hash); - return std::make_pair(it, (it == end()) ? 
it : std::next(it)); - } - - template - std::pair equal_range(const K &key) const { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range( - const K &key, std::size_t hash) const { - const_iterator it = find(key, hash); - return std::make_pair(it, (it == cend()) ? it : std::next(it)); - } - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_bucket_count; } - - size_type max_bucket_count() const { - return m_sparse_buckets_data.max_size(); - } - - /* - * Hash policy - */ - float load_factor() const { - if (bucket_count() == 0) { - return 0; - } - - return float(m_nb_elements) / float(bucket_count()); - } - - float max_load_factor() const { return m_max_load_factor; } - - void max_load_factor(float ml) { - m_max_load_factor = std::max(0.1f, std::min(ml, 0.8f)); - m_load_threshold_rehash = - size_type(float(bucket_count()) * m_max_load_factor); - - const float max_load_factor_with_deleted_buckets = - m_max_load_factor + 0.5f * (1.0f - m_max_load_factor); - tsl_sh_assert(max_load_factor_with_deleted_buckets > 0.0f && - max_load_factor_with_deleted_buckets <= 1.0f); - m_load_threshold_clear_deleted = - size_type(float(bucket_count()) * max_load_factor_with_deleted_buckets); - } - - void rehash(size_type count) { - count = std::max(count, - size_type(std::ceil(float(size()) / max_load_factor()))); - rehash_impl(count); - } - - void reserve(size_type count) { - rehash(size_type(std::ceil(float(count) / max_load_factor()))); - } - - /* - * Observers - */ - hasher hash_function() const { return static_cast(*this); } - - key_equal key_eq() const { return static_cast(*this); } - - /* - * Other - */ - iterator mutable_iterator(const_iterator pos) { - auto it_sparse_buckets = - m_sparse_buckets_data.begin() + - std::distance(m_sparse_buckets_data.cbegin(), pos.m_sparse_buckets_it); - - return iterator(it_sparse_buckets, - sparse_array::mutable_iterator(pos.m_sparse_array_it)); - } - - template - void serialize(Serializer &serializer) const { - serialize_impl(serializer); - } - - template - void deserialize(Deserializer &deserializer, bool hash_compatible) { - deserialize_impl(deserializer, hash_compatible); - } - - private: - template - std::size_t hash_key(const K &key) const { - return Hash::operator()(key); - } - - template - bool compare_keys(const K1 &key1, const K2 &key2) const { - return KeyEqual::operator()(key1, key2); - } - - size_type bucket_for_hash(std::size_t hash) const { - const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); - tsl_sh_assert(sparse_array::sparse_ibucket(bucket) < - m_sparse_buckets_data.size() || - (bucket == 0 && m_sparse_buckets_data.empty())); - - return bucket; - } - - template ::value>::type * = - nullptr> - size_type next_bucket(size_type ibucket, size_type iprobe) const { - (void)iprobe; - if (Probing == tsl::sh::probing::linear) { - return (ibucket + 1) & this->m_mask; - } else { - tsl_sh_assert(Probing == tsl::sh::probing::quadratic); - return (ibucket + iprobe) & this->m_mask; - } - } - - template ::value>::type * = - nullptr> - size_type next_bucket(size_type ibucket, size_type iprobe) const { - (void)iprobe; - if (Probing == tsl::sh::probing::linear) { - ibucket++; - return (ibucket != bucket_count()) ? ibucket : 0; - } else { - tsl_sh_assert(Probing == tsl::sh::probing::quadratic); - ibucket += iprobe; - return (ibucket < bucket_count()) ? 
ibucket : ibucket % bucket_count(); - } - } - - // TODO encapsulate m_sparse_buckets_data to avoid the managing the allocator - void copy_buckets_from(const sparse_hash &other) { - m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); - - TSL_SH_TRY { - for (const auto &bucket : other.m_sparse_buckets_data) { - m_sparse_buckets_data.emplace_back(bucket, - static_cast(*this)); - } - } - TSL_SH_CATCH(...) { - clear(); - TSL_SH_RETRHOW; - } - - tsl_sh_assert(m_sparse_buckets_data.empty() || - m_sparse_buckets_data.back().last()); - } - - void move_buckets_from(sparse_hash &&other) { - m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); - - TSL_SH_TRY { - for (auto &&bucket : other.m_sparse_buckets_data) { - m_sparse_buckets_data.emplace_back(std::move(bucket), - static_cast(*this)); - } - } - TSL_SH_CATCH(...) { - clear(); - TSL_SH_RETRHOW; - } - - tsl_sh_assert(m_sparse_buckets_data.empty() || - m_sparse_buckets_data.back().last()); - } - - template - std::pair insert_impl(const K &key, - Args &&...value_type_args) { - /** - * We must insert the value in the first empty or deleted bucket we find. If - * we first find a deleted bucket, we still have to continue the search - * until we find an empty bucket or until we have searched all the buckets - * to be sure that the value is not in the hash table. We thus remember the - * position, if any, of the first deleted bucket we have encountered so we - * can insert it there if needed. - */ - bool found_first_deleted_bucket = false; - std::size_t sparse_ibucket_first_deleted = 0; - typename sparse_array::size_type index_in_sparse_bucket_first_deleted = 0; - - const std::size_t hash = hash_key(key); - std::size_t ibucket = bucket_for_hash(hash); - - std::size_t probe = 0; - while (true) { - std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); - auto index_in_sparse_bucket = - sparse_array::index_in_sparse_bucket(ibucket); - - if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { - auto value_it = - m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); - if (compare_keys(key, KeySelect()(*value_it))) { - return std::make_pair( - iterator(m_sparse_buckets_data.begin() + sparse_ibucket, - value_it), - false); - } - } else if (m_sparse_buckets[sparse_ibucket].has_deleted_value( - index_in_sparse_bucket) && - probe < m_bucket_count) { - if (!found_first_deleted_bucket) { - found_first_deleted_bucket = true; - sparse_ibucket_first_deleted = sparse_ibucket; - index_in_sparse_bucket_first_deleted = index_in_sparse_bucket; - } - } else { - /** - * At this point we are sure that the value does not exist - * in the hash table. - * First check if we satisfy load and delete thresholds, and if not, - * rehash the hash table (and therefore start over). Otherwise, just - * insert the value into the appropriate bucket. 
- */ - if (size() >= m_load_threshold_rehash) { - rehash_impl(GrowthPolicy::next_bucket_count()); - return insert_impl(key, std::forward(value_type_args)...); - } else if (size() + m_nb_deleted_buckets >= - m_load_threshold_clear_deleted) { - clear_deleted_buckets(); - return insert_impl(key, std::forward(value_type_args)...); - } - - if (found_first_deleted_bucket) { - auto it = insert_in_bucket(sparse_ibucket_first_deleted, - index_in_sparse_bucket_first_deleted, - std::forward(value_type_args)...); - m_nb_deleted_buckets--; - - return it; - } - - return insert_in_bucket(sparse_ibucket, index_in_sparse_bucket, - std::forward(value_type_args)...); - } - - probe++; - ibucket = next_bucket(ibucket, probe); - } - } - - template - std::pair insert_in_bucket( - std::size_t sparse_ibucket, - typename sparse_array::size_type index_in_sparse_bucket, - Args &&...value_type_args) { - auto value_it = m_sparse_buckets[sparse_ibucket].set( - *this, index_in_sparse_bucket, std::forward(value_type_args)...); - m_nb_elements++; - - return std::make_pair( - iterator(m_sparse_buckets_data.begin() + sparse_ibucket, value_it), - true); - } - - template - size_type erase_impl(const K &key, std::size_t hash) { - std::size_t ibucket = bucket_for_hash(hash); - - std::size_t probe = 0; - while (true) { - const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); - const auto index_in_sparse_bucket = - sparse_array::index_in_sparse_bucket(ibucket); - - if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { - auto value_it = - m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); - if (compare_keys(key, KeySelect()(*value_it))) { - m_sparse_buckets[sparse_ibucket].erase(*this, value_it, - index_in_sparse_bucket); - m_nb_elements--; - m_nb_deleted_buckets++; - - return 1; - } - } else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( - index_in_sparse_bucket) || - probe >= m_bucket_count) { - return 0; - } - - probe++; - ibucket = next_bucket(ibucket, probe); - } - } - - template - iterator find_impl(const K &key, std::size_t hash) { - return mutable_iterator( - static_cast(this)->find(key, hash)); - } - - template - const_iterator find_impl(const K &key, std::size_t hash) const { - std::size_t ibucket = bucket_for_hash(hash); - - std::size_t probe = 0; - while (true) { - const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); - const auto index_in_sparse_bucket = - sparse_array::index_in_sparse_bucket(ibucket); - - if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { - auto value_it = - m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); - if (compare_keys(key, KeySelect()(*value_it))) { - return const_iterator(m_sparse_buckets_data.cbegin() + sparse_ibucket, - value_it); - } - } else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( - index_in_sparse_bucket) || - probe >= m_bucket_count) { - return cend(); - } - - probe++; - ibucket = next_bucket(ibucket, probe); - } - } - - void clear_deleted_buckets() { - // TODO could be optimized, we could do it in-place instead of allocating a - // new bucket array. 
- rehash_impl(m_bucket_count); - tsl_sh_assert(m_nb_deleted_buckets == 0); - } - - template ::type - * = nullptr> - void rehash_impl(size_type count) { - sparse_hash new_table(count, static_cast(*this), - static_cast(*this), - static_cast(*this), m_max_load_factor); - - for (auto &bucket : m_sparse_buckets_data) { - for (auto &val : bucket) { - new_table.insert_on_rehash(std::move(val)); - } - - // TODO try to reuse some of the memory - bucket.clear(*this); - } - - new_table.swap(*this); - } - - /** - * TODO: For now we copy each element into the new map. We could move - * them if they are nothrow_move_constructible without triggering - * any exception if we reserve enough space in the sparse arrays beforehand. - */ - template ::type * = nullptr> - void rehash_impl(size_type count) { - sparse_hash new_table(count, static_cast(*this), - static_cast(*this), - static_cast(*this), m_max_load_factor); - - for (const auto &bucket : m_sparse_buckets_data) { - for (const auto &val : bucket) { - new_table.insert_on_rehash(val); - } - } - - new_table.swap(*this); - } - - template - void insert_on_rehash(K &&key_value) { - const std::size_t hash = hash_key(KeySelect()(key_value)); - std::size_t ibucket = bucket_for_hash(hash); - - std::size_t probe = 0; - while (true) { - std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); - auto index_in_sparse_bucket = - sparse_array::index_in_sparse_bucket(ibucket); - - if (!m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) { - m_sparse_buckets[sparse_ibucket].set(*this, index_in_sparse_bucket, - std::forward(key_value)); - m_nb_elements++; - - return; - } else { - tsl_sh_assert( - !compare_keys(KeySelect()(key_value), - KeySelect()(*m_sparse_buckets[sparse_ibucket].value( - index_in_sparse_bucket)))); - } - - probe++; - ibucket = next_bucket(ibucket, probe); - } - } - - template - void serialize_impl(Serializer &serializer) const { - const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION; - serializer(version); - - const slz_size_type bucket_count = m_bucket_count; - serializer(bucket_count); - - const slz_size_type nb_sparse_buckets = m_sparse_buckets_data.size(); - serializer(nb_sparse_buckets); - - const slz_size_type nb_elements = m_nb_elements; - serializer(nb_elements); - - const slz_size_type nb_deleted_buckets = m_nb_deleted_buckets; - serializer(nb_deleted_buckets); - - const float max_load_factor = m_max_load_factor; - serializer(max_load_factor); - - for (const auto &bucket : m_sparse_buckets_data) { - bucket.serialize(serializer); - } - } - - template - void deserialize_impl(Deserializer &deserializer, bool hash_compatible) { - tsl_sh_assert( - m_bucket_count == 0 && - m_sparse_buckets_data.empty()); // Current hash table must be empty - - const slz_size_type version = - deserialize_value(deserializer); - // For now we only have one version of the serialization protocol. - // If it doesn't match there is a problem with the file. - if (version != SERIALIZATION_PROTOCOL_VERSION) { - TSL_SH_THROW_OR_ABORT(std::runtime_error, - "Can't deserialize the sparse_map/set. 
The " - "protocol version header is invalid."); - } - - const slz_size_type bucket_count_ds = - deserialize_value(deserializer); - const slz_size_type nb_sparse_buckets = - deserialize_value(deserializer); - const slz_size_type nb_elements = - deserialize_value(deserializer); - const slz_size_type nb_deleted_buckets = - deserialize_value(deserializer); - const float max_load_factor = deserialize_value(deserializer); - - if (!hash_compatible) { - this->max_load_factor(max_load_factor); - reserve(numeric_cast(nb_elements, - "Deserialized nb_elements is too big.")); - for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) { - sparse_array::deserialize_values_into_sparse_hash(deserializer, *this); - } - } else { - m_bucket_count = numeric_cast( - bucket_count_ds, "Deserialized bucket_count is too big."); - - GrowthPolicy::operator=(GrowthPolicy(m_bucket_count)); - // GrowthPolicy should not modify the bucket count we got from - // deserialization - if (m_bucket_count != bucket_count_ds) { - TSL_SH_THROW_OR_ABORT(std::runtime_error, - "The GrowthPolicy is not the same even though " - "hash_compatible is true."); - } - - if (nb_sparse_buckets != - sparse_array::nb_sparse_buckets(m_bucket_count)) { - TSL_SH_THROW_OR_ABORT(std::runtime_error, - "Deserialized nb_sparse_buckets is invalid."); - } - - m_nb_elements = numeric_cast( - nb_elements, "Deserialized nb_elements is too big."); - m_nb_deleted_buckets = numeric_cast( - nb_deleted_buckets, "Deserialized nb_deleted_buckets is too big."); - - m_sparse_buckets_data.reserve(numeric_cast( - nb_sparse_buckets, "Deserialized nb_sparse_buckets is too big.")); - for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) { - m_sparse_buckets_data.emplace_back( - sparse_array::deserialize_hash_compatible( - deserializer, static_cast(*this))); - } - - if (!m_sparse_buckets_data.empty()) { - m_sparse_buckets_data.back().set_as_last(); - m_sparse_buckets = m_sparse_buckets_data.data(); - } - - this->max_load_factor(max_load_factor); - if (load_factor() > this->max_load_factor()) { - TSL_SH_THROW_OR_ABORT( - std::runtime_error, - "Invalid max_load_factor. Check that the serializer and " - "deserializer support " - "floats correctly as they can be converted implicitely to ints."); - } - } - } - - public: - static const size_type DEFAULT_INIT_BUCKET_COUNT = 0; - static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; - - /** - * Protocol version currenlty used for serialization. - */ - static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1; - - /** - * Return an always valid pointer to an static empty bucket_entry with - * last_bucket() == true. - */ - sparse_array *static_empty_sparse_bucket_ptr() { - static sparse_array empty_sparse_bucket(true); - return &empty_sparse_bucket; - } - - private: - sparse_buckets_container m_sparse_buckets_data; - - /** - * Points to m_sparse_buckets_data.data() if !m_sparse_buckets_data.empty() - * otherwise points to static_empty_sparse_bucket_ptr. This variable is useful - * to avoid the cost of checking if m_sparse_buckets_data is empty when trying - * to find an element. - * - * TODO Remove m_sparse_buckets_data and only use a pointer instead of a - * pointer+vector to save some space in the sparse_hash object. - */ - sparse_array *m_sparse_buckets; - - size_type m_bucket_count; - size_type m_nb_elements; - size_type m_nb_deleted_buckets; - - /** - * Maximum that m_nb_elements can reach before a rehash occurs automatically - * to grow the hash table. 
- */ - size_type m_load_threshold_rehash; - - /** - * Maximum that m_nb_elements + m_nb_deleted_buckets can reach before cleaning - * up the buckets marked as deleted. - */ - size_type m_load_threshold_clear_deleted; - float m_max_load_factor; -}; - -} // namespace detail_sparse_hash -} // namespace tsl + static const std::size_t BUCKET_MASK = BITMAP_NB_BITS - 1; + + static_assert( + is_power_of_two(BITMAP_NB_BITS), "BITMAP_NB_BITS must be a power of two."); + static_assert(std::numeric_limits::digits >= BITMAP_NB_BITS, + "bitmap_type must be able to hold at least BITMAP_NB_BITS."); + static_assert((std::size_t(1) << BUCKET_SHIFT) == BITMAP_NB_BITS, + "(1 << BUCKET_SHIFT) must be equal to BITMAP_NB_BITS."); + static_assert(std::numeric_limits::max() >= BITMAP_NB_BITS, + "size_type must be big enough to hold BITMAP_NB_BITS."); + static_assert(std::is_unsigned::value, "bitmap_type must be unsigned."); + static_assert( + (std::numeric_limits::max() & BUCKET_MASK) == BITMAP_NB_BITS - 1, ""); + + public: + /** + * Map an ibucket [0, bucket_count) in the hash table to a sparse_ibucket + * (a sparse_array holds multiple buckets, so there is less sparse_array than + * bucket_count). + * + * The bucket ibucket is in + * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] + * instead of something like m_buckets[ibucket] in a classical hash table. + */ + static std::size_t sparse_ibucket(std::size_t ibucket) + { + return ibucket >> BUCKET_SHIFT; + } + + /** + * Map an ibucket [0, bucket_count) in the hash table to an index in the + * sparse_array which corresponds to the bucket. + * + * The bucket ibucket is in + * m_sparse_buckets[sparse_ibucket(ibucket)][index_in_sparse_bucket(ibucket)] + * instead of something like m_buckets[ibucket] in a classical hash table. 
+ */ + static typename sparse_array::size_type index_in_sparse_bucket(std::size_t ibucket) + { + return static_cast( + ibucket & sparse_array::BUCKET_MASK); + } + + static std::size_t nb_sparse_buckets(std::size_t bucket_count) noexcept + { + if (bucket_count == 0) + { + return 0; + } + + return std::max( + 1, sparse_ibucket( + tsl::detail_sparse_hash::round_up_to_power_of_two(bucket_count))); + } + + public: + sparse_array() noexcept + : m_values(nullptr) + , m_bitmap_vals(0) + , m_bitmap_deleted_vals(0) + , m_nb_elements(0) + , m_capacity(0) + , m_last_array(false) + {} + + explicit sparse_array(bool last_bucket) noexcept + : m_values(nullptr) + , m_bitmap_vals(0) + , m_bitmap_deleted_vals(0) + , m_nb_elements(0) + , m_capacity(0) + , m_last_array(last_bucket) + {} + + sparse_array(size_type capacity, Allocator& alloc) + : m_values(nullptr) + , m_bitmap_vals(0) + , m_bitmap_deleted_vals(0) + , m_nb_elements(0) + , m_capacity(capacity) + , m_last_array(false) + { + if (m_capacity > 0) + { + m_values = alloc.allocate(m_capacity); + tsl_sh_assert( + m_values != nullptr); // allocate should throw if there is a failure + } + } + + sparse_array(const sparse_array& other, Allocator& alloc) + : m_values(nullptr) + , m_bitmap_vals(other.m_bitmap_vals) + , m_bitmap_deleted_vals(other.m_bitmap_deleted_vals) + , m_nb_elements(0) + , m_capacity(other.m_capacity) + , m_last_array(other.m_last_array) + { + tsl_sh_assert(other.m_capacity >= other.m_nb_elements); + if (m_capacity == 0) + { + return; + } + + m_values = alloc.allocate(m_capacity); + tsl_sh_assert( + m_values != nullptr); // allocate should throw if there is a failure + TSL_SH_TRY + { + for (size_type i = 0; i < other.m_nb_elements; i++) + { + construct_value(alloc, m_values + i, other.m_values[i]); + m_nb_elements++; + } + } + TSL_SH_CATCH(...) + { + clear(alloc); + TSL_SH_RETRHOW; + } + } + + sparse_array(sparse_array&& other) noexcept + : m_values(other.m_values) + , m_bitmap_vals(other.m_bitmap_vals) + , m_bitmap_deleted_vals(other.m_bitmap_deleted_vals) + , m_nb_elements(other.m_nb_elements) + , m_capacity(other.m_capacity) + , m_last_array(other.m_last_array) + { + other.m_values = nullptr; + other.m_bitmap_vals = 0; + other.m_bitmap_deleted_vals = 0; + other.m_nb_elements = 0; + other.m_capacity = 0; + } + + sparse_array(sparse_array&& other, Allocator& alloc) + : m_values(nullptr) + , m_bitmap_vals(other.m_bitmap_vals) + , m_bitmap_deleted_vals(other.m_bitmap_deleted_vals) + , m_nb_elements(0) + , m_capacity(other.m_capacity) + , m_last_array(other.m_last_array) + { + tsl_sh_assert(other.m_capacity >= other.m_nb_elements); + if (m_capacity == 0) + { + return; + } + + m_values = alloc.allocate(m_capacity); + tsl_sh_assert( + m_values != nullptr); // allocate should throw if there is a failure + TSL_SH_TRY + { + for (size_type i = 0; i < other.m_nb_elements; i++) + { + construct_value(alloc, m_values + i, std::move(other.m_values[i])); + m_nb_elements++; + } + } + TSL_SH_CATCH(...) + { + clear(alloc); + TSL_SH_RETRHOW; + } + } + + sparse_array& operator=(const sparse_array&) = delete; + sparse_array& operator=(sparse_array&&) = delete; + + ~sparse_array() noexcept + { + // The code that manages the sparse_array must have called clear before + // destruction. See documentation of sparse_array for more details. 
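/*
 * Standalone sketch (not part of the patch or of the tsl sources): the
 * ibucket -> (sparse_ibucket, index_in_sparse_bucket) mapping documented above
 * is a plain shift/mask split of the flat bucket index. BITMAP_NB_BITS is
 * assumed to be 64 here, so BUCKET_SHIFT is 6 and BUCKET_MASK is 63.
 */
#include <cstddef>

int main()
{
    constexpr std::size_t BUCKET_SHIFT = 6;    // log2(BITMAP_NB_BITS), with BITMAP_NB_BITS == 64
    constexpr std::size_t BUCKET_MASK = 63;    // BITMAP_NB_BITS - 1

    constexpr std::size_t ibucket = 200;       // flat bucket index in the hash table
    // Bucket 200 lives in sparse_array #3, slot 8 (200 == 3 * 64 + 8).
    static_assert((ibucket >> BUCKET_SHIFT) == 3, "sparse_ibucket(200)");
    static_assert((ibucket & BUCKET_MASK) == 8, "index_in_sparse_bucket(200)");
}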
+ tsl_sh_assert(m_capacity == 0 && m_nb_elements == 0 && m_values == nullptr); + } + + iterator begin() noexcept + { + return m_values; + } + iterator end() noexcept + { + return m_values + m_nb_elements; + } + const_iterator begin() const noexcept + { + return cbegin(); + } + const_iterator end() const noexcept + { + return cend(); + } + const_iterator cbegin() const noexcept + { + return m_values; + } + const_iterator cend() const noexcept + { + return m_values + m_nb_elements; + } + + bool empty() const noexcept + { + return m_nb_elements == 0; + } + + size_type size() const noexcept + { + return m_nb_elements; + } + + void clear(allocator_type& alloc) noexcept + { + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = nullptr; + m_bitmap_vals = 0; + m_bitmap_deleted_vals = 0; + m_nb_elements = 0; + m_capacity = 0; + } + + bool last() const noexcept + { + return m_last_array; + } + + void set_as_last() noexcept + { + m_last_array = true; + } + + bool has_value(size_type index) const noexcept + { + tsl_sh_assert(index < BITMAP_NB_BITS); + return (m_bitmap_vals & (bitmap_type(1) << index)) != 0; + } + + bool has_deleted_value(size_type index) const noexcept + { + tsl_sh_assert(index < BITMAP_NB_BITS); + return (m_bitmap_deleted_vals & (bitmap_type(1) << index)) != 0; + } + + iterator value(size_type index) noexcept + { + tsl_sh_assert(has_value(index)); + return m_values + index_to_offset(index); + } + + const_iterator value(size_type index) const noexcept + { + tsl_sh_assert(has_value(index)); + return m_values + index_to_offset(index); + } + + /** + * Return iterator to set value. + */ + template + iterator set(allocator_type& alloc, size_type index, Args&&... value_args) + { + tsl_sh_assert(!has_value(index)); + + const size_type offset = index_to_offset(index); + insert_at_offset(alloc, offset, std::forward(value_args)...); + + m_bitmap_vals = (m_bitmap_vals | (bitmap_type(1) << index)); + m_bitmap_deleted_vals = (m_bitmap_deleted_vals & ~(bitmap_type(1) << index)); + + m_nb_elements++; + + tsl_sh_assert(has_value(index)); + tsl_sh_assert(!has_deleted_value(index)); + + return m_values + offset; + } + + iterator erase(allocator_type& alloc, iterator position) + { + const size_type offset = static_cast(std::distance(begin(), position)); + return erase(alloc, position, offset_to_index(offset)); + } + + // Return the next value or end if no next value + iterator erase(allocator_type& alloc, iterator position, size_type index) + { + tsl_sh_assert(has_value(index)); + tsl_sh_assert(!has_deleted_value(index)); + + const size_type offset = static_cast(std::distance(begin(), position)); + erase_at_offset(alloc, offset); + + m_bitmap_vals = (m_bitmap_vals & ~(bitmap_type(1) << index)); + m_bitmap_deleted_vals = (m_bitmap_deleted_vals | (bitmap_type(1) << index)); + + m_nb_elements--; + + tsl_sh_assert(!has_value(index)); + tsl_sh_assert(has_deleted_value(index)); + + return m_values + offset; + } + + void swap(sparse_array& other) + { + using std::swap; + + swap(m_values, other.m_values); + swap(m_bitmap_vals, other.m_bitmap_vals); + swap(m_bitmap_deleted_vals, other.m_bitmap_deleted_vals); + swap(m_nb_elements, other.m_nb_elements); + swap(m_capacity, other.m_capacity); + swap(m_last_array, other.m_last_array); + } + + static iterator mutable_iterator(const_iterator pos) + { + return const_cast(pos); + } + + template + void serialize(Serializer& serializer) const + { + const slz_size_type sparse_bucket_size = m_nb_elements; + 
serializer(sparse_bucket_size); + + const slz_size_type bitmap_vals = m_bitmap_vals; + serializer(bitmap_vals); + + const slz_size_type bitmap_deleted_vals = m_bitmap_deleted_vals; + serializer(bitmap_deleted_vals); + + for (const value_type& value : *this) + { + serializer(value); + } + } + + template + static sparse_array deserialize_hash_compatible( + Deserializer& deserializer, Allocator& alloc) + { + const slz_size_type sparse_bucket_size = + deserialize_value(deserializer); + const slz_size_type bitmap_vals = deserialize_value(deserializer); + const slz_size_type bitmap_deleted_vals = + deserialize_value(deserializer); + + if (sparse_bucket_size > BITMAP_NB_BITS) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, + "Deserialized sparse_bucket_size is too big for the platform. " + "Maximum should be BITMAP_NB_BITS."); + } + + sparse_array sarray; + if (sparse_bucket_size == 0) + { + return sarray; + } + + sarray.m_bitmap_vals = + numeric_cast(bitmap_vals, "Deserialized bitmap_vals is too big."); + sarray.m_bitmap_deleted_vals = numeric_cast( + bitmap_deleted_vals, "Deserialized bitmap_deleted_vals is too big."); + + sarray.m_capacity = numeric_cast( + sparse_bucket_size, "Deserialized sparse_bucket_size is too big."); + sarray.m_values = alloc.allocate(sarray.m_capacity); + + TSL_SH_TRY + { + for (size_type ivalue = 0; ivalue < sarray.m_capacity; ivalue++) + { + construct_value(alloc, sarray.m_values + ivalue, + deserialize_value(deserializer)); + sarray.m_nb_elements++; + } + } + TSL_SH_CATCH(...) + { + sarray.clear(alloc); + TSL_SH_RETRHOW; + } + + return sarray; + } + + /** + * Deserialize the values of the bucket and insert them all in sparse_hash + * through sparse_hash.insert(...). + */ + template + static void deserialize_values_into_sparse_hash( + Deserializer& deserializer, SparseHash& sparse_hash) + { + const slz_size_type sparse_bucket_size = + deserialize_value(deserializer); + + const slz_size_type bitmap_vals = deserialize_value(deserializer); + static_cast(bitmap_vals); // Ignore, not needed + + const slz_size_type bitmap_deleted_vals = + deserialize_value(deserializer); + static_cast(bitmap_deleted_vals); // Ignore, not needed + + for (slz_size_type ivalue = 0; ivalue < sparse_bucket_size; ivalue++) + { + sparse_hash.insert(deserialize_value(deserializer)); + } + } + + private: + template + static void construct_value( + allocator_type& alloc, value_type* value, Args&&... 
value_args) + { + std::allocator_traits::construct( + alloc, value, std::forward(value_args)...); + } + + static void destroy_value(allocator_type& alloc, value_type* value) noexcept + { + std::allocator_traits::destroy(alloc, value); + } + + static void destroy_and_deallocate_values(allocator_type& alloc, value_type* values, + size_type nb_values, size_type capacity_values) noexcept + { + for (size_type i = 0; i < nb_values; i++) + { + destroy_value(alloc, values + i); + } + + alloc.deallocate(values, capacity_values); + } + + static size_type popcount(bitmap_type val) noexcept + { + if (sizeof(bitmap_type) <= sizeof(unsigned int)) + { + return static_cast( + tsl::detail_popcount::popcount(static_cast(val))); + } + else + { + return static_cast(tsl::detail_popcount::popcountll(val)); + } + } + + size_type index_to_offset(size_type index) const noexcept + { + tsl_sh_assert(index < BITMAP_NB_BITS); + return popcount(m_bitmap_vals & ((bitmap_type(1) << index) - bitmap_type(1))); + } + + // TODO optimize + size_type offset_to_index(size_type offset) const noexcept + { + tsl_sh_assert(offset < m_nb_elements); + + bitmap_type bitmap_vals = m_bitmap_vals; + size_type index = 0; + size_type nb_ones = 0; + + while (bitmap_vals != 0) + { + if ((bitmap_vals & 0x1) == 1) + { + if (nb_ones == offset) + { + break; + } + + nb_ones++; + } + + index++; + bitmap_vals = bitmap_vals >> 1; + } + + return index; + } + + size_type next_capacity() const noexcept + { + return static_cast(m_capacity + CAPACITY_GROWTH_STEP); + } + + /** + * Insertion + * + * Two situations: + * - Either we are in a situation where + * std::is_nothrow_move_constructible::value is true. In this + * case, on insertion we just reallocate m_values when we reach its capacity + * (i.e. m_nb_elements == m_capacity), otherwise we just put the new value at + * its appropriate place. We can easily keep the strong exception guarantee as + * moving the values around is safe. + * - Otherwise we are in a situation where + * std::is_nothrow_move_constructible::value is false. In this + * case on EACH insertion we allocate a new area of m_nb_elements + 1 where we + * copy the values of m_values into it and put the new value there. On + * success, we set m_values to this new area. Even if slower, it's the only + * way to preserve to strong exception guarantee. + */ + template::value>::type* = + nullptr> + void insert_at_offset(allocator_type& alloc, size_type offset, Args&&... value_args) + { + if (m_nb_elements < m_capacity) + { + insert_at_offset_no_realloc(alloc, offset, std::forward(value_args)...); + } + else + { + insert_at_offset_realloc( + alloc, offset, next_capacity(), std::forward(value_args)...); + } + } + + template::value>::type* = + nullptr> + void insert_at_offset(allocator_type& alloc, size_type offset, Args&&... value_args) + { + insert_at_offset_realloc( + alloc, offset, m_nb_elements + 1, std::forward(value_args)...); + } + + template::value>::type* = + nullptr> + void insert_at_offset_no_realloc( + allocator_type& alloc, size_type offset, Args&&... value_args) + { + tsl_sh_assert(offset <= m_nb_elements); + tsl_sh_assert(m_nb_elements < m_capacity); + + for (size_type i = m_nb_elements; i > offset; i--) + { + construct_value(alloc, m_values + i, std::move(m_values[i - 1])); + destroy_value(alloc, m_values + i - 1); + } + + TSL_SH_TRY + { + construct_value(alloc, m_values + offset, std::forward(value_args)...); + } + TSL_SH_CATCH(...) 
+ { + for (size_type i = offset; i < m_nb_elements; i++) + { + construct_value(alloc, m_values + i, std::move(m_values[i + 1])); + destroy_value(alloc, m_values + i + 1); + } + TSL_SH_RETRHOW; + } + } + + template::value>::type* = + nullptr> + void insert_at_offset_realloc(allocator_type& alloc, size_type offset, + size_type new_capacity, Args&&... value_args) + { + tsl_sh_assert(new_capacity > m_nb_elements); + + value_type* new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + TSL_SH_TRY + { + construct_value(alloc, new_values + offset, std::forward(value_args)...); + } + TSL_SH_CATCH(...) + { + alloc.deallocate(new_values, new_capacity); + TSL_SH_RETRHOW; + } + + // Should not throw from here + for (size_type i = 0; i < offset; i++) + { + construct_value(alloc, new_values + i, std::move(m_values[i])); + } + + for (size_type i = offset; i < m_nb_elements; i++) + { + construct_value(alloc, new_values + i + 1, std::move(m_values[i])); + } + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + template::value>::type* = + nullptr> + void insert_at_offset_realloc(allocator_type& alloc, size_type offset, + size_type new_capacity, Args&&... value_args) + { + tsl_sh_assert(new_capacity > m_nb_elements); + + value_type* new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + size_type nb_new_values = 0; + TSL_SH_TRY + { + for (size_type i = 0; i < offset; i++) + { + construct_value(alloc, new_values + i, m_values[i]); + nb_new_values++; + } + + construct_value(alloc, new_values + offset, std::forward(value_args)...); + nb_new_values++; + + for (size_type i = offset; i < m_nb_elements; i++) + { + construct_value(alloc, new_values + i + 1, m_values[i]); + nb_new_values++; + } + } + TSL_SH_CATCH(...) + { + destroy_and_deallocate_values(alloc, new_values, nb_new_values, new_capacity); + TSL_SH_RETRHOW; + } + + tsl_sh_assert(nb_new_values == m_nb_elements + 1); + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + /** + * Erasure + * + * Two situations: + * - Either we are in a situation where + * std::is_nothrow_move_constructible::value is true. Simply + * destroy the value and left-shift move the value on the right of offset. + * - Otherwise we are in a situation where + * std::is_nothrow_move_constructible::value is false. Copy all + * the values except the one at offset into a new heap area. On success, we + * set m_values to this new area. Even if slower, it's the only way to + * preserve to strong exception guarantee. + */ + template::value>::type* = + nullptr> + void erase_at_offset(allocator_type& alloc, size_type offset) noexcept + { + tsl_sh_assert(offset < m_nb_elements); + + destroy_value(alloc, m_values + offset); + + for (size_type i = offset + 1; i < m_nb_elements; i++) + { + construct_value(alloc, m_values + i - 1, std::move(m_values[i])); + destroy_value(alloc, m_values + i); + } + } + + template::value>::type* = + nullptr> + void erase_at_offset(allocator_type& alloc, size_type offset) + { + tsl_sh_assert(offset < m_nb_elements); + + // Erasing the last element, don't need to reallocate. We keep the capacity. 
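/*
 * Standalone sketch (not part of the patch): a sparse_array keeps only the
 * occupied slots, packed contiguously in m_values. index_to_offset above finds a
 * slot's position in that packed storage by counting the bits set below the slot
 * index in the occupancy bitmap; std::bitset::count stands in here for the
 * popcount helpers used by the real code.
 */
#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>

static std::size_t index_to_offset(std::uint64_t bitmap_vals, std::size_t index)
{
    const std::uint64_t below = bitmap_vals & ((std::uint64_t(1) << index) - 1);
    return std::bitset<64>(below).count();
}

int main()
{
    // Slots 1, 4 and 9 hold values; they are packed at offsets 0, 1 and 2.
    const std::uint64_t bitmap = (1ull << 1) | (1ull << 4) | (1ull << 9);
    assert(index_to_offset(bitmap, 1) == 0);
    assert(index_to_offset(bitmap, 4) == 1);
    assert(index_to_offset(bitmap, 9) == 2);
}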
+ if (offset + 1 == m_nb_elements) + { + destroy_value(alloc, m_values + offset); + return; + } + + tsl_sh_assert(m_nb_elements > 1); + const size_type new_capacity = m_nb_elements - 1; + + value_type* new_values = alloc.allocate(new_capacity); + // Allocate should throw if there is a failure + tsl_sh_assert(new_values != nullptr); + + size_type nb_new_values = 0; + TSL_SH_TRY + { + for (size_type i = 0; i < m_nb_elements; i++) + { + if (i != offset) + { + construct_value(alloc, new_values + nb_new_values, m_values[i]); + nb_new_values++; + } + } + } + TSL_SH_CATCH(...) + { + destroy_and_deallocate_values(alloc, new_values, nb_new_values, new_capacity); + TSL_SH_RETRHOW; + } + + tsl_sh_assert(nb_new_values == m_nb_elements - 1); + + destroy_and_deallocate_values(alloc, m_values, m_nb_elements, m_capacity); + + m_values = new_values; + m_capacity = new_capacity; + } + + private: + value_type* m_values; + + bitmap_type m_bitmap_vals; + bitmap_type m_bitmap_deleted_vals; + + size_type m_nb_elements; + size_type m_capacity; + bool m_last_array; + }; + + /** + * Internal common class used by `sparse_map` and `sparse_set`. + * + * `ValueType` is what will be stored by `sparse_hash` (usually `std::pair` for map and `Key` for set). + * + * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in + * parameter and returns a reference to the key. + * + * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in + * parameter and returns a reference to the value. `ValueSelect` should be void + * if there is no value (in a set for example). + * + * The strong exception guarantee only holds if `ExceptionSafety` is set to + * `tsl::sh::exception_safety::strong`. + * + * `ValueType` must be nothrow move constructible and/or copy constructible. + * Behaviour is undefined if the destructor of `ValueType` throws. + * + * + * The class holds its buckets in a 2-dimensional fashion. Instead of having a + * linear `std::vector` for [0, bucket_count) where each bucket stores + * one value, we have a `std::vector` (m_sparse_buckets_data) + * where each `sparse_array` stores multiple values (up to + * `sparse_array::BITMAP_NB_BITS`). To convert a one dimensional `ibucket` + * position to a position in `std::vector` and a position in + * `sparse_array`, use respectively the methods + * `sparse_array::sparse_ibucket(ibucket)` and + * `sparse_array::index_in_sparse_bucket(ibucket)`. 
+ */ + template + class sparse_hash : private Allocator, + private Hash, + private KeyEqual, + private GrowthPolicy + { + private: + template + using has_mapped_type = + typename std::integral_constant::value>; + + static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), + "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), + "GrowthPolicy::clear must be noexcept."); + + public: + template + class sparse_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = sparse_iterator; + using const_iterator = sparse_iterator; + + private: + using sparse_array = + tsl::detail_sparse_hash::sparse_array; + + using sparse_buckets_allocator = + typename std::allocator_traits::template rebind_alloc; + using sparse_buckets_container = std::vector; + + public: + /** + * The `operator*()` and `operator->()` methods return a const reference and + * const pointer respectively to the stored value type (`Key` for a set, + * `std::pair` for a map). + * + * In case of a map, to get a mutable reference to the value `T` associated to + * a key (the `.second` in the stored pair), you have to call `value()`. + */ + template + class sparse_iterator + { + friend class sparse_hash; + + private: + using sparse_bucket_iterator = typename std::conditional::type; + + using sparse_array_iterator = typename std::conditional::type; + + /** + * sparse_array_it should be nullptr if sparse_bucket_it == + * m_sparse_buckets_data.end(). (TODO better way?) + */ + sparse_iterator( + sparse_bucket_iterator sparse_bucket_it, sparse_array_iterator sparse_array_it) + : m_sparse_buckets_it(sparse_bucket_it), m_sparse_array_it(sparse_array_it) + {} + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename sparse_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + sparse_iterator() noexcept {} + + // Copy constructor from iterator to const_iterator. 
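/*
 * Standalone usage sketch (not part of the patch): as documented above, the
 * iterator's operator* and operator-> expose the stored value as const, so the
 * mapped value of a map is modified through value(). The include path assumes
 * the upstream tsl::sparse_map distribution built on top of this sparse_hash.
 */
#include <tsl/sparse_map.h>

#include <cassert>
#include <string>

int main()
{
    tsl::sparse_map<std::string, int> map{{"a", 1}};
    auto it = map.find("a");
    // it->second = 2;   // would not compile: *it is a const reference
    it.value() = 2;      // mutable access to the mapped value goes through value()
    assert(map.at("a") == 2);
}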
+ template::type* = nullptr> + sparse_iterator(const sparse_iterator& other) noexcept + : m_sparse_buckets_it(other.m_sparse_buckets_it) + , m_sparse_array_it(other.m_sparse_array_it) + {} + + sparse_iterator(const sparse_iterator& other) = default; + sparse_iterator(sparse_iterator&& other) = default; + sparse_iterator& operator=(const sparse_iterator& other) = default; + sparse_iterator& operator=(sparse_iterator&& other) = default; + + const typename sparse_hash::key_type& key() const + { + return KeySelect()(*m_sparse_array_it); + } + + template::value && IsConst>::type* = nullptr> + const typename U::value_type& value() const + { + return U()(*m_sparse_array_it); + } + + template::value && !IsConst>::type* = nullptr> + typename U::value_type& value() + { + return U()(*m_sparse_array_it); + } + + reference operator*() const + { + return *m_sparse_array_it; + } + + pointer operator->() const + { + return std::addressof(*m_sparse_array_it); + } + + sparse_iterator& operator++() + { + tsl_sh_assert(m_sparse_array_it != nullptr); + ++m_sparse_array_it; + + if (m_sparse_array_it == m_sparse_buckets_it->end()) + { + do + { + if (m_sparse_buckets_it->last()) + { + ++m_sparse_buckets_it; + m_sparse_array_it = nullptr; + return *this; + } + + ++m_sparse_buckets_it; + } while (m_sparse_buckets_it->empty()); + + m_sparse_array_it = m_sparse_buckets_it->begin(); + } + + return *this; + } + + sparse_iterator operator++(int) + { + sparse_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const sparse_iterator& lhs, const sparse_iterator& rhs) + { + return lhs.m_sparse_buckets_it == rhs.m_sparse_buckets_it + && lhs.m_sparse_array_it == rhs.m_sparse_array_it; + } + + friend bool operator!=(const sparse_iterator& lhs, const sparse_iterator& rhs) + { + return !(lhs == rhs); + } + + private: + sparse_bucket_iterator m_sparse_buckets_it; + sparse_array_iterator m_sparse_array_it; + }; + + public: + sparse_hash(size_type bucket_count, const Hash& hash, const KeyEqual& equal, + const Allocator& alloc, float max_load_factor) + : Allocator(alloc) + , Hash(hash) + , KeyEqual(equal) + , GrowthPolicy(bucket_count) + , m_sparse_buckets_data(alloc) + , m_sparse_buckets(static_empty_sparse_bucket_ptr()) + , m_bucket_count(bucket_count) + , m_nb_elements(0) + , m_nb_deleted_buckets(0) + { + if (m_bucket_count > max_bucket_count()) + { + TSL_SH_THROW_OR_ABORT(std::length_error, "The map exceeds its maximum size."); + } + + if (m_bucket_count > 0) + { + /* + * We can't use the `vector(size_type count, const Allocator& alloc)` + * constructor as it's only available in C++14 and we need to support + * C++11. We thus must resize after using the `vector(const Allocator& + * alloc)` constructor. + * + * We can't use `vector(size_type count, const T& value, const Allocator& + * alloc)` as it requires the value T to be copyable. + */ + m_sparse_buckets_data.resize(sparse_array::nb_sparse_buckets(bucket_count)); + m_sparse_buckets = m_sparse_buckets_data.data(); + + tsl_sh_assert(!m_sparse_buckets_data.empty()); + m_sparse_buckets_data.back().set_as_last(); + } + + this->max_load_factor(max_load_factor); + + // Check in the constructor instead of outside of a function to avoid + // compilation issues when value_type is not complete. 
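/*
 * Standalone sketch (not part of the patch): the static_assert below and the
 * insert_at_offset/erase_at_offset overloads earlier encode the same rule: when
 * the value type is nothrow move constructible the container can shift elements
 * in place, otherwise it must rebuild into a fresh buffer by copy so the old
 * data survives a throwing copy (strong exception guarantee). The same dispatch
 * in miniature, using if constexpr instead of the enable_if overloads of the
 * original:
 */
#include <cassert>
#include <cstddef>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

template <class T>
void insert_at(std::vector<T>& values, std::size_t offset, const T& v)
{
    if constexpr (std::is_nothrow_move_constructible_v<T>)
    {
        // Cheap path: the tail is shifted right with non-throwing moves.
        values.insert(values.begin() + static_cast<std::ptrdiff_t>(offset), v);
    }
    else
    {
        // Safe path: build a new buffer by copy; the old one is replaced only on success.
        std::vector<T> fresh;
        fresh.reserve(values.size() + 1);
        fresh.insert(fresh.end(), values.begin(),
            values.begin() + static_cast<std::ptrdiff_t>(offset));
        fresh.push_back(v);
        fresh.insert(fresh.end(),
            values.begin() + static_cast<std::ptrdiff_t>(offset), values.end());
        values = std::move(fresh);
    }
}

int main()
{
    std::vector<std::string> values{"a", "c"};
    insert_at(values, 1, std::string("b"));   // std::string takes the nothrow-move path
    assert(values[1] == "b");
}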
+ static_assert(std::is_nothrow_move_constructible::value + || std::is_copy_constructible::value, + "Key, and T if present, must be nothrow move constructible " + "and/or copy constructible."); + } + + ~sparse_hash() + { + clear(); + } + + sparse_hash(const sparse_hash& other) + : Allocator( + std::allocator_traits::select_on_container_copy_construction(other)) + , Hash(other) + , KeyEqual(other) + , GrowthPolicy(other) + , m_sparse_buckets_data( + std::allocator_traits::select_on_container_copy_construction( + other)) + , m_bucket_count(other.m_bucket_count) + , m_nb_elements(other.m_nb_elements) + , m_nb_deleted_buckets(other.m_nb_deleted_buckets) + , m_load_threshold_rehash(other.m_load_threshold_rehash) + , m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted) + , m_max_load_factor(other.m_max_load_factor) + { + copy_buckets_from(other), m_sparse_buckets = m_sparse_buckets_data.empty() + ? static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + } + + sparse_hash(sparse_hash&& other) noexcept( + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value) + : Allocator(std::move(other)) + , Hash(std::move(other)) + , KeyEqual(std::move(other)) + , GrowthPolicy(std::move(other)) + , m_sparse_buckets_data(std::move(other.m_sparse_buckets_data)) + , m_sparse_buckets(m_sparse_buckets_data.empty() ? static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data()) + , m_bucket_count(other.m_bucket_count) + , m_nb_elements(other.m_nb_elements) + , m_nb_deleted_buckets(other.m_nb_deleted_buckets) + , m_load_threshold_rehash(other.m_load_threshold_rehash) + , m_load_threshold_clear_deleted(other.m_load_threshold_clear_deleted) + , m_max_load_factor(other.m_max_load_factor) + { + other.GrowthPolicy::clear(); + other.m_sparse_buckets_data.clear(); + other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); + other.m_bucket_count = 0; + other.m_nb_elements = 0; + other.m_nb_deleted_buckets = 0; + other.m_load_threshold_rehash = 0; + other.m_load_threshold_clear_deleted = 0; + } + + sparse_hash& operator=(const sparse_hash& other) + { + if (this != &other) + { + clear(); + + if (std::allocator_traits< + Allocator>::propagate_on_container_copy_assignment::value) + { + Allocator::operator=(other); + } + + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + if (std::allocator_traits< + Allocator>::propagate_on_container_copy_assignment::value) + { + m_sparse_buckets_data = + sparse_buckets_container(static_cast(other)); + } + else + { + if (m_sparse_buckets_data.size() != other.m_sparse_buckets_data.size()) + { + m_sparse_buckets_data = + sparse_buckets_container(static_cast(*this)); + } + else + { + m_sparse_buckets_data.clear(); + } + } + + copy_buckets_from(other); + m_sparse_buckets = m_sparse_buckets_data.empty() + ? 
static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + + m_bucket_count = other.m_bucket_count; + m_nb_elements = other.m_nb_elements; + m_nb_deleted_buckets = other.m_nb_deleted_buckets; + m_load_threshold_rehash = other.m_load_threshold_rehash; + m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; + m_max_load_factor = other.m_max_load_factor; + } + + return *this; + } + + sparse_hash& operator=(sparse_hash&& other) + { + clear(); + + if (std::allocator_traits::propagate_on_container_move_assignment::value) + { + static_cast(*this) = std::move(static_cast(other)); + m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); + } + else if (static_cast(*this) != static_cast(other)) + { + move_buckets_from(std::move(other)); + } + else + { + static_cast(*this) = std::move(static_cast(other)); + m_sparse_buckets_data = std::move(other.m_sparse_buckets_data); + } + + m_sparse_buckets = m_sparse_buckets_data.empty() ? static_empty_sparse_bucket_ptr() + : m_sparse_buckets_data.data(); + + static_cast(*this) = std::move(static_cast(other)); + static_cast(*this) = std::move(static_cast(other)); + static_cast(*this) = std::move(static_cast(other)); + m_bucket_count = other.m_bucket_count; + m_nb_elements = other.m_nb_elements; + m_nb_deleted_buckets = other.m_nb_deleted_buckets; + m_load_threshold_rehash = other.m_load_threshold_rehash; + m_load_threshold_clear_deleted = other.m_load_threshold_clear_deleted; + m_max_load_factor = other.m_max_load_factor; + + other.GrowthPolicy::clear(); + other.m_sparse_buckets_data.clear(); + other.m_sparse_buckets = static_empty_sparse_bucket_ptr(); + other.m_bucket_count = 0; + other.m_nb_elements = 0; + other.m_nb_deleted_buckets = 0; + other.m_load_threshold_rehash = 0; + other.m_load_threshold_clear_deleted = 0; + + return *this; + } + + allocator_type get_allocator() const + { + return static_cast(*this); + } + + /* + * Iterators + */ + iterator begin() noexcept + { + auto begin = m_sparse_buckets_data.begin(); + while (begin != m_sparse_buckets_data.end() && begin->empty()) + { + ++begin; + } + + return iterator( + begin, (begin != m_sparse_buckets_data.end()) ? begin->begin() : nullptr); + } + + const_iterator begin() const noexcept + { + return cbegin(); + } + + const_iterator cbegin() const noexcept + { + auto begin = m_sparse_buckets_data.cbegin(); + while (begin != m_sparse_buckets_data.cend() && begin->empty()) + { + ++begin; + } + + return const_iterator( + begin, (begin != m_sparse_buckets_data.cend()) ? begin->cbegin() : nullptr); + } + + iterator end() noexcept + { + return iterator(m_sparse_buckets_data.end(), nullptr); + } + + const_iterator end() const noexcept + { + return cend(); + } + + const_iterator cend() const noexcept + { + return const_iterator(m_sparse_buckets_data.cend(), nullptr); + } + + /* + * Capacity + */ + bool empty() const noexcept + { + return m_nb_elements == 0; + } + + size_type size() const noexcept + { + return m_nb_elements; + } + + size_type max_size() const noexcept + { + return std::min( + std::allocator_traits::max_size(), m_sparse_buckets_data.max_size()); + } + + /* + * Modifiers + */ + void clear() noexcept + { + for (auto& bucket : m_sparse_buckets_data) + { + bucket.clear(*this); + } + + m_nb_elements = 0; + m_nb_deleted_buckets = 0; + } + + template + std::pair insert(P&& value) + { + return insert_impl(KeySelect()(value), std::forward
(value)); + } + + template + iterator insert_hint(const_iterator hint, P&& value) + { + if (hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) + { + return mutable_iterator(hint); + } + + return insert(std::forward
(value)).first; + } + + template + void insert(InputIt first, InputIt last) + { + if (std::is_base_of::iterator_category>::value) + { + const auto nb_elements_insert = std::distance(first, last); + const size_type nb_free_buckets = m_load_threshold_rehash - size(); + tsl_sh_assert(m_load_threshold_rehash >= size()); + + if (nb_elements_insert > 0 && nb_free_buckets < size_type(nb_elements_insert)) + { + reserve(size() + size_type(nb_elements_insert)); + } + } + + for (; first != last; ++first) + { + insert(*first); + } + } + + template + std::pair insert_or_assign(K&& key, M&& obj) + { + auto it = try_emplace(std::forward(key), std::forward(obj)); + if (!it.second) + { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) + { + if (hint != cend() && compare_keys(KeySelect()(*hint), key)) + { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::forward(key), std::forward(obj)).first; + } + + template + std::pair emplace(Args&&... args) + { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args&&... args) + { + return insert_hint(hint, value_type(std::forward(args)...)); + } + + template + std::pair try_emplace(K&& key, Args&&... args) + { + return insert_impl(key, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + } + + template + iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) + { + if (hint != cend() && compare_keys(KeySelect()(*hint), key)) + { + return mutable_iterator(hint); + } + + return try_emplace(std::forward(key), std::forward(args)...).first; + } + + /** + * Here to avoid `template size_type erase(const K& key)` being used + * when we use an iterator instead of a const_iterator. + */ + iterator erase(iterator pos) + { + tsl_sh_assert(pos != end() && m_nb_elements > 0); + auto it_sparse_array_next = + pos.m_sparse_buckets_it->erase(*this, pos.m_sparse_array_it); + m_nb_elements--; + m_nb_deleted_buckets++; + + if (it_sparse_array_next == pos.m_sparse_buckets_it->end()) + { + auto it_sparse_buckets_next = pos.m_sparse_buckets_it; + do + { + ++it_sparse_buckets_next; + } while (it_sparse_buckets_next != m_sparse_buckets_data.end() + && it_sparse_buckets_next->empty()); + + if (it_sparse_buckets_next == m_sparse_buckets_data.end()) + { + return end(); + } + else + { + return iterator(it_sparse_buckets_next, it_sparse_buckets_next->begin()); + } + } + else + { + return iterator(pos.m_sparse_buckets_it, it_sparse_array_next); + } + } + + iterator erase(const_iterator pos) + { + return erase(mutable_iterator(pos)); + } + + iterator erase(const_iterator first, const_iterator last) + { + if (first == last) + { + return mutable_iterator(first); + } + + // TODO Optimize, could avoid the call to std::distance. 
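/*
 * Standalone sketch (not part of the patch): the range insert above only calls
 * std::distance when the iterators are at least forward iterators, so the table
 * can reserve once up front instead of rehashing repeatedly inside the loop.
 * std::unordered_set stands in for the sparse hash table here.
 */
#include <cstddef>
#include <iterator>
#include <type_traits>
#include <unordered_set>
#include <vector>

template <class Set, class InputIt>
void insert_range(Set& set, InputIt first, InputIt last)
{
    using category = typename std::iterator_traits<InputIt>::iterator_category;
    if (std::is_base_of<std::forward_iterator_tag, category>::value)
    {
        // Cheap for forward/random-access iterators; the branch is skipped otherwise.
        set.reserve(set.size() + static_cast<std::size_t>(std::distance(first, last)));
    }

    for (; first != last; ++first)
    {
        set.insert(*first);
    }
}

int main()
{
    const std::vector<int> src{1, 2, 3, 4};
    std::unordered_set<int> set;
    insert_range(set, src.begin(), src.end());
    return set.size() == 4 ? 0 : 1;
}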
+ const size_type nb_elements_to_erase = + static_cast(std::distance(first, last)); + auto to_delete = mutable_iterator(first); + for (size_type i = 0; i < nb_elements_to_erase; i++) + { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template + size_type erase(const K& key) + { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K& key, std::size_t hash) + { + return erase_impl(key, hash); + } + + void swap(sparse_hash& other) + { + using std::swap; + + if (std::allocator_traits::propagate_on_container_swap::value) + { + swap(static_cast(*this), static_cast(other)); + } + else + { + tsl_sh_assert(static_cast(*this) == static_cast(other)); + } + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(m_sparse_buckets_data, other.m_sparse_buckets_data); + swap(m_sparse_buckets, other.m_sparse_buckets); + swap(m_bucket_count, other.m_bucket_count); + swap(m_nb_elements, other.m_nb_elements); + swap(m_nb_deleted_buckets, other.m_nb_deleted_buckets); + swap(m_load_threshold_rehash, other.m_load_threshold_rehash); + swap(m_load_threshold_clear_deleted, other.m_load_threshold_clear_deleted); + swap(m_max_load_factor, other.m_max_load_factor); + } + + /* + * Lookup + */ + template::value>::type* = nullptr> + typename U::value_type& at(const K& key) + { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) + { + return const_cast( + static_cast(this)->at(key, hash)); + } + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const + { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const + { + auto it = find(key, hash); + if (it != cend()) + { + return it.value(); + } + else + { + TSL_SH_THROW_OR_ABORT(std::out_of_range, "Couldn't find key."); + } + } + + template::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) + { + return try_emplace(std::forward(key)).first.value(); + } + + template + bool contains(const K& key) const + { + return contains(key, hash_key(key)); + } + + template + bool contains(const K& key, std::size_t hash) const + { + return count(key, hash) != 0; + } + + template + size_type count(const K& key) const + { + return count(key, hash_key(key)); + } + + template + size_type count(const K& key, std::size_t hash) const + { + if (find(key, hash) != cend()) + { + return 1; + } + else + { + return 0; + } + } + + template + iterator find(const K& key) + { + return find_impl(key, hash_key(key)); + } + + template + iterator find(const K& key, std::size_t hash) + { + return find_impl(key, hash); + } + + template + const_iterator find(const K& key) const + { + return find_impl(key, hash_key(key)); + } + + template + const_iterator find(const K& key, std::size_t hash) const + { + return find_impl(key, hash); + } + + template + std::pair equal_range(const K& key) + { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) + { + iterator it = find(key, hash); + return std::make_pair(it, (it == end()) ? 
it : std::next(it)); + } + + template + std::pair equal_range(const K& key) const + { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range( + const K& key, std::size_t hash) const + { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend()) ? it : std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const + { + return m_bucket_count; + } + + size_type max_bucket_count() const + { + return m_sparse_buckets_data.max_size(); + } + + /* + * Hash policy + */ + float load_factor() const + { + if (bucket_count() == 0) + { + return 0; + } + + return float(m_nb_elements) / float(bucket_count()); + } + + float max_load_factor() const + { + return m_max_load_factor; + } + + void max_load_factor(float ml) + { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.8f)); + m_load_threshold_rehash = size_type(float(bucket_count()) * m_max_load_factor); + + const float max_load_factor_with_deleted_buckets = + m_max_load_factor + 0.5f * (1.0f - m_max_load_factor); + tsl_sh_assert(max_load_factor_with_deleted_buckets > 0.0f + && max_load_factor_with_deleted_buckets <= 1.0f); + m_load_threshold_clear_deleted = + size_type(float(bucket_count()) * max_load_factor_with_deleted_buckets); + } + + void rehash(size_type count) + { + count = std::max(count, size_type(std::ceil(float(size()) / max_load_factor()))); + rehash_impl(count); + } + + void reserve(size_type count) + { + rehash(size_type(std::ceil(float(count) / max_load_factor()))); + } + + /* + * Observers + */ + hasher hash_function() const + { + return static_cast(*this); + } + + key_equal key_eq() const + { + return static_cast(*this); + } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) + { + auto it_sparse_buckets = + m_sparse_buckets_data.begin() + + std::distance(m_sparse_buckets_data.cbegin(), pos.m_sparse_buckets_it); + + return iterator( + it_sparse_buckets, sparse_array::mutable_iterator(pos.m_sparse_array_it)); + } + + template + void serialize(Serializer& serializer) const + { + serialize_impl(serializer); + } + + template + void deserialize(Deserializer& deserializer, bool hash_compatible) + { + deserialize_impl(deserializer, hash_compatible); + } + + private: + template + std::size_t hash_key(const K& key) const + { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1& key1, const K2& key2) const + { + return KeyEqual::operator()(key1, key2); + } + + size_type bucket_for_hash(std::size_t hash) const + { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_sh_assert(sparse_array::sparse_ibucket(bucket) < m_sparse_buckets_data.size() + || (bucket == 0 && m_sparse_buckets_data.empty())); + + return bucket; + } + + template::value>::type* = nullptr> + size_type next_bucket(size_type ibucket, size_type iprobe) const + { + (void)iprobe; + if (Probing == tsl::sh::probing::linear) + { + return (ibucket + 1) & this->m_mask; + } + else + { + tsl_sh_assert(Probing == tsl::sh::probing::quadratic); + return (ibucket + iprobe) & this->m_mask; + } + } + + template::value>::type* = nullptr> + size_type next_bucket(size_type ibucket, size_type iprobe) const + { + (void)iprobe; + if (Probing == tsl::sh::probing::linear) + { + ibucket++; + return (ibucket != bucket_count()) ? ibucket : 0; + } + else + { + tsl_sh_assert(Probing == tsl::sh::probing::quadratic); + ibucket += iprobe; + return (ibucket < bucket_count()) ? 
ibucket : ibucket % bucket_count(); + } + } + + // TODO encapsulate m_sparse_buckets_data to avoid the managing the allocator + void copy_buckets_from(const sparse_hash& other) + { + m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); + + TSL_SH_TRY + { + for (const auto& bucket : other.m_sparse_buckets_data) + { + m_sparse_buckets_data.emplace_back(bucket, static_cast(*this)); + } + } + TSL_SH_CATCH(...) + { + clear(); + TSL_SH_RETRHOW; + } + + tsl_sh_assert(m_sparse_buckets_data.empty() || m_sparse_buckets_data.back().last()); + } + + void move_buckets_from(sparse_hash&& other) + { + m_sparse_buckets_data.reserve(other.m_sparse_buckets_data.size()); + + TSL_SH_TRY + { + for (auto&& bucket : other.m_sparse_buckets_data) + { + m_sparse_buckets_data.emplace_back( + std::move(bucket), static_cast(*this)); + } + } + TSL_SH_CATCH(...) + { + clear(); + TSL_SH_RETRHOW; + } + + tsl_sh_assert(m_sparse_buckets_data.empty() || m_sparse_buckets_data.back().last()); + } + + template + std::pair insert_impl(const K& key, Args&&... value_type_args) + { + /** + * We must insert the value in the first empty or deleted bucket we find. If + * we first find a deleted bucket, we still have to continue the search + * until we find an empty bucket or until we have searched all the buckets + * to be sure that the value is not in the hash table. We thus remember the + * position, if any, of the first deleted bucket we have encountered so we + * can insert it there if needed. + */ + bool found_first_deleted_bucket = false; + std::size_t sparse_ibucket_first_deleted = 0; + typename sparse_array::size_type index_in_sparse_bucket_first_deleted = 0; + + const std::size_t hash = hash_key(key); + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) + { + std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + auto index_in_sparse_bucket = sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) + { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) + { + return std::make_pair( + iterator(m_sparse_buckets_data.begin() + sparse_ibucket, value_it), + false); + } + } + else if (m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) + && probe < m_bucket_count) + { + if (!found_first_deleted_bucket) + { + found_first_deleted_bucket = true; + sparse_ibucket_first_deleted = sparse_ibucket; + index_in_sparse_bucket_first_deleted = index_in_sparse_bucket; + } + } + else + { + /** + * At this point we are sure that the value does not exist + * in the hash table. + * First check if we satisfy load and delete thresholds, and if not, + * rehash the hash table (and therefore start over). Otherwise, just + * insert the value into the appropriate bucket. 
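             * (In this implementation the two thresholds are m_load_threshold_rehash,
             * which triggers a grow to GrowthPolicy::next_bucket_count(), and
             * m_load_threshold_clear_deleted, which only triggers
             * clear_deleted_buckets() and keeps the current bucket count.)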
+ */ + if (size() >= m_load_threshold_rehash) + { + rehash_impl(GrowthPolicy::next_bucket_count()); + return insert_impl(key, std::forward(value_type_args)...); + } + else if (size() + m_nb_deleted_buckets >= m_load_threshold_clear_deleted) + { + clear_deleted_buckets(); + return insert_impl(key, std::forward(value_type_args)...); + } + + if (found_first_deleted_bucket) + { + auto it = insert_in_bucket(sparse_ibucket_first_deleted, + index_in_sparse_bucket_first_deleted, + std::forward(value_type_args)...); + m_nb_deleted_buckets--; + + return it; + } + + return insert_in_bucket(sparse_ibucket, index_in_sparse_bucket, + std::forward(value_type_args)...); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + std::pair insert_in_bucket(std::size_t sparse_ibucket, + typename sparse_array::size_type index_in_sparse_bucket, Args&&... value_type_args) + { + auto value_it = m_sparse_buckets[sparse_ibucket].set( + *this, index_in_sparse_bucket, std::forward(value_type_args)...); + m_nb_elements++; + + return std::make_pair( + iterator(m_sparse_buckets_data.begin() + sparse_ibucket, value_it), true); + } + + template + size_type erase_impl(const K& key, std::size_t hash) + { + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) + { + const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + const auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) + { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) + { + m_sparse_buckets[sparse_ibucket].erase( + *this, value_it, index_in_sparse_bucket); + m_nb_elements--; + m_nb_deleted_buckets++; + + return 1; + } + } + else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) + || probe >= m_bucket_count) + { + return 0; + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + iterator find_impl(const K& key, std::size_t hash) + { + return mutable_iterator(static_cast(this)->find(key, hash)); + } + + template + const_iterator find_impl(const K& key, std::size_t hash) const + { + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) + { + const std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + const auto index_in_sparse_bucket = + sparse_array::index_in_sparse_bucket(ibucket); + + if (m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) + { + auto value_it = + m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket); + if (compare_keys(key, KeySelect()(*value_it))) + { + return const_iterator( + m_sparse_buckets_data.cbegin() + sparse_ibucket, value_it); + } + } + else if (!m_sparse_buckets[sparse_ibucket].has_deleted_value( + index_in_sparse_bucket) + || probe >= m_bucket_count) + { + return cend(); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + void clear_deleted_buckets() + { + // TODO could be optimized, we could do it in-place instead of allocating a + // new bucket array. 
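        // Rough sketch of the effect: rehashing to the unchanged m_bucket_count
        // re-inserts every element into freshly built sparse arrays, so the slots
        // marked as deleted are not carried over and m_nb_deleted_buckets drops
        // back to zero (checked by the assert below).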
+ rehash_impl(m_bucket_count); + tsl_sh_assert(m_nb_deleted_buckets == 0); + } + + template::type* = nullptr> + void rehash_impl(size_type count) + { + sparse_hash new_table(count, static_cast(*this), + static_cast(*this), static_cast(*this), + m_max_load_factor); + + for (auto& bucket : m_sparse_buckets_data) + { + for (auto& val : bucket) + { + new_table.insert_on_rehash(std::move(val)); + } + + // TODO try to reuse some of the memory + bucket.clear(*this); + } + + new_table.swap(*this); + } + + /** + * TODO: For now we copy each element into the new map. We could move + * them if they are nothrow_move_constructible without triggering + * any exception if we reserve enough space in the sparse arrays beforehand. + */ + template::type* = nullptr> + void rehash_impl(size_type count) + { + sparse_hash new_table(count, static_cast(*this), + static_cast(*this), static_cast(*this), + m_max_load_factor); + + for (const auto& bucket : m_sparse_buckets_data) + { + for (const auto& val : bucket) + { + new_table.insert_on_rehash(val); + } + } + + new_table.swap(*this); + } + + template + void insert_on_rehash(K&& key_value) + { + const std::size_t hash = hash_key(KeySelect()(key_value)); + std::size_t ibucket = bucket_for_hash(hash); + + std::size_t probe = 0; + while (true) + { + std::size_t sparse_ibucket = sparse_array::sparse_ibucket(ibucket); + auto index_in_sparse_bucket = sparse_array::index_in_sparse_bucket(ibucket); + + if (!m_sparse_buckets[sparse_ibucket].has_value(index_in_sparse_bucket)) + { + m_sparse_buckets[sparse_ibucket].set( + *this, index_in_sparse_bucket, std::forward(key_value)); + m_nb_elements++; + + return; + } + else + { + tsl_sh_assert(!compare_keys(KeySelect()(key_value), + KeySelect()( + *m_sparse_buckets[sparse_ibucket].value(index_in_sparse_bucket)))); + } + + probe++; + ibucket = next_bucket(ibucket, probe); + } + } + + template + void serialize_impl(Serializer& serializer) const + { + const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION; + serializer(version); + + const slz_size_type bucket_count = m_bucket_count; + serializer(bucket_count); + + const slz_size_type nb_sparse_buckets = m_sparse_buckets_data.size(); + serializer(nb_sparse_buckets); + + const slz_size_type nb_elements = m_nb_elements; + serializer(nb_elements); + + const slz_size_type nb_deleted_buckets = m_nb_deleted_buckets; + serializer(nb_deleted_buckets); + + const float max_load_factor = m_max_load_factor; + serializer(max_load_factor); + + for (const auto& bucket : m_sparse_buckets_data) + { + bucket.serialize(serializer); + } + } + + template + void deserialize_impl(Deserializer& deserializer, bool hash_compatible) + { + tsl_sh_assert( + m_bucket_count == 0 + && m_sparse_buckets_data.empty()); // Current hash table must be empty + + const slz_size_type version = deserialize_value(deserializer); + // For now we only have one version of the serialization protocol. + // If it doesn't match there is a problem with the file. + if (version != SERIALIZATION_PROTOCOL_VERSION) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, + "Can't deserialize the sparse_map/set. 
The " + "protocol version header is invalid."); + } + + const slz_size_type bucket_count_ds = + deserialize_value(deserializer); + const slz_size_type nb_sparse_buckets = + deserialize_value(deserializer); + const slz_size_type nb_elements = deserialize_value(deserializer); + const slz_size_type nb_deleted_buckets = + deserialize_value(deserializer); + const float max_load_factor = deserialize_value(deserializer); + + if (!hash_compatible) + { + this->max_load_factor(max_load_factor); + reserve(numeric_cast( + nb_elements, "Deserialized nb_elements is too big.")); + for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) + { + sparse_array::deserialize_values_into_sparse_hash(deserializer, *this); + } + } + else + { + m_bucket_count = numeric_cast( + bucket_count_ds, "Deserialized bucket_count is too big."); + + GrowthPolicy::operator=(GrowthPolicy(m_bucket_count)); + // GrowthPolicy should not modify the bucket count we got from + // deserialization + if (m_bucket_count != bucket_count_ds) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, + "The GrowthPolicy is not the same even though " + "hash_compatible is true."); + } + + if (nb_sparse_buckets != sparse_array::nb_sparse_buckets(m_bucket_count)) + { + TSL_SH_THROW_OR_ABORT( + std::runtime_error, "Deserialized nb_sparse_buckets is invalid."); + } + + m_nb_elements = numeric_cast( + nb_elements, "Deserialized nb_elements is too big."); + m_nb_deleted_buckets = numeric_cast( + nb_deleted_buckets, "Deserialized nb_deleted_buckets is too big."); + + m_sparse_buckets_data.reserve(numeric_cast( + nb_sparse_buckets, "Deserialized nb_sparse_buckets is too big.")); + for (slz_size_type ibucket = 0; ibucket < nb_sparse_buckets; ibucket++) + { + m_sparse_buckets_data.emplace_back( + sparse_array::deserialize_hash_compatible( + deserializer, static_cast(*this))); + } + + if (!m_sparse_buckets_data.empty()) + { + m_sparse_buckets_data.back().set_as_last(); + m_sparse_buckets = m_sparse_buckets_data.data(); + } + + this->max_load_factor(max_load_factor); + if (load_factor() > this->max_load_factor()) + { + TSL_SH_THROW_OR_ABORT(std::runtime_error, + "Invalid max_load_factor. Check that the serializer and " + "deserializer support " + "floats correctly as they can be converted implicitely to ints."); + } + } + } + + public: + static const size_type DEFAULT_INIT_BUCKET_COUNT = 0; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; + + /** + * Protocol version currenlty used for serialization. + */ + static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1; + + /** + * Return an always valid pointer to an static empty bucket_entry with + * last_bucket() == true. + */ + sparse_array* static_empty_sparse_bucket_ptr() + { + static sparse_array empty_sparse_bucket(true); + return &empty_sparse_bucket; + } + + private: + sparse_buckets_container m_sparse_buckets_data; + + /** + * Points to m_sparse_buckets_data.data() if !m_sparse_buckets_data.empty() + * otherwise points to static_empty_sparse_bucket_ptr. This variable is useful + * to avoid the cost of checking if m_sparse_buckets_data is empty when trying + * to find an element. + * + * TODO Remove m_sparse_buckets_data and only use a pointer instead of a + * pointer+vector to save some space in the sparse_hash object. + */ + sparse_array* m_sparse_buckets; + + size_type m_bucket_count; + size_type m_nb_elements; + size_type m_nb_deleted_buckets; + + /** + * Maximum that m_nb_elements can reach before a rehash occurs automatically + * to grow the hash table. 
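     * (Kept in sync by max_load_factor(float), which recomputes it as
     * bucket_count() * max_load_factor().)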
+ */ + size_type m_load_threshold_rehash; + + /** + * Maximum that m_nb_elements + m_nb_deleted_buckets can reach before cleaning + * up the buckets marked as deleted. + */ + size_type m_load_threshold_clear_deleted; + float m_max_load_factor; + }; + + } // namespace detail_sparse_hash +} // namespace tsl #endif diff --git a/Include/Pipe/Extern/sparse_map.h b/Include/Pipe/Extern/sparse_map.h index b21ba4d5..158f813c 100644 --- a/Include/Pipe/Extern/sparse_map.h +++ b/Include/Pipe/Extern/sparse_map.h @@ -24,6 +24,8 @@ #ifndef TSL_SPARSE_MAP_H #define TSL_SPARSE_MAP_H +#include "Pipe/Extern/sparse_hash.h" + #include #include #include @@ -31,770 +33,908 @@ #include #include -#include "Pipe/Extern/sparse_hash.h" - -namespace tsl { - -/** - * Implementation of a sparse hash map using open-addressing with quadratic - * probing. The goal on the hash map is to be the most memory efficient - * possible, even at low load factor, while keeping reasonable performances. - * - * `GrowthPolicy` defines how the map grows and consequently how a hash value is - * mapped to a bucket. By default the map uses - * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of - * buckets to a power of two and uses a mask to map the hash to a bucket instead - * of the slow modulo. Other growth policies are available and you may define - * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the - * interface. - * - * `ExceptionSafety` defines the exception guarantee provided by the class. By - * default only the basic exception safety is guaranteed which mean that all - * resources used by the hash map will be freed (no memory leaks) but the hash - * map may end-up in an undefined state if an exception is thrown (undefined - * here means that some elements may be missing). This can ONLY happen on rehash - * (either on insert or if `rehash` is called explicitly) and will occur if the - * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor - * (when a nothrow move constructor is not available) throws an exception. This - * can be avoided by calling `reserve` beforehand. This basic guarantee is - * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. - * It is possible to ask for the strong exception guarantee with - * `tsl::sh::exception_safety::strong`, the drawback is that the map will be - * slower on rehashes and will also need more memory on rehashes. - * - * `Sparsity` defines how much the hash set will compromise between insertion - * speed and memory usage. A high sparsity means less memory usage but longer - * insertion times, and vice-versa for low sparsity. The default - * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't - * change the lookup speed. - * - * `Key` and `T` must be nothrow move constructible and/or copy constructible. - * - * If the destructor of `Key` or `T` throws an exception, the behaviour of the - * class is undefined. - * - * Iterators invalidation: - * - clear, operator=, reserve, rehash: always invalidate the iterators. - * - insert, emplace, emplace_hint, operator[]: if there is an effective - * insert, invalidate the iterators. - * - erase: always invalidate the iterators. 
- */ -template , - class KeyEqual = std::equal_to, - class Allocator = std::allocator>, - class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, - tsl::sh::exception_safety ExceptionSafety = - tsl::sh::exception_safety::basic, - tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> -class sparse_map { - private: - template - using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type &operator()( - const std::pair &key_value) const noexcept { - return key_value.first; - } - - key_type &operator()(std::pair &key_value) noexcept { - return key_value.first; - } - }; - - class ValueSelect { - public: - using value_type = T; - - const value_type &operator()( - const std::pair &key_value) const noexcept { - return key_value.second; - } - - value_type &operator()(std::pair &key_value) noexcept { - return key_value.second; - } - }; - - using ht = detail_sparse_hash::sparse_hash< - std::pair, KeySelect, ValueSelect, Hash, KeyEqual, Allocator, - GrowthPolicy, ExceptionSafety, Sparsity, tsl::sh::probing::quadratic>; - - public: - using key_type = typename ht::key_type; - using mapped_type = T; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - public: - /* - * Constructors - */ - sparse_map() : sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT) {} - - explicit sparse_map(size_type bucket_count, const Hash &hash = Hash(), - const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} - - sparse_map(size_type bucket_count, const Allocator &alloc) - : sparse_map(bucket_count, Hash(), KeyEqual(), alloc) {} - - sparse_map(size_type bucket_count, const Hash &hash, const Allocator &alloc) - : sparse_map(bucket_count, hash, KeyEqual(), alloc) {} - - explicit sparse_map(const Allocator &alloc) - : sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) {} - - template - sparse_map(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, - const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : sparse_map(bucket_count, hash, equal, alloc) { - insert(first, last); - } - - template - sparse_map(InputIt first, InputIt last, size_type bucket_count, - const Allocator &alloc) - : sparse_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} - - template - sparse_map(InputIt first, InputIt last, size_type bucket_count, - const Hash &hash, const Allocator &alloc) - : sparse_map(first, last, bucket_count, hash, KeyEqual(), alloc) {} - - sparse_map(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, - const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : sparse_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) { - } - - sparse_map(std::initializer_list init, size_type bucket_count, - const Allocator &alloc) - : sparse_map(init.begin(), init.end(), 
bucket_count, Hash(), KeyEqual(), - alloc) {} - - sparse_map(std::initializer_list init, size_type bucket_count, - const Hash &hash, const Allocator &alloc) - : sparse_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), - alloc) {} - - sparse_map &operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - std::pair insert(const value_type &value) { - return m_ht.insert(value); - } - - template ::value>::type * = nullptr> - std::pair insert(P &&value) { - return m_ht.emplace(std::forward
(value)); - } - - std::pair insert(value_type &&value) { - return m_ht.insert(std::move(value)); - } - - iterator insert(const_iterator hint, const value_type &value) { - return m_ht.insert_hint(hint, value); - } - - template ::value>::type * = nullptr> - iterator insert(const_iterator hint, P &&value) { - return m_ht.emplace_hint(hint, std::forward
(value)); - } - - iterator insert(const_iterator hint, value_type &&value) { - return m_ht.insert_hint(hint, std::move(value)); - } - - template - void insert(InputIt first, InputIt last) { - m_ht.insert(first, last); - } - - void insert(std::initializer_list ilist) { - m_ht.insert(ilist.begin(), ilist.end()); - } - - template - std::pair insert_or_assign(const key_type &k, M &&obj) { - return m_ht.insert_or_assign(k, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type &&k, M &&obj) { - return m_ht.insert_or_assign(std::move(k), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type &k, M &&obj) { - return m_ht.insert_or_assign(hint, k, std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, key_type &&k, M &&obj) { - return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); - } - - /** - * Due to the way elements are stored, emplace will need to move or copy the - * key-value once. The method is equivalent to - * `insert(value_type(std::forward(args)...));`. - * - * Mainly here for compatibility with the `std::unordered_map` interface. - */ - template - std::pair emplace(Args &&...args) { - return m_ht.emplace(std::forward(args)...); - } - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy - * the key-value once. The method is equivalent to `insert(hint, - * value_type(std::forward(args)...));`. - * - * Mainly here for compatibility with the `std::unordered_map` interface. - */ - template - iterator emplace_hint(const_iterator hint, Args &&...args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - template - std::pair try_emplace(const key_type &k, Args &&...args) { - return m_ht.try_emplace(k, std::forward(args)...); - } - - template - std::pair try_emplace(key_type &&k, Args &&...args) { - return m_ht.try_emplace(std::move(k), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type &k, Args &&...args) { - return m_ht.try_emplace_hint(hint, k, std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, key_type &&k, Args &&...args) { - return m_ht.try_emplace_hint(hint, std::move(k), - std::forward(args)...); - } - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { - return m_ht.erase(first, last); - } - size_type erase(const key_type &key) { return m_ht.erase(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - size_type erase(const key_type &key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type erase(const K &key) { - return m_ht.erase(key); - } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. 
Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type erase(const K &key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - void swap(sparse_map &other) { other.m_ht.swap(m_ht); } - - /* - * Lookup - */ - T &at(const Key &key) { return m_ht.at(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - T &at(const Key &key, std::size_t precalculated_hash) { - return m_ht.at(key, precalculated_hash); - } - - const T &at(const Key &key) const { return m_ht.at(key); } - - /** - * @copydoc at(const Key& key, std::size_t precalculated_hash) - */ - const T &at(const Key &key, std::size_t precalculated_hash) const { - return m_ht.at(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - T &at(const K &key) { - return m_ht.at(key); - } - - /** - * @copydoc at(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - T &at(const K &key, std::size_t precalculated_hash) { - return m_ht.at(key, precalculated_hash); - } - - /** - * @copydoc at(const K& key) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const T &at(const K &key) const { - return m_ht.at(key); - } - - /** - * @copydoc at(const K& key, std::size_t precalculated_hash) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const T &at(const K &key, std::size_t precalculated_hash) const { - return m_ht.at(key, precalculated_hash); - } - - T &operator[](const Key &key) { return m_ht[key]; } - T &operator[](Key &&key) { return m_ht[std::move(key)]; } - - size_type count(const Key &key) const { return m_ht.count(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - size_type count(const Key &key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type count(const K &key) const { - return m_ht.count(key); - } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. 
Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type count(const K &key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - iterator find(const Key &key) { return m_ht.find(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - iterator find(const Key &key, std::size_t precalculated_hash) { - return m_ht.find(key, precalculated_hash); - } - - const_iterator find(const Key &key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key &key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - iterator find(const K &key) { - return m_ht.find(key); - } - - /** - * @copydoc find(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - iterator find(const K &key, std::size_t precalculated_hash) { - return m_ht.find(key, precalculated_hash); - } - - /** - * @copydoc find(const K& key) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const_iterator find(const K &key) const { - return m_ht.find(key); - } - - /** - * @copydoc find(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const_iterator find(const K &key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - bool contains(const Key &key) const { return m_ht.contains(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The - * hash value should be the same as hash_function()(key). Useful to speed-up - * the lookup if you already have the hash. - */ - bool contains(const Key &key, std::size_t precalculated_hash) const { - return m_ht.contains(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * KeyEqual::is_transparent exists. If so, K must be hashable and comparable - * to Key. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - bool contains(const K &key) const { - return m_ht.contains(key); - } - - /** - * @copydoc contains(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The - * hash value should be the same as hash_function()(key). 
Useful to speed-up - * the lookup if you already have the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - bool contains(const K &key, std::size_t precalculated_hash) const { - return m_ht.contains(key, precalculated_hash); - } - - std::pair equal_range(const Key &key) { - return m_ht.equal_range(key); - } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - std::pair equal_range(const Key &key, - std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key &key) const { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range( - const Key &key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key) { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key, - std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key) const { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range( - const K &key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count) { m_ht.rehash(count); } - void reserve(size_type count) { m_ht.reserve(count); } - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - - /* - * Other - */ - - /** - * Convert a `const_iterator` to an `iterator`. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - /** - * Serialize the map through the `serializer` parameter. 
- * - * The `serializer` parameter must be a function object that supports the - * following call: - * - `template void operator()(const U& value);` where the types - * `std::uint64_t`, `float` and `std::pair` must be supported for U. - * - * The implementation leaves binary compatibility (endianness, IEEE 754 for - * floats, ...) of the types it serializes in the hands of the `Serializer` - * function object if compatibility is required. - */ - template - void serialize(Serializer &serializer) const { - m_ht.serialize(serializer); - } - - /** - * Deserialize a previously serialized map through the `deserializer` - * parameter. - * - * The `deserializer` parameter must be a function object that supports the - * following calls: - * - `template U operator()();` where the types `std::uint64_t`, - * `float` and `std::pair` must be supported for U. - * - * If the deserialized hash map type is hash compatible with the serialized - * map, the deserialization process can be sped up by setting - * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and - * GrowthPolicy must behave the same way than the ones used on the serialized - * map. The `std::size_t` must also be of the same size as the one on the - * platform used to serialize the map. If these criteria are not met, the - * behaviour is undefined with `hash_compatible` sets to true. - * - * The behaviour is undefined if the type `Key` and `T` of the `sparse_map` - * are not the same as the types used during serialization. - * - * The implementation leaves binary compatibility (endianness, IEEE 754 for - * floats, size of int, ...) of the types it deserializes in the hands of the - * `Deserializer` function object if compatibility is required. - */ - template - static sparse_map deserialize(Deserializer &deserializer, - bool hash_compatible = false) { - sparse_map map(0); - map.m_ht.deserialize(deserializer, hash_compatible); - - return map; - } - - friend bool operator==(const sparse_map &lhs, const sparse_map &rhs) { - if (lhs.size() != rhs.size()) { - return false; - } - - for (const auto &element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs.first); - if (it_element_rhs == rhs.cend() || - element_lhs.second != it_element_rhs->second) { - return false; - } - } - - return true; - } - - friend bool operator!=(const sparse_map &lhs, const sparse_map &rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(sparse_map &lhs, sparse_map &rhs) { lhs.swap(rhs); } - - private: - ht m_ht; -}; - -/** - * Same as `tsl::sparse_map`. - */ -template , - class KeyEqual = std::equal_to, - class Allocator = std::allocator>> -using sparse_pg_map = - sparse_map; -} // end namespace tsl +namespace tsl +{ + + /** + * Implementation of a sparse hash map using open-addressing with quadratic + * probing. The goal on the hash map is to be the most memory efficient + * possible, even at low load factor, while keeping reasonable performances. + * + * `GrowthPolicy` defines how the map grows and consequently how a hash value is + * mapped to a bucket. By default the map uses + * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of + * buckets to a power of two and uses a mask to map the hash to a bucket instead + * of the slow modulo. Other growth policies are available and you may define + * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the + * interface. + * + * `ExceptionSafety` defines the exception guarantee provided by the class. 
By + * default only the basic exception safety is guaranteed which mean that all + * resources used by the hash map will be freed (no memory leaks) but the hash + * map may end-up in an undefined state if an exception is thrown (undefined + * here means that some elements may be missing). This can ONLY happen on rehash + * (either on insert or if `rehash` is called explicitly) and will occur if the + * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor + * (when a nothrow move constructor is not available) throws an exception. This + * can be avoided by calling `reserve` beforehand. This basic guarantee is + * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. + * It is possible to ask for the strong exception guarantee with + * `tsl::sh::exception_safety::strong`, the drawback is that the map will be + * slower on rehashes and will also need more memory on rehashes. + * + * `Sparsity` defines how much the hash set will compromise between insertion + * speed and memory usage. A high sparsity means less memory usage but longer + * insertion times, and vice-versa for low sparsity. The default + * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't + * change the lookup speed. + * + * `Key` and `T` must be nothrow move constructible and/or copy constructible. + * + * If the destructor of `Key` or `T` throws an exception, the behaviour of the + * class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective + * insert, invalidate the iterators. + * - erase: always invalidate the iterators. + */ + template, class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, + tsl::sh::exception_safety ExceptionSafety = tsl::sh::exception_safety::basic, + tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> + class sparse_map + { + private: + template + using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; + + class KeySelect + { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const noexcept + { + return key_value.first; + } + + key_type& operator()(std::pair& key_value) noexcept + { + return key_value.first; + } + }; + + class ValueSelect + { + public: + using value_type = T; + + const value_type& operator()(const std::pair& key_value) const noexcept + { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) noexcept + { + return key_value.second; + } + }; + + using ht = detail_sparse_hash::sparse_hash, KeySelect, ValueSelect, Hash, + KeyEqual, Allocator, GrowthPolicy, ExceptionSafety, Sparsity, + tsl::sh::probing::quadratic>; + + public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + public: + /* + * Constructors + */ + sparse_map() : 
sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT) {} + + explicit sparse_map(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + {} + + sparse_map(size_type bucket_count, const Allocator& alloc) + : sparse_map(bucket_count, Hash(), KeyEqual(), alloc) + {} + + sparse_map(size_type bucket_count, const Hash& hash, const Allocator& alloc) + : sparse_map(bucket_count, hash, KeyEqual(), alloc) + {} + + explicit sparse_map(const Allocator& alloc) + : sparse_map(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) + {} + + template + sparse_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : sparse_map(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + sparse_map(InputIt first, InputIt last, size_type bucket_count, const Allocator& alloc) + : sparse_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) + {} + + template + sparse_map(InputIt first, InputIt last, size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : sparse_map(first, last, bucket_count, hash, KeyEqual(), alloc) + {} + + sparse_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : sparse_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) + {} + + sparse_map( + std::initializer_list init, size_type bucket_count, const Allocator& alloc) + : sparse_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + {} + + sparse_map(std::initializer_list init, size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : sparse_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + {} + + sparse_map& operator=(std::initializer_list ilist) + { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const + { + return m_ht.get_allocator(); + } + + /* + * Iterators + */ + iterator begin() noexcept + { + return m_ht.begin(); + } + const_iterator begin() const noexcept + { + return m_ht.begin(); + } + const_iterator cbegin() const noexcept + { + return m_ht.cbegin(); + } + + iterator end() noexcept + { + return m_ht.end(); + } + const_iterator end() const noexcept + { + return m_ht.end(); + } + const_iterator cend() const noexcept + { + return m_ht.cend(); + } + + /* + * Capacity + */ + bool empty() const noexcept + { + return m_ht.empty(); + } + size_type size() const noexcept + { + return m_ht.size(); + } + size_type max_size() const noexcept + { + return m_ht.max_size(); + } + + /* + * Modifiers + */ + void clear() noexcept + { + m_ht.clear(); + } + + std::pair insert(const value_type& value) + { + return m_ht.insert(value); + } + + template::value>::type* = nullptr> + std::pair insert(P&& value) + { + return m_ht.emplace(std::forward
(value)); + } + + std::pair insert(value_type&& value) + { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) + { + return m_ht.insert_hint(hint, value); + } + + template::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) + { + return m_ht.emplace_hint(hint, std::forward
(value)); + } + + iterator insert(const_iterator hint, value_type&& value) + { + return m_ht.insert_hint(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) + { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) + { + m_ht.insert(ilist.begin(), ilist.end()); + } + + template + std::pair insert_or_assign(const key_type& k, M&& obj) + { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) + { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) + { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) + { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * `insert(value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + std::pair emplace(Args&&... args) + { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to `insert(hint, + * value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) + { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) + { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) + { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) + { + return m_ht.try_emplace_hint(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) + { + return m_ht.try_emplace_hint(hint, std::move(k), std::forward(args)...); + } + + iterator erase(iterator pos) + { + return m_ht.erase(pos); + } + iterator erase(const_iterator pos) + { + return m_ht.erase(pos); + } + iterator erase(const_iterator first, const_iterator last) + { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) + { + return m_ht.erase(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) + { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) + { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. 
Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) + { + return m_ht.erase(key, precalculated_hash); + } + + void swap(sparse_map& other) + { + other.m_ht.swap(m_ht); + } + + /* + * Lookup + */ + T& at(const Key& key) + { + return m_ht.at(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) + { + return m_ht.at(key, precalculated_hash); + } + + const T& at(const Key& key) const + { + return m_ht.at(key); + } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.at(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + T& at(const K& key) + { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) + { + return m_ht.at(key, precalculated_hash); + } + + /** + * @copydoc at(const K& key) + */ + template::value>::type* = nullptr> + const T& at(const K& key) const + { + return m_ht.at(key); + } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const + { + return m_ht.at(key, precalculated_hash); + } + + T& operator[](const Key& key) + { + return m_ht[key]; + } + T& operator[](Key&& key) + { + return m_ht[std::move(key)]; + } + + size_type count(const Key& key) const + { + return m_ht.count(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const + { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. 
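     * A minimal usage sketch, assuming a map `m` and a key `k` of matching types:
     *   const std::size_t h = m.hash_function()(k);
     *   m.count(k, h);               // same result as m.count(k), without re-hashing k
     *   m.find(k, h); m.erase(k, h); // the same precalculated hash can be reused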
+ */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const + { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) + { + return m_ht.find(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) + { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const + { + return m_ht.find(key); + } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + iterator find(const K& key) + { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) + { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const + { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const + { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const + { + return m_ht.contains(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template::value>::type* = nullptr> + bool contains(const K& key) const + { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const + { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) + { + return m_ht.equal_range(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. 
The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) + { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const + { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) + { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const + { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const + { + return m_ht.bucket_count(); + } + size_type max_bucket_count() const + { + return m_ht.max_bucket_count(); + } + + /* + * Hash policy + */ + float load_factor() const + { + return m_ht.load_factor(); + } + float max_load_factor() const + { + return m_ht.max_load_factor(); + } + void max_load_factor(float ml) + { + m_ht.max_load_factor(ml); + } + + void rehash(size_type count) + { + m_ht.rehash(count); + } + void reserve(size_type count) + { + m_ht.reserve(count); + } + + /* + * Observers + */ + hasher hash_function() const + { + return m_ht.hash_function(); + } + key_equal key_eq() const + { + return m_ht.key_eq(); + } + + /* + * Other + */ + + /** + * Convert a `const_iterator` to an `iterator`. + */ + iterator mutable_iterator(const_iterator pos) + { + return m_ht.mutable_iterator(pos); + } + + /** + * Serialize the map through the `serializer` parameter. + * + * The `serializer` parameter must be a function object that supports the + * following call: + * - `template void operator()(const U& value);` where the types + * `std::uint64_t`, `float` and `std::pair` must be supported for U. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, ...) of the types it serializes in the hands of the `Serializer` + * function object if compatibility is required. + */ + template + void serialize(Serializer& serializer) const + { + m_ht.serialize(serializer); + } + + /** + * Deserialize a previously serialized map through the `deserializer` + * parameter. 
+ * + * The `deserializer` parameter must be a function object that supports the + * following calls: + * - `template U operator()();` where the types `std::uint64_t`, + * `float` and `std::pair` must be supported for U. + * + * If the deserialized hash map type is hash compatible with the serialized + * map, the deserialization process can be sped up by setting + * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and + * GrowthPolicy must behave the same way than the ones used on the serialized + * map. The `std::size_t` must also be of the same size as the one on the + * platform used to serialize the map. If these criteria are not met, the + * behaviour is undefined with `hash_compatible` sets to true. + * + * The behaviour is undefined if the type `Key` and `T` of the `sparse_map` + * are not the same as the types used during serialization. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, size of int, ...) of the types it deserializes in the hands of the + * `Deserializer` function object if compatibility is required. + */ + template + static sparse_map deserialize(Deserializer& deserializer, bool hash_compatible = false) + { + sparse_map map(0); + map.m_ht.deserialize(deserializer, hash_compatible); + + return map; + } + + friend bool operator==(const sparse_map& lhs, const sparse_map& rhs) + { + if (lhs.size() != rhs.size()) + { + return false; + } + + for (const auto& element_lhs : lhs) + { + const auto it_element_rhs = rhs.find(element_lhs.first); + if (it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) + { + return false; + } + } + + return true; + } + + friend bool operator!=(const sparse_map& lhs, const sparse_map& rhs) + { + return !operator==(lhs, rhs); + } + + friend void swap(sparse_map& lhs, sparse_map& rhs) + { + lhs.swap(rhs); + } + + private: + ht m_ht; + }; + + /** + * Same as `tsl::sparse_map`. + */ + template, class KeyEqual = std::equal_to, + class Allocator = std::allocator>> + using sparse_pg_map = + sparse_map; + +} // end namespace tsl #endif diff --git a/Include/Pipe/Extern/sparse_set.h b/Include/Pipe/Extern/sparse_set.h index d1d30743..844a6c27 100644 --- a/Include/Pipe/Extern/sparse_set.h +++ b/Include/Pipe/Extern/sparse_set.h @@ -24,6 +24,8 @@ #ifndef TSL_SPARSE_SET_H #define TSL_SPARSE_SET_H +#include "Pipe/Extern/sparse_hash.h" + #include #include #include @@ -31,625 +33,741 @@ #include #include -#include "Pipe/Extern/sparse_hash.h" - -namespace tsl { - -/** - * Implementation of a sparse hash set using open-addressing with quadratic - * probing. The goal on the hash set is to be the most memory efficient - * possible, even at low load factor, while keeping reasonable performances. - * - * `GrowthPolicy` defines how the set grows and consequently how a hash value is - * mapped to a bucket. By default the set uses - * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of - * buckets to a power of two and uses a mask to map the hash to a bucket instead - * of the slow modulo. Other growth policies are available and you may define - * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the - * interface. - * - * `ExceptionSafety` defines the exception guarantee provided by the class. 
By - * default only the basic exception safety is guaranteed which mean that all - * resources used by the hash set will be freed (no memory leaks) but the hash - * set may end-up in an undefined state if an exception is thrown (undefined - * here means that some elements may be missing). This can ONLY happen on rehash - * (either on insert or if `rehash` is called explicitly) and will occur if the - * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor - * (when a nothrow move constructor is not available) throws an exception. This - * can be avoided by calling `reserve` beforehand. This basic guarantee is - * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. - * It is possible to ask for the strong exception guarantee with - * `tsl::sh::exception_safety::strong`, the drawback is that the set will be - * slower on rehashes and will also need more memory on rehashes. - * - * `Sparsity` defines how much the hash set will compromise between insertion - * speed and memory usage. A high sparsity means less memory usage but longer - * insertion times, and vice-versa for low sparsity. The default - * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't - * change the lookup speed. - * - * `Key` must be nothrow move constructible and/or copy constructible. - * - * If the destructor of `Key` throws an exception, the behaviour of the class is - * undefined. - * - * Iterators invalidation: - * - clear, operator=, reserve, rehash: always invalidate the iterators. - * - insert, emplace, emplace_hint: if there is an effective insert, invalidate - * the iterators. - * - erase: always invalidate the iterators. - */ -template , - class KeyEqual = std::equal_to, - class Allocator = std::allocator, - class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, - tsl::sh::exception_safety ExceptionSafety = - tsl::sh::exception_safety::basic, - tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> -class sparse_set { - private: - template - using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type &operator()(const Key &key) const noexcept { return key; } - - key_type &operator()(Key &key) noexcept { return key; } - }; - - using ht = - detail_sparse_hash::sparse_hash; - - public: - using key_type = typename ht::key_type; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - /* - * Constructors - */ - sparse_set() : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT) {} - - explicit sparse_set(size_type bucket_count, const Hash &hash = Hash(), - const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) {} - - sparse_set(size_type bucket_count, const Allocator &alloc) - : sparse_set(bucket_count, Hash(), KeyEqual(), alloc) {} - - sparse_set(size_type bucket_count, const Hash &hash, const Allocator &alloc) - : sparse_set(bucket_count, hash, KeyEqual(), alloc) {} 
- - explicit sparse_set(const Allocator &alloc) - : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) {} - - template - sparse_set(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, - const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : sparse_set(bucket_count, hash, equal, alloc) { - insert(first, last); - } - - template - sparse_set(InputIt first, InputIt last, size_type bucket_count, - const Allocator &alloc) - : sparse_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {} - - template - sparse_set(InputIt first, InputIt last, size_type bucket_count, - const Hash &hash, const Allocator &alloc) - : sparse_set(first, last, bucket_count, hash, KeyEqual(), alloc) {} - - sparse_set(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, - const Hash &hash = Hash(), const KeyEqual &equal = KeyEqual(), - const Allocator &alloc = Allocator()) - : sparse_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) { - } - - sparse_set(std::initializer_list init, size_type bucket_count, - const Allocator &alloc) - : sparse_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), - alloc) {} - - sparse_set(std::initializer_list init, size_type bucket_count, - const Hash &hash, const Allocator &alloc) - : sparse_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), - alloc) {} - - sparse_set &operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - std::pair insert(const value_type &value) { - return m_ht.insert(value); - } - - std::pair insert(value_type &&value) { - return m_ht.insert(std::move(value)); - } - - iterator insert(const_iterator hint, const value_type &value) { - return m_ht.insert_hint(hint, value); - } - - iterator insert(const_iterator hint, value_type &&value) { - return m_ht.insert_hint(hint, std::move(value)); - } - - template - void insert(InputIt first, InputIt last) { - m_ht.insert(first, last); - } - - void insert(std::initializer_list ilist) { - m_ht.insert(ilist.begin(), ilist.end()); - } - - /** - * Due to the way elements are stored, emplace will need to move or copy the - * key-value once. The method is equivalent to - * `insert(value_type(std::forward(args)...));`. - * - * Mainly here for compatibility with the `std::unordered_map` interface. - */ - template - std::pair emplace(Args &&...args) { - return m_ht.emplace(std::forward(args)...); - } - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy - * the key-value once. The method is equivalent to `insert(hint, - * value_type(std::forward(args)...));`. 
- * - * Mainly here for compatibility with the `std::unordered_map` interface. - */ - template - iterator emplace_hint(const_iterator hint, Args &&...args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { - return m_ht.erase(first, last); - } - size_type erase(const key_type &key) { return m_ht.erase(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - size_type erase(const key_type &key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type erase(const K &key) { - return m_ht.erase(key); - } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type erase(const K &key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - void swap(sparse_set &other) { other.m_ht.swap(m_ht); } - - /* - * Lookup - */ - size_type count(const Key &key) const { return m_ht.count(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - size_type count(const Key &key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type count(const K &key) const { - return m_ht.count(key); - } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - size_type count(const K &key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - iterator find(const Key &key) { return m_ht.find(key); } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. 
- */ - iterator find(const Key &key, std::size_t precalculated_hash) { - return m_ht.find(key, precalculated_hash); - } - - const_iterator find(const Key &key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key &key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - iterator find(const K &key) { - return m_ht.find(key); - } - - /** - * @copydoc find(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - iterator find(const K &key, std::size_t precalculated_hash) { - return m_ht.find(key, precalculated_hash); - } - - /** - * @copydoc find(const K& key) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const_iterator find(const K &key) const { - return m_ht.find(key); - } - - /** - * @copydoc find(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - const_iterator find(const K &key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - bool contains(const Key &key) const { return m_ht.contains(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The - * hash value should be the same as hash_function()(key). Useful to speed-up - * the lookup if you already have the hash. - */ - bool contains(const Key &key, std::size_t precalculated_hash) const { - return m_ht.contains(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * KeyEqual::is_transparent exists. If so, K must be hashable and comparable - * to Key. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - bool contains(const K &key) const { - return m_ht.contains(key); - } - - /** - * @copydoc contains(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The - * hash value should be the same as hash_function()(key). Useful to speed-up - * the lookup if you already have the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - bool contains(const K &key, std::size_t precalculated_hash) const { - return m_ht.contains(key, precalculated_hash); - } - - std::pair equal_range(const Key &key) { - return m_ht.equal_range(key); - } - - /** - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. 
- */ - std::pair equal_range(const Key &key, - std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key &key) const { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range( - const Key &key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef - * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and - * comparable to `Key`. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key) { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value `precalculated_hash` instead of hashing the key. The - * hash value should be the same as `hash_function()(key)`, otherwise the - * behaviour is undefined. Useful to speed-up the lookup if you already have - * the hash. - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key, - std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range(const K &key) const { - return m_ht.equal_range(key); - } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template < - class K, class KE = KeyEqual, - typename std::enable_if::value>::type * = nullptr> - std::pair equal_range( - const K &key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count) { m_ht.rehash(count); } - void reserve(size_type count) { m_ht.reserve(count); } - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - - /* - * Other - */ - - /** - * Convert a `const_iterator` to an `iterator`. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - /** - * Serialize the set through the `serializer` parameter. - * - * The `serializer` parameter must be a function object that supports the - * following call: - * - `void operator()(const U& value);` where the types `std::uint64_t`, - * `float` and `Key` must be supported for U. - * - * The implementation leaves binary compatibility (endianness, IEEE 754 for - * floats, ...) of the types it serializes in the hands of the `Serializer` - * function object if compatibility is required. - */ - template - void serialize(Serializer &serializer) const { - m_ht.serialize(serializer); - } - - /** - * Deserialize a previously serialized set through the `deserializer` - * parameter. 
- * - * The `deserializer` parameter must be a function object that supports the - * following calls: - * - `template U operator()();` where the types `std::uint64_t`, - * `float` and `Key` must be supported for U. - * - * If the deserialized hash set type is hash compatible with the serialized - * set, the deserialization process can be sped up by setting - * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and - * GrowthPolicy must behave the same way than the ones used on the serialized - * set. The `std::size_t` must also be of the same size as the one on the - * platform used to serialize the set. If these criteria are not met, the - * behaviour is undefined with `hash_compatible` sets to true. - * - * The behaviour is undefined if the type `Key` of the `sparse_set` is not the - * same as the type used during serialization. - * - * The implementation leaves binary compatibility (endianness, IEEE 754 for - * floats, size of int, ...) of the types it deserializes in the hands of the - * `Deserializer` function object if compatibility is required. - */ - template - static sparse_set deserialize(Deserializer &deserializer, - bool hash_compatible = false) { - sparse_set set(0); - set.m_ht.deserialize(deserializer, hash_compatible); - - return set; - } - - friend bool operator==(const sparse_set &lhs, const sparse_set &rhs) { - if (lhs.size() != rhs.size()) { - return false; - } - - for (const auto &element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs); - if (it_element_rhs == rhs.cend()) { - return false; - } - } - - return true; - } - - friend bool operator!=(const sparse_set &lhs, const sparse_set &rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(sparse_set &lhs, sparse_set &rhs) { lhs.swap(rhs); } - - private: - ht m_ht; -}; - -/** - * Same as `tsl::sparse_set`. - */ -template , - class KeyEqual = std::equal_to, - class Allocator = std::allocator> -using sparse_pg_set = - sparse_set; -} // end namespace tsl +namespace tsl +{ + + /** + * Implementation of a sparse hash set using open-addressing with quadratic + * probing. The goal on the hash set is to be the most memory efficient + * possible, even at low load factor, while keeping reasonable performances. + * + * `GrowthPolicy` defines how the set grows and consequently how a hash value is + * mapped to a bucket. By default the set uses + * `tsl::sh::power_of_two_growth_policy`. This policy keeps the number of + * buckets to a power of two and uses a mask to map the hash to a bucket instead + * of the slow modulo. Other growth policies are available and you may define + * your own growth policy, check `tsl::sh::power_of_two_growth_policy` for the + * interface. + * + * `ExceptionSafety` defines the exception guarantee provided by the class. By + * default only the basic exception safety is guaranteed which mean that all + * resources used by the hash set will be freed (no memory leaks) but the hash + * set may end-up in an undefined state if an exception is thrown (undefined + * here means that some elements may be missing). This can ONLY happen on rehash + * (either on insert or if `rehash` is called explicitly) and will occur if the + * Allocator can't allocate memory (`std::bad_alloc`) or if the copy constructor + * (when a nothrow move constructor is not available) throws an exception. This + * can be avoided by calling `reserve` beforehand. This basic guarantee is + * similar to the one of `google::sparse_hash_map` and `spp::sparse_hash_map`. 
+ * It is possible to ask for the strong exception guarantee with + * `tsl::sh::exception_safety::strong`, the drawback is that the set will be + * slower on rehashes and will also need more memory on rehashes. + * + * `Sparsity` defines how much the hash set will compromise between insertion + * speed and memory usage. A high sparsity means less memory usage but longer + * insertion times, and vice-versa for low sparsity. The default + * `tsl::sh::sparsity::medium` sparsity offers a good compromise. It doesn't + * change the lookup speed. + * + * `Key` must be nothrow move constructible and/or copy constructible. + * + * If the destructor of `Key` throws an exception, the behaviour of the class is + * undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint: if there is an effective insert, invalidate + * the iterators. + * - erase: always invalidate the iterators. + */ + template, class KeyEqual = std::equal_to, + class Allocator = std::allocator, + class GrowthPolicy = tsl::sh::power_of_two_growth_policy<2>, + tsl::sh::exception_safety ExceptionSafety = tsl::sh::exception_safety::basic, + tsl::sh::sparsity Sparsity = tsl::sh::sparsity::medium> + class sparse_set + { + private: + template + using has_is_transparent = tsl::detail_sparse_hash::has_is_transparent; + + class KeySelect + { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const noexcept + { + return key; + } + + key_type& operator()(Key& key) noexcept + { + return key; + } + }; + + using ht = detail_sparse_hash::sparse_hash; + + public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + /* + * Constructors + */ + sparse_set() : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT) {} + + explicit sparse_set(size_type bucket_count, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + {} + + sparse_set(size_type bucket_count, const Allocator& alloc) + : sparse_set(bucket_count, Hash(), KeyEqual(), alloc) + {} + + sparse_set(size_type bucket_count, const Hash& hash, const Allocator& alloc) + : sparse_set(bucket_count, hash, KeyEqual(), alloc) + {} + + explicit sparse_set(const Allocator& alloc) + : sparse_set(ht::DEFAULT_INIT_BUCKET_COUNT, alloc) + {} + + template + sparse_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : sparse_set(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + sparse_set(InputIt first, InputIt last, size_type bucket_count, const Allocator& alloc) + : sparse_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) + {} + + template + sparse_set(InputIt first, InputIt last, size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : 
sparse_set(first, last, bucket_count, hash, KeyEqual(), alloc) + {} + + sparse_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKET_COUNT, const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), const Allocator& alloc = Allocator()) + : sparse_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) + {} + + sparse_set( + std::initializer_list init, size_type bucket_count, const Allocator& alloc) + : sparse_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + {} + + sparse_set(std::initializer_list init, size_type bucket_count, const Hash& hash, + const Allocator& alloc) + : sparse_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + {} + + sparse_set& operator=(std::initializer_list ilist) + { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const + { + return m_ht.get_allocator(); + } + + /* + * Iterators + */ + iterator begin() noexcept + { + return m_ht.begin(); + } + const_iterator begin() const noexcept + { + return m_ht.begin(); + } + const_iterator cbegin() const noexcept + { + return m_ht.cbegin(); + } + + iterator end() noexcept + { + return m_ht.end(); + } + const_iterator end() const noexcept + { + return m_ht.end(); + } + const_iterator cend() const noexcept + { + return m_ht.cend(); + } + + /* + * Capacity + */ + bool empty() const noexcept + { + return m_ht.empty(); + } + size_type size() const noexcept + { + return m_ht.size(); + } + size_type max_size() const noexcept + { + return m_ht.max_size(); + } + + /* + * Modifiers + */ + void clear() noexcept + { + m_ht.clear(); + } + + std::pair insert(const value_type& value) + { + return m_ht.insert(value); + } + + std::pair insert(value_type&& value) + { + return m_ht.insert(std::move(value)); + } + + iterator insert(const_iterator hint, const value_type& value) + { + return m_ht.insert_hint(hint, value); + } + + iterator insert(const_iterator hint, value_type&& value) + { + return m_ht.insert_hint(hint, std::move(value)); + } + + template + void insert(InputIt first, InputIt last) + { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) + { + m_ht.insert(ilist.begin(), ilist.end()); + } + + /** + * Due to the way elements are stored, emplace will need to move or copy the + * key-value once. The method is equivalent to + * `insert(value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + std::pair emplace(Args&&... args) + { + return m_ht.emplace(std::forward(args)...); + } + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy + * the key-value once. The method is equivalent to `insert(hint, + * value_type(std::forward(args)...));`. + * + * Mainly here for compatibility with the `std::unordered_map` interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) + { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + iterator erase(iterator pos) + { + return m_ht.erase(pos); + } + iterator erase(const_iterator pos) + { + return m_ht.erase(pos); + } + iterator erase(const_iterator first, const_iterator last) + { + return m_ht.erase(first, last); + } + size_type erase(const key_type& key) + { + return m_ht.erase(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. 
The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) + { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) + { + return m_ht.erase(key); + } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) + { + return m_ht.erase(key, precalculated_hash); + } + + void swap(sparse_set& other) + { + other.m_ht.swap(m_ht); + } + + /* + * Lookup + */ + size_type count(const Key& key) const + { + return m_ht.count(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const + { + return m_ht.count(key); + } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const + { + return m_ht.count(key, precalculated_hash); + } + + iterator find(const Key& key) + { + return m_ht.find(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) + { + return m_ht.find(key, precalculated_hash); + } + + const_iterator find(const Key& key) const + { + return m_ht.find(key); + } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + iterator find(const K& key) + { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. 
The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) + { + return m_ht.find(key, precalculated_hash); + } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const + { + return m_ht.find(key); + } + + /** + * @copydoc find(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const + { + return m_ht.find(key, precalculated_hash); + } + + bool contains(const Key& key) const + { + return m_ht.contains(key); + } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const + { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * KeyEqual::is_transparent exists. If so, K must be hashable and comparable + * to Key. + */ + template::value>::type* = nullptr> + bool contains(const K& key) const + { + return m_ht.contains(key); + } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The + * hash value should be the same as hash_function()(key). Useful to speed-up + * the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const + { + return m_ht.contains(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) + { + return m_ht.equal_range(key); + } + + /** + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) + { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range( + const Key& key, std::size_t precalculated_hash) const + { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef + * `KeyEqual::is_transparent` exists. If so, `K` must be hashable and + * comparable to `Key`. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value `precalculated_hash` instead of hashing the key. The + * hash value should be the same as `hash_function()(key)`, otherwise the + * behaviour is undefined. Useful to speed-up the lookup if you already have + * the hash. 
+ */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) + { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const + { + return m_ht.equal_range(key); + } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range( + const K& key, std::size_t precalculated_hash) const + { + return m_ht.equal_range(key, precalculated_hash); + } + + /* + * Bucket interface + */ + size_type bucket_count() const + { + return m_ht.bucket_count(); + } + size_type max_bucket_count() const + { + return m_ht.max_bucket_count(); + } + + /* + * Hash policy + */ + float load_factor() const + { + return m_ht.load_factor(); + } + float max_load_factor() const + { + return m_ht.max_load_factor(); + } + void max_load_factor(float ml) + { + m_ht.max_load_factor(ml); + } + + void rehash(size_type count) + { + m_ht.rehash(count); + } + void reserve(size_type count) + { + m_ht.reserve(count); + } + + /* + * Observers + */ + hasher hash_function() const + { + return m_ht.hash_function(); + } + key_equal key_eq() const + { + return m_ht.key_eq(); + } + + /* + * Other + */ + + /** + * Convert a `const_iterator` to an `iterator`. + */ + iterator mutable_iterator(const_iterator pos) + { + return m_ht.mutable_iterator(pos); + } + + /** + * Serialize the set through the `serializer` parameter. + * + * The `serializer` parameter must be a function object that supports the + * following call: + * - `void operator()(const U& value);` where the types `std::uint64_t`, + * `float` and `Key` must be supported for U. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, ...) of the types it serializes in the hands of the `Serializer` + * function object if compatibility is required. + */ + template + void serialize(Serializer& serializer) const + { + m_ht.serialize(serializer); + } + + /** + * Deserialize a previously serialized set through the `deserializer` + * parameter. + * + * The `deserializer` parameter must be a function object that supports the + * following calls: + * - `template U operator()();` where the types `std::uint64_t`, + * `float` and `Key` must be supported for U. + * + * If the deserialized hash set type is hash compatible with the serialized + * set, the deserialization process can be sped up by setting + * `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and + * GrowthPolicy must behave the same way than the ones used on the serialized + * set. The `std::size_t` must also be of the same size as the one on the + * platform used to serialize the set. If these criteria are not met, the + * behaviour is undefined with `hash_compatible` sets to true. + * + * The behaviour is undefined if the type `Key` of the `sparse_set` is not the + * same as the type used during serialization. + * + * The implementation leaves binary compatibility (endianness, IEEE 754 for + * floats, size of int, ...) of the types it deserializes in the hands of the + * `Deserializer` function object if compatibility is required. 
+ */ + template + static sparse_set deserialize(Deserializer& deserializer, bool hash_compatible = false) + { + sparse_set set(0); + set.m_ht.deserialize(deserializer, hash_compatible); + + return set; + } + + friend bool operator==(const sparse_set& lhs, const sparse_set& rhs) + { + if (lhs.size() != rhs.size()) + { + return false; + } + + for (const auto& element_lhs : lhs) + { + const auto it_element_rhs = rhs.find(element_lhs); + if (it_element_rhs == rhs.cend()) + { + return false; + } + } + + return true; + } + + friend bool operator!=(const sparse_set& lhs, const sparse_set& rhs) + { + return !operator==(lhs, rhs); + } + + friend void swap(sparse_set& lhs, sparse_set& rhs) + { + lhs.swap(rhs); + } + + private: + ht m_ht; + }; + + /** + * Same as `tsl::sparse_set`. + */ + template, class KeyEqual = std::equal_to, + class Allocator = std::allocator> + using sparse_pg_set = sparse_set; + +} // end namespace tsl #endif diff --git a/Include/Pipe/Extern/utf8.h b/Include/Pipe/Extern/utf8.h index 28244378..6d5c1c9d 100644 --- a/Include/Pipe/Extern/utf8.h +++ b/Include/Pipe/Extern/utf8.h @@ -34,8 +34,8 @@ and set it to one of the values used by the __cplusplus predefined macro. For instance, #define UTF_CPP_CPLUSPLUS 199711L -will cause the UTF-8 CPP library to use only types and language features available in the C++ 98 standard. -Some library features will be disabled. +will cause the UTF-8 CPP library to use only types and language features available in the C++ 98 +standard. Some library features will be disabled. If you leave UTF_CPP_CPLUSPLUS undefined, it will be internally assigned to __cplusplus. */ @@ -43,4 +43,4 @@ If you leave UTF_CPP_CPLUSPLUS undefined, it will be internally assigned to __cp #include "Pipe/Extern/utf8/checked.h" #include "Pipe/Extern/utf8/unchecked.h" -#endif // header guard +#endif // header guard diff --git a/Include/Pipe/Extern/utf8/checked.h b/Include/Pipe/Extern/utf8/checked.h index d0dc4aa9..4b159fa1 100644 --- a/Include/Pipe/Extern/utf8/checked.h +++ b/Include/Pipe/Extern/utf8/checked.h @@ -29,331 +29,372 @@ DEALINGS IN THE SOFTWARE. #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "Pipe/Extern/utf8/core.h" + #include + namespace utf8 { - // Base for the exceptions that may be thrown from the library - class exception : public ::std::exception { - }; - - // Exceptions that may be thrown from the library functions. 
- class invalid_code_point : public exception { - utfchar32_t cp; - public: - invalid_code_point(utfchar32_t codepoint) : cp(codepoint) {} - virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; } - utfchar32_t code_point() const {return cp;} - }; - - class invalid_utf8 : public exception { - utfchar8_t u8; - public: - invalid_utf8 (utfchar8_t u) : u8(u) {} - invalid_utf8 (char c) : u8(static_cast(c)) {} - virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; } - utfchar8_t utf8_octet() const {return u8;} - }; - - class invalid_utf16 : public exception { - utfchar16_t u16; - public: - invalid_utf16 (utfchar16_t u) : u16(u) {} - virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; } - utfchar16_t utf16_word() const {return u16;} - }; - - class not_enough_room : public exception { - public: - virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; } - }; - - /// The library API - functions intended to be called by the users - - template - octet_iterator append(utfchar32_t cp, octet_iterator result) - { - if (!utf8::internal::is_code_point_valid(cp)) - throw invalid_code_point(cp); - - return internal::append(cp, result); - } - - inline void append(utfchar32_t cp, std::string& s) - { - append(cp, std::back_inserter(s)); - } - - template - word_iterator append16(utfchar32_t cp, word_iterator result) - { - if (!utf8::internal::is_code_point_valid(cp)) - throw invalid_code_point(cp); - - return internal::append16(cp, result); - } - - template - output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) - { - while (start != end) { - octet_iterator sequence_start = start; - internal::utf_error err_code = utf8::internal::validate_next(start, end); - switch (err_code) { - case internal::UTF8_OK : - for (octet_iterator it = sequence_start; it != start; ++it) - *out++ = *it; - break; - case internal::NOT_ENOUGH_ROOM: - out = utf8::append (replacement, out); - start = end; - break; - case internal::INVALID_LEAD: - out = utf8::append (replacement, out); - ++start; - break; - case internal::INCOMPLETE_SEQUENCE: - case internal::OVERLONG_SEQUENCE: - case internal::INVALID_CODE_POINT: - out = utf8::append (replacement, out); - ++start; - // just one replacement mark for the sequence - while (start != end && utf8::internal::is_trail(*start)) - ++start; - break; - } - } - return out; - } - - template - inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) - { - static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd); - return utf8::replace_invalid(start, end, out, replacement_marker); - } - - inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); - return result; - } - - inline std::string replace_invalid(const std::string& s) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - template - utfchar32_t next(octet_iterator& it, octet_iterator end) - { - utfchar32_t cp = 0; - internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); - switch (err_code) { - case internal::UTF8_OK : - break; - case internal::NOT_ENOUGH_ROOM : - throw not_enough_room(); - case internal::INVALID_LEAD : - case 
internal::INCOMPLETE_SEQUENCE : - case internal::OVERLONG_SEQUENCE : - throw invalid_utf8(static_cast(*it)); - case internal::INVALID_CODE_POINT : - throw invalid_code_point(cp); - } - return cp; - } - - template - utfchar32_t next16(word_iterator& it, word_iterator end) - { - utfchar32_t cp = 0; - internal::utf_error err_code = utf8::internal::validate_next16(it, end, cp); - if (err_code == internal::NOT_ENOUGH_ROOM) - throw not_enough_room(); - return cp; - } - - template - utfchar32_t peek_next(octet_iterator it, octet_iterator end) - { - return utf8::next(it, end); - } - - template - utfchar32_t prior(octet_iterator& it, octet_iterator start) - { - // can't do much if it == start - if (it == start) - throw not_enough_room(); - - octet_iterator end = it; - // Go back until we hit either a lead octet or start - while (utf8::internal::is_trail(*(--it))) - if (it == start) - throw invalid_utf8(*it); // error - no lead byte in the sequence - return utf8::peek_next(it, end); - } - - template - void advance (octet_iterator& it, distance_type n, octet_iterator end) - { - const distance_type zero(0); - if (n < zero) { - // backward - for (distance_type i = n; i < zero; ++i) - utf8::prior(it, end); - } else { - // forward - for (distance_type i = zero; i < n; ++i) - utf8::next(it, end); - } - } - - template - typename std::iterator_traits::difference_type - distance (octet_iterator first, octet_iterator last) - { - typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) - utf8::next(first, last); - return dist; - } - - template - octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { - while (start != end) { - utfchar32_t cp = utf8::internal::mask16(*start++); - // Take care of surrogate pairs first - if (utf8::internal::is_lead_surrogate(cp)) { - if (start != end) { - const utfchar32_t trail_surrogate = utf8::internal::mask16(*start++); - if (utf8::internal::is_trail_surrogate(trail_surrogate)) - cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; - else - throw invalid_utf16(static_cast(trail_surrogate)); - } - else - throw invalid_utf16(static_cast(cp)); - - } - // Lone trail surrogate - else if (utf8::internal::is_trail_surrogate(cp)) - throw invalid_utf16(static_cast(cp)); - - result = utf8::append(cp, result); - } - return result; - } - - template - u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) - { - while (start < end) { - const utfchar32_t cp = utf8::next(start, end); - if (cp > 0xffff) { //make a surrogate pair - *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); - *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); - } - else - *result++ = static_cast(cp); - } - return result; - } - - template - octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) - { - while (start != end) - result = utf8::append(*(start++), result); - - return result; - } - - template - u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) - { - while (start < end) - (*result++) = utf8::next(start, end); - - return result; - } - - // The iterator class - template - class iterator { - octet_iterator it; - octet_iterator range_start; - octet_iterator range_end; - public: - typedef utfchar32_t value_type; - typedef utfchar32_t* pointer; - typedef utfchar32_t& reference; - typedef std::ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - iterator 
() {} - explicit iterator (const octet_iterator& octet_it, - const octet_iterator& rangestart, - const octet_iterator& rangeend) : - it(octet_it), range_start(rangestart), range_end(rangeend) - { - if (it < range_start || it > range_end) - throw std::out_of_range("Invalid utf-8 iterator position"); - } - // the default "big three" are OK - octet_iterator base () const { return it; } - utfchar32_t operator * () const - { - octet_iterator temp = it; - return utf8::next(temp, range_end); - } - bool operator == (const iterator& rhs) const - { - if (range_start != rhs.range_start || range_end != rhs.range_end) - throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); - return (it == rhs.it); - } - bool operator != (const iterator& rhs) const - { - return !(operator == (rhs)); - } - iterator& operator ++ () - { - utf8::next(it, range_end); - return *this; - } - iterator operator ++ (int) - { - iterator temp = *this; - utf8::next(it, range_end); - return temp; - } - iterator& operator -- () - { - utf8::prior(it, range_start); - return *this; - } - iterator operator -- (int) - { - iterator temp = *this; - utf8::prior(it, range_start); - return temp; - } - }; // class iterator - -} // namespace utf8 - -#if UTF_CPP_CPLUSPLUS >= 202002L // C++ 20 or later -#include "cpp20.h" -#elif UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later -#include "cpp17.h" -#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later -#include "cpp11.h" -#endif // C++ 11 or later - -#endif //header guard - + // Base for the exceptions that may be thrown from the library + class exception : public ::std::exception + {}; + + // Exceptions that may be thrown from the library functions. + class invalid_code_point : public exception + { + utfchar32_t cp; + + public: + invalid_code_point(utfchar32_t codepoint) : cp(codepoint) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE + { + return "Invalid code point"; + } + utfchar32_t code_point() const + { + return cp; + } + }; + + class invalid_utf8 : public exception + { + utfchar8_t u8; + + public: + invalid_utf8(utfchar8_t u) : u8(u) {} + invalid_utf8(char c) : u8(static_cast(c)) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE + { + return "Invalid UTF-8"; + } + utfchar8_t utf8_octet() const + { + return u8; + } + }; + + class invalid_utf16 : public exception + { + utfchar16_t u16; + + public: + invalid_utf16(utfchar16_t u) : u16(u) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE + { + return "Invalid UTF-16"; + } + utfchar16_t utf16_word() const + { + return u16; + } + }; + + class not_enough_room : public exception + { + public: + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE + { + return "Not enough space"; + } + }; + + /// The library API - functions intended to be called by the users + + template + octet_iterator append(utfchar32_t cp, octet_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + return internal::append(cp, result); + } + + inline void append(utfchar32_t cp, std::string& s) + { + append(cp, std::back_inserter(s)); + } + + template + word_iterator append16(utfchar32_t cp, word_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + return internal::append16(cp, result); + } + + template + output_iterator replace_invalid( + octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) + { + while (start != end) + { + octet_iterator 
sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) + { + case internal::UTF8_OK: + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::append(replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::append(replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::append(replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid( + octet_iterator start, octet_iterator end, output_iterator out) + { + static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::replace_invalid(start, end, out, replacement_marker); + } + + inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + template + utfchar32_t next(octet_iterator& it, octet_iterator end) + { + utfchar32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + switch (err_code) + { + case internal::UTF8_OK: break; + case internal::NOT_ENOUGH_ROOM: throw not_enough_room(); + case internal::INVALID_LEAD: + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: throw invalid_utf8(static_cast(*it)); + case internal::INVALID_CODE_POINT: throw invalid_code_point(cp); + } + return cp; + } + + template + utfchar32_t next16(word_iterator& it, word_iterator end) + { + utfchar32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next16(it, end, cp); + if (err_code == internal::NOT_ENOUGH_ROOM) + throw not_enough_room(); + return cp; + } + + template + utfchar32_t peek_next(octet_iterator it, octet_iterator end) + { + return utf8::next(it, end); + } + + template + utfchar32_t prior(octet_iterator& it, octet_iterator start) + { + // can't do much if it == start + if (it == start) + throw not_enough_room(); + + octet_iterator end = it; + // Go back until we hit either a lead octet or start + while (utf8::internal::is_trail(*(--it))) + if (it == start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + return utf8::peek_next(it, end); + } + + template + void advance(octet_iterator& it, distance_type n, octet_iterator end) + { + const distance_type zero(0); + if (n < zero) + { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::prior(it, end); + } + else + { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::next(it, end); + } + } + + template + typename std::iterator_traits::difference_type distance( + octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::next(first, last); + return dist; + } + + template + octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) + { + utfchar32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if 
(utf8::internal::is_lead_surrogate(cp)) + { + if (start != end) + { + const utfchar32_t trail_surrogate = utf8::internal::mask16(*start++); + if (utf8::internal::is_trail_surrogate(trail_surrogate)) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(cp)); + } + // Lone trail surrogate + else if (utf8::internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast(cp)); + + result = utf8::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) + { + const utfchar32_t cp = utf8::next(start, end); + if (cp > 0xffff) + { // make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::next(start, end); + + return result; + } + + // The iterator class + template + class iterator + { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + + public: + typedef utfchar32_t value_type; + typedef utfchar32_t* pointer; + typedef utfchar32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + iterator() {} + explicit iterator(const octet_iterator& octet_it, const octet_iterator& rangestart, + const octet_iterator& rangeend) + : it(octet_it), range_start(rangestart), range_end(rangeend) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base() const + { + return it; + } + utfchar32_t operator*() const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator==(const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator!=(const iterator& rhs) const + { + return !(operator==(rhs)); + } + iterator& operator++() + { + utf8::next(it, range_end); + return *this; + } + iterator operator++(int) + { + iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + iterator& operator--() + { + utf8::prior(it, range_start); + return *this; + } + iterator operator--(int) + { + iterator temp = *this; + utf8::prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#if UTF_CPP_CPLUSPLUS >= 202002L // C++ 20 or later + #include "cpp20.h" +#elif UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later + #include "cpp17.h" +#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later + #include "cpp11.h" +#endif // C++ 11 or later + +#endif // header guard diff --git a/Include/Pipe/Extern/utf8/cpp11.h b/Include/Pipe/Extern/utf8/cpp11.h index 3c17cdc3..786a9be9 100644 --- a/Include/Pipe/Extern/utf8/cpp11.h +++ b/Include/Pipe/Extern/utf8/cpp11.h @@ -32,39 +32,38 @@ DEALINGS IN THE SOFTWARE. 
namespace utf8 { - inline void append16(utfchar32_t cp, std::u16string& s) - { - append16(cp, std::back_inserter(s)); - } + inline void append16(utfchar32_t cp, std::u16string& s) + { + append16(cp, std::back_inserter(s)); + } - inline std::string utf16to8(const std::u16string& s) - { - std::string result; - utf16to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } + inline std::string utf16to8(const std::u16string& s) + { + std::string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } - inline std::u16string utf8to16(const std::string& s) - { - std::u16string result; - utf8to16(s.begin(), s.end(), std::back_inserter(result)); - return result; - } + inline std::u16string utf8to16(const std::string& s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } - inline std::string utf32to8(const std::u32string& s) - { - std::string result; - utf32to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } + inline std::string utf32to8(const std::u32string& s) + { + std::string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } - inline std::u32string utf8to32(const std::string& s) - { - std::u32string result; - utf8to32(s.begin(), s.end(), std::back_inserter(result)); - return result; - } -} // namespace utf8 - -#endif // header guard + inline std::u32string utf8to32(const std::string& s) + { + std::u32string result; + utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } +} // namespace utf8 +#endif // header guard diff --git a/Include/Pipe/Extern/utf8/cpp17.h b/Include/Pipe/Extern/utf8/cpp17.h index 44e515eb..7131e232 100644 --- a/Include/Pipe/Extern/utf8/cpp17.h +++ b/Include/Pipe/Extern/utf8/cpp17.h @@ -32,65 +32,65 @@ DEALINGS IN THE SOFTWARE. namespace utf8 { - inline std::string utf16to8(std::u16string_view s) - { - std::string result; - utf16to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u16string utf8to16(std::string_view s) - { - std::u16string result; - utf8to16(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::string utf32to8(std::u32string_view s) - { - std::string result; - utf32to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u32string utf8to32(std::string_view s) - { - std::u32string result; - utf8to32(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::size_t find_invalid(std::string_view s) - { - std::string_view::const_iterator invalid = find_invalid(s.begin(), s.end()); - return (invalid == s.end()) ? 
std::string_view::npos : static_cast(invalid - s.begin()); - } - - inline bool is_valid(std::string_view s) - { - return is_valid(s.begin(), s.end()); - } - - inline std::string replace_invalid(std::string_view s, char32_t replacement) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); - return result; - } - - inline std::string replace_invalid(std::string_view s) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline bool starts_with_bom(std::string_view s) - { - return starts_with_bom(s.begin(), s.end()); - } - -} // namespace utf8 - -#endif // header guard - + inline std::string utf16to8(std::u16string_view s) + { + std::string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u16string utf8to16(std::string_view s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::string utf32to8(std::u32string_view s) + { + std::string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u32string utf8to32(std::string_view s) + { + std::u32string result; + utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::size_t find_invalid(std::string_view s) + { + std::string_view::const_iterator invalid = find_invalid(s.begin(), s.end()); + return (invalid == s.end()) ? std::string_view::npos + : static_cast(invalid - s.begin()); + } + + inline bool is_valid(std::string_view s) + { + return is_valid(s.begin(), s.end()); + } + + inline std::string replace_invalid(std::string_view s, char32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(std::string_view s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline bool starts_with_bom(std::string_view s) + { + return starts_with_bom(s.begin(), s.end()); + } + +} // namespace utf8 + +#endif // header guard diff --git a/Include/Pipe/Extern/utf8/cpp20.h b/Include/Pipe/Extern/utf8/cpp20.h index be6ce1b2..6e16bad5 100644 --- a/Include/Pipe/Extern/utf8/cpp20.h +++ b/Include/Pipe/Extern/utf8/cpp20.h @@ -32,93 +32,93 @@ DEALINGS IN THE SOFTWARE. 
namespace utf8 { - inline std::u8string utf16tou8(const std::u16string& s) - { - std::u8string result; - utf16to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u8string utf16tou8(std::u16string_view s) - { - std::u8string result; - utf16to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u16string utf8to16(const std::u8string& s) - { - std::u16string result; - utf8to16(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u16string utf8to16(const std::u8string_view& s) - { - std::u16string result; - utf8to16(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u8string utf32tou8(const std::u32string& s) - { - std::u8string result; - utf32to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u8string utf32tou8(const std::u32string_view& s) - { - std::u8string result; - utf32to8(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u32string utf8to32(const std::u8string& s) - { - std::u32string result; - utf8to32(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::u32string utf8to32(const std::u8string_view& s) - { - std::u32string result; - utf8to32(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline std::size_t find_invalid(const std::u8string& s) - { - std::u8string::const_iterator invalid = find_invalid(s.begin(), s.end()); - return (invalid == s.end()) ? std::string_view::npos : static_cast(invalid - s.begin()); - } - - inline bool is_valid(const std::u8string& s) - { - return is_valid(s.begin(), s.end()); - } - - inline std::u8string replace_invalid(const std::u8string& s, char32_t replacement) - { - std::u8string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); - return result; - } - - inline std::u8string replace_invalid(const std::u8string& s) - { - std::u8string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result)); - return result; - } - - inline bool starts_with_bom(const std::u8string& s) - { - return starts_with_bom(s.begin(), s.end()); - } - -} // namespace utf8 - -#endif // header guard - + inline std::u8string utf16tou8(const std::u16string& s) + { + std::u8string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u8string utf16tou8(std::u16string_view s) + { + std::u8string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u16string utf8to16(const std::u8string& s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u16string utf8to16(const std::u8string_view& s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u8string utf32tou8(const std::u32string& s) + { + std::u8string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u8string utf32tou8(const std::u32string_view& s) + { + std::u8string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u32string utf8to32(const std::u8string& s) + { + std::u32string result; + utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u32string utf8to32(const std::u8string_view& s) + { + std::u32string result; + 
utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::size_t find_invalid(const std::u8string& s) + { + std::u8string::const_iterator invalid = find_invalid(s.begin(), s.end()); + return (invalid == s.end()) ? std::string_view::npos + : static_cast(invalid - s.begin()); + } + + inline bool is_valid(const std::u8string& s) + { + return is_valid(s.begin(), s.end()); + } + + inline std::u8string replace_invalid(const std::u8string& s, char32_t replacement) + { + std::u8string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::u8string replace_invalid(const std::u8string& s) + { + std::u8string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline bool starts_with_bom(const std::u8string& s) + { + return starts_with_bom(s.begin(), s.end()); + } + +} // namespace utf8 + +#endif // header guard diff --git a/Include/Pipe/Extern/utf8/unchecked.h b/Include/Pipe/Extern/utf8/unchecked.h index f3cb627f..7546d79c 100644 --- a/Include/Pipe/Extern/utf8/unchecked.h +++ b/Include/Pipe/Extern/utf8/unchecked.h @@ -32,256 +32,273 @@ DEALINGS IN THE SOFTWARE. namespace utf8 { - namespace unchecked - { - template - octet_iterator append(utfchar32_t cp, octet_iterator result) - { - return internal::append(cp, result); - } + namespace unchecked + { + template + octet_iterator append(utfchar32_t cp, octet_iterator result) + { + return internal::append(cp, result); + } - template - word_iterator append16(utfchar32_t cp, word_iterator result) - { - return internal::append16(cp, result); - } + template + word_iterator append16(utfchar32_t cp, word_iterator result) + { + return internal::append16(cp, result); + } - template - output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) - { - while (start != end) { - octet_iterator sequence_start = start; - internal::utf_error err_code = utf8::internal::validate_next(start, end); - switch (err_code) { - case internal::UTF8_OK : - for (octet_iterator it = sequence_start; it != start; ++it) - *out++ = *it; - break; - case internal::NOT_ENOUGH_ROOM: - out = utf8::unchecked::append(replacement, out); - start = end; - break; - case internal::INVALID_LEAD: - out = utf8::unchecked::append(replacement, out); - ++start; - break; - case internal::INCOMPLETE_SEQUENCE: - case internal::OVERLONG_SEQUENCE: - case internal::INVALID_CODE_POINT: - out = utf8::unchecked::append(replacement, out); - ++start; - // just one replacement mark for the sequence - while (start != end && utf8::internal::is_trail(*start)) - ++start; - break; - } - } - return out; - } + template + output_iterator replace_invalid( + octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) + { + while (start != end) + { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) + { + case internal::UTF8_OK: + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::unchecked::append(replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::unchecked::append(replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::unchecked::append(replacement, out); + ++start; + // just one 
replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } - template - inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) - { - static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd); - return utf8::unchecked::replace_invalid(start, end, out, replacement_marker); - } + template + inline output_iterator replace_invalid( + octet_iterator start, octet_iterator end, output_iterator out) + { + static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::unchecked::replace_invalid(start, end, out, replacement_marker); + } - inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); - return result; - } + inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } - inline std::string replace_invalid(const std::string& s) - { - std::string result; - replace_invalid(s.begin(), s.end(), std::back_inserter(result)); - return result; - } + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } - template - utfchar32_t next(octet_iterator& it) - { - utfchar32_t cp = utf8::internal::mask8(*it); - switch (utf8::internal::sequence_length(it)) { - case 1: - break; - case 2: - it++; - cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); - break; - case 3: - ++it; - cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); - ++it; - cp += (*it) & 0x3f; - break; - case 4: - ++it; - cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); - ++it; - cp += (utf8::internal::mask8(*it) << 6) & 0xfff; - ++it; - cp += (*it) & 0x3f; - break; - } - ++it; - return cp; - } + template + utfchar32_t next(octet_iterator& it) + { + utfchar32_t cp = utf8::internal::mask8(*it); + switch (utf8::internal::sequence_length(it)) + { + case 1: break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (utf8::internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } - template - utfchar32_t peek_next(octet_iterator it) - { - return utf8::unchecked::next(it); - } + template + utfchar32_t peek_next(octet_iterator it) + { + return utf8::unchecked::next(it); + } - template - utfchar32_t next16(word_iterator& it) - { - utfchar32_t cp = utf8::internal::mask16(*it++); - if (utf8::internal::is_lead_surrogate(cp)) - return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET; - return cp; - } + template + utfchar32_t next16(word_iterator& it) + { + utfchar32_t cp = utf8::internal::mask16(*it++); + if (utf8::internal::is_lead_surrogate(cp)) + return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET; + return cp; + } - template - utfchar32_t prior(octet_iterator& it) - { - while (utf8::internal::is_trail(*(--it))) ; - octet_iterator temp = it; - return utf8::unchecked::next(temp); - } + template + utfchar32_t prior(octet_iterator& 
it) + { + while (utf8::internal::is_trail(*(--it))) + ; + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } - template - void advance(octet_iterator& it, distance_type n) - { - const distance_type zero(0); - if (n < zero) { - // backward - for (distance_type i = n; i < zero; ++i) - utf8::unchecked::prior(it); - } else { - // forward - for (distance_type i = zero; i < n; ++i) - utf8::unchecked::next(it); - } - } + template + void advance(octet_iterator& it, distance_type n) + { + const distance_type zero(0); + if (n < zero) + { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::unchecked::prior(it); + } + else + { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::unchecked::next(it); + } + } - template - typename std::iterator_traits::difference_type - distance(octet_iterator first, octet_iterator last) - { - typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) - utf8::unchecked::next(first); - return dist; - } + template + typename std::iterator_traits::difference_type distance( + octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::unchecked::next(first); + return dist; + } - template - octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { - while (start != end) { - utfchar32_t cp = utf8::internal::mask16(*start++); - // Take care of surrogate pairs first - if (utf8::internal::is_lead_surrogate(cp)) { - if (start == end) - return result; - utfchar32_t trail_surrogate = utf8::internal::mask16(*start++); - cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; - } - result = utf8::unchecked::append(cp, result); - } - return result; - } + template + octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) + { + utfchar32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) + { + if (start == end) + return result; + utfchar32_t trail_surrogate = utf8::internal::mask16(*start++); + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + } + result = utf8::unchecked::append(cp, result); + } + return result; + } - template - u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result) - { - while (start < end) { - utfchar32_t cp = utf8::unchecked::next(start); - if (cp > 0xffff) { //make a surrogate pair - *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); - *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); - } - else - *result++ = static_cast(cp); - } - return result; - } + template + u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) + { + utfchar32_t cp = utf8::unchecked::next(start); + if (cp > 0xffff) + { // make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = + static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } - template - octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result) - { - while (start != end) - result = utf8::unchecked::append(*(start++), result); + template + octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::unchecked::append(*(start++), result); 
- return result; - } + return result; + } - template - u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result) - { - while (start < end) - (*result++) = utf8::unchecked::next(start); + template + u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::unchecked::next(start); - return result; - } + return result; + } - // The iterator class - template - class iterator { - octet_iterator it; - public: - typedef utfchar32_t value_type; - typedef utfchar32_t* pointer; - typedef utfchar32_t& reference; - typedef std::ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - iterator () {} - explicit iterator (const octet_iterator& octet_it): it(octet_it) {} - // the default "big three" are OK - octet_iterator base () const { return it; } - utfchar32_t operator * () const - { - octet_iterator temp = it; - return utf8::unchecked::next(temp); - } - bool operator == (const iterator& rhs) const - { - return (it == rhs.it); - } - bool operator != (const iterator& rhs) const - { - return !(operator == (rhs)); - } - iterator& operator ++ () - { - ::std::advance(it, utf8::internal::sequence_length(it)); - return *this; - } - iterator operator ++ (int) - { - iterator temp = *this; - ::std::advance(it, utf8::internal::sequence_length(it)); - return temp; - } - iterator& operator -- () - { - utf8::unchecked::prior(it); - return *this; - } - iterator operator -- (int) - { - iterator temp = *this; - utf8::unchecked::prior(it); - return temp; - } - }; // class iterator + // The iterator class + template + class iterator + { + octet_iterator it; - } // namespace utf8::unchecked -} // namespace utf8 + public: + typedef utfchar32_t value_type; + typedef utfchar32_t* pointer; + typedef utfchar32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + iterator() {} + explicit iterator(const octet_iterator& octet_it) : it(octet_it) {} + // the default "big three" are OK + octet_iterator base() const + { + return it; + } + utfchar32_t operator*() const + { + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + bool operator==(const iterator& rhs) const + { + return (it == rhs.it); + } + bool operator!=(const iterator& rhs) const + { + return !(operator==(rhs)); + } + iterator& operator++() + { + ::std::advance(it, utf8::internal::sequence_length(it)); + return *this; + } + iterator operator++(int) + { + iterator temp = *this; + ::std::advance(it, utf8::internal::sequence_length(it)); + return temp; + } + iterator& operator--() + { + utf8::unchecked::prior(it); + return *this; + } + iterator operator--(int) + { + iterator temp = *this; + utf8::unchecked::prior(it); + return temp; + } + }; // class iterator + } // namespace unchecked +} // namespace utf8 -#endif // header guard +#endif // header guard diff --git a/Src/Pipe/Extern/README.md b/Src/Pipe/Extern/README.md new file mode 100644 index 00000000..d72188ae --- /dev/null +++ b/Src/Pipe/Extern/README.md @@ -0,0 +1,8 @@ +# External Dependencies + +External dependencies that get compiled directly into Pipe will be in this folder. + +Their code will be unchanged except for includes to same library headers. + +Most of these dependencies as well, are compiled privately to pipe and won't be included from Pipe headers. 
+
diff --git a/Include/Pipe/Extern/portable-file-dialogs.h b/Src/Pipe/Extern/portable-file-dialogs.h
similarity index 100%
rename from Include/Pipe/Extern/portable-file-dialogs.h
rename to Src/Pipe/Extern/portable-file-dialogs.h
diff --git a/Include/Pipe/Extern/yyjson.c b/Src/Pipe/Extern/yyjson.c
similarity index 100%
rename from Include/Pipe/Extern/yyjson.c
rename to Src/Pipe/Extern/yyjson.c
diff --git a/Include/Pipe/Extern/yyjson.h b/Src/Pipe/Extern/yyjson.h
similarity index 100%
rename from Include/Pipe/Extern/yyjson.h
rename to Src/Pipe/Extern/yyjson.h
diff --git a/Src/PipeSerialize.cpp b/Src/PipeSerialize.cpp
index 78eaa09d..98b32b48 100644
--- a/Src/PipeSerialize.cpp
+++ b/Src/PipeSerialize.cpp
@@ -10,9 +10,6 @@
 #include "Pipe/Extern/yyjson.h"
 #include "PipeMath.h"
-// Include yyjson.c to get it compiled
-#include "Pipe/Extern/yyjson.c"
-
 static void* yyjson_malloc(void* ctx, p::sizet size) {