From 10cf1b25d7c8917bd47234a2550b344adb020b6f Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 26 Sep 2025 15:00:25 +0300 Subject: [PATCH 01/17] init --- pp/series_data/benchmarks/BUILD | 10 ++ .../benchmarks/serializer_benchmark.cpp | 128 ++++++++++++++++++ pp/series_data/serialization/serializer.h | 16 +++ 3 files changed, 154 insertions(+) create mode 100644 pp/series_data/benchmarks/serializer_benchmark.cpp diff --git a/pp/series_data/benchmarks/BUILD b/pp/series_data/benchmarks/BUILD index e6d088b9f2..8b59537f50 100644 --- a/pp/series_data/benchmarks/BUILD +++ b/pp/series_data/benchmarks/BUILD @@ -8,4 +8,14 @@ cc_binary( "//:series_data", "@google_benchmark//:benchmark_main", ], +) + +cc_binary( + name = "serializer", + srcs = ["serializer_benchmark.cpp"], + malloc = "@jemalloc", + deps = [ + "//:series_data", + "@google_benchmark//:benchmark_main", + ], ) \ No newline at end of file diff --git a/pp/series_data/benchmarks/serializer_benchmark.cpp b/pp/series_data/benchmarks/serializer_benchmark.cpp new file mode 100644 index 0000000000..8af0133a51 --- /dev/null +++ b/pp/series_data/benchmarks/serializer_benchmark.cpp @@ -0,0 +1,128 @@ +#include +#include + +#include + +#include +#include + +#include "bare_bones/preprocess.h" +#include "series_data/encoder.h" +#include "series_data/querier/query.h" +#include "series_data/serialization/serializer.h" + +namespace { + +using BareBones::StreamVByte::CompactSequence; +using BareBones::StreamVByte::Sequence; + +struct PROMPP_ATTRIBUTE_PACKED SeriesSample { + uint32_t series_id; + int64_t timestamp; + double value; +}; + +const BareBones::Vector& get_samples_for_benchmark() { + constexpr auto get_file_name = [] -> std::string { + if (auto& context = benchmark::internal::GetGlobalContext(); context != nullptr) { + return context->operator[]("wal_file"); + } + + return {}; + }; + + static BareBones::Vector samples_from_file; + if (samples_from_file.empty()) [[likely]] { + std::ifstream istrm(get_file_name(), std::ios::binary); + istrm >> samples_from_file; + } + + return samples_from_file; +} + +void BenchmarkWalSerializer(benchmark::State& state) { + const auto& samples = get_samples_for_benchmark(); + const double percent = state.range(0) / 100.0; + const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); + const auto min_ts = min->timestamp; + const auto max_ts = max->timestamp; + const auto delta_ts = max_ts - min_ts; + + series_data::DataStorage storage; + series_data::Encoder encoder{storage}; + + for (const auto& sample : samples) { + if (sample.timestamp < min_ts + delta_ts * percent) { + encoder.encode(sample.series_id, sample.timestamp, sample.value); + } + } + + series_data::querier::QueriedChunkList chunk_list; + { + std::vector v(storage.open_chunks.size()); + std::iota(v.begin(), v.end(), 0); + + std::mt19937 g(42); + std::ranges::shuffle(v, g); + v.resize(v.size() / 10); + + chunk_list.reserve(v.size()); + for (uint32_t ls_id : v) { + chunk_list.emplace_back(ls_id); + } + } + + for ([[maybe_unused]] auto _ : state) { + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; + + serializer_.serialize(chunk_list, stream); + state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + } +} + +void BenchmarkWalConstantSerializer(benchmark::State& state) { + const auto& samples = get_samples_for_benchmark(); + const double percent = state.range(0) / 100.0; + const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); + const auto min_ts = min->timestamp; + const auto max_ts = max->timestamp; + const auto delta_ts = max_ts - min_ts; + + series_data::DataStorage storage; + series_data::Encoder encoder{storage}; + + for (const auto& sample : samples) { + if (sample.timestamp <= min_ts + delta_ts * percent) { + encoder.encode(sample.series_id, sample.timestamp, sample.series_id); + } + } + + series_data::querier::QueriedChunkList chunk_list; + { + std::vector v(storage.open_chunks.size()); + std::iota(v.begin(), v.end(), 0); + + std::mt19937 g(42); + std::ranges::shuffle(v, g); + v.resize(v.size() / 10); + + chunk_list.reserve(v.size()); + for (uint32_t ls_id : v) { + chunk_list.emplace_back(ls_id); + } + } + + for ([[maybe_unused]] auto _ : state) { + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; + + serializer_.serialize(chunk_list, stream); + state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + } +} + +BENCHMARK(BenchmarkWalSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100)->Iterations(1); +BENCHMARK(BenchmarkWalConstantSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100)->Iterations(1); + +} // namespace diff --git a/pp/series_data/serialization/serializer.h b/pp/series_data/serialization/serializer.h index 814105959c..9060fdffb4 100644 --- a/pp/series_data/serialization/serializer.h +++ b/pp/series_data/serialization/serializer.h @@ -42,6 +42,8 @@ class Serializer { template void serialize_impl(const ChunkList& chunks, Stream& stream) { + static constexpr bool f = true; + const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); TimestampStreamsData timestamp_streams_data; @@ -50,6 +52,10 @@ class Serializer { uint32_t chunk_count = get_chunk_count(chunks); auto serialized_chunks = create_serialized_chunks(chunks, chunk_count, timestamp_streams_data, data_size); + if constexpr (f) { + std::cout << "chunks size: " << double(serialized_chunks.size() * sizeof(serialized_chunks[0])) / 1024 / 1024 << '\n'; + } + if constexpr (BareBones::concepts::has_reserve) { stream.reserve(data_size + kReservedBytesForReader.size()); } @@ -58,6 +64,16 @@ class Serializer { write_serialized_chunks(serialized_chunks, stream); } + if constexpr (f) { + auto max_offset = std::max_element(timestamp_streams_data.stream_offsets.begin(), timestamp_streams_data.stream_offsets.end(), [](auto a, auto b) { + return a.second < b.second; + })->second; + auto min_offset = std::min_element(timestamp_streams_data.stream_offsets.begin(), timestamp_streams_data.stream_offsets.end(), [](auto a, auto b) { + return a.second < b.second; + })->second; + std::cout << "streams data size: " << double(max_offset - min_offset) / 1024 / 1024 << '\n'; + } + write_chunks_data(chunks, timestamp_streams_data, stream); stream.write(kReservedBytesForReader.data(), kReservedBytesForReader.size()); } From 1da3536e927af91bdf9968a31cc58a6aef097db3 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Tue, 30 Sep 2025 17:03:34 +0300 Subject: [PATCH 02/17] new model initial --- pp/series_data/serialization/serializer.h | 216 ++++++++++++++++-- .../serializer_deserializer_tests.cpp | 2 +- 2 files changed, 200 insertions(+), 18 deletions(-) diff --git a/pp/series_data/serialization/serializer.h b/pp/series_data/serialization/serializer.h index 9060fdffb4..ddf352dcae 100644 --- a/pp/series_data/serialization/serializer.h +++ b/pp/series_data/serialization/serializer.h @@ -7,7 +7,7 @@ #include "series_data/querier/query.h" namespace series_data::serialization { - +inline namespace old_ { class Serializer { public: explicit Serializer(const DataStorage& storage) : storage_(storage) {} @@ -42,8 +42,6 @@ class Serializer { template void serialize_impl(const ChunkList& chunks, Stream& stream) { - static constexpr bool f = true; - const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); TimestampStreamsData timestamp_streams_data; @@ -52,10 +50,6 @@ class Serializer { uint32_t chunk_count = get_chunk_count(chunks); auto serialized_chunks = create_serialized_chunks(chunks, chunk_count, timestamp_streams_data, data_size); - if constexpr (f) { - std::cout << "chunks size: " << double(serialized_chunks.size() * sizeof(serialized_chunks[0])) / 1024 / 1024 << '\n'; - } - if constexpr (BareBones::concepts::has_reserve) { stream.reserve(data_size + kReservedBytesForReader.size()); } @@ -64,16 +58,6 @@ class Serializer { write_serialized_chunks(serialized_chunks, stream); } - if constexpr (f) { - auto max_offset = std::max_element(timestamp_streams_data.stream_offsets.begin(), timestamp_streams_data.stream_offsets.end(), [](auto a, auto b) { - return a.second < b.second; - })->second; - auto min_offset = std::min_element(timestamp_streams_data.stream_offsets.begin(), timestamp_streams_data.stream_offsets.end(), [](auto a, auto b) { - return a.second < b.second; - })->second; - std::cout << "streams data size: " << double(max_offset - min_offset) / 1024 / 1024 << '\n'; - } - write_chunks_data(chunks, timestamp_streams_data, stream); stream.write(kReservedBytesForReader.data(), kReservedBytesForReader.size()); } @@ -324,5 +308,203 @@ class Serializer { } } }; +} // namespace old_ + +inline namespace new_ { +struct SerializedData { + BareBones::Vector chunks; + BareBones::Memory bytes_buffer_; +}; + +class Serializer { + public: + explicit Serializer(const DataStorage& storage) : storage_(storage) {} + + SerializedData serialize(const querier::QueriedChunkList& queried_chunks) { return serialize_impl(queried_chunks); } + + SerializedData serialize() { return serialize_impl(storage_.chunks()); } + + private: + struct TimestampStreamsData { + using TimestampId = uint32_t; + using Offset = uint32_t; + + static constexpr Offset kInvalidOffset = std::numeric_limits::max(); + + phmap::flat_hash_map stream_offsets; + phmap::flat_hash_map finalized_stream_offsets; + }; + + using QueriedChunk = querier::QueriedChunk; + using QueriedChunkList = querier::QueriedChunkList; + using SerializedChunk = chunk::SerializedChunk; + using SerializedChunkList = chunk::SerializedChunkList; + + const DataStorage& storage_; + + template + SerializedData serialize_impl(const ChunkList& chunks) { + const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); + + const uint32_t chunk_count = get_chunk_count(chunks); + + SerializedData result; + result.chunks.reserve(chunk_count); + + uint32_t data_size = 0; + + TimestampStreamsData timestamp_streams_data; + for (auto& chunk_data : chunks) { + using enum chunk::DataChunk::Type; + + if (chunk_data.is_open()) [[likely]] { + if (const auto& chunk = get_chunk(chunk_data); !chunk.is_empty()) [[likely]] { + fill_serialized_chunk(chunk, result.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, result.bytes_buffer_); + } + } else { + fill_serialized_chunk(get_chunk(chunk_data), result.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, + data_size, result.bytes_buffer_); + } + } + + result.bytes_buffer_.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); + std::memcpy(result.bytes_buffer_.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); + + return result; + } + + template + PROMPP_ALWAYS_INLINE static uint32_t get_chunk_count(const ChunkList& chunks) noexcept { + if constexpr (std::is_same_v) { + return chunks.non_empty_chunk_count(); + } else { + return chunks.size(); + } + } + + template + void fill_serialized_chunk(const chunk::DataChunk& chunk, + SerializedChunk& serialized_chunk, + TimestampStreamsData& timestamp_streams_data, + uint32_t& data_size, + BareBones::Memory& buffer) const noexcept { + using enum EncodingType; + + serialized_chunk.encoding_state = chunk.encoding_state; + + if (chunk.encoding_state.encoding_type != kGorilla) { + fill_timestamp_stream_offset(timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, data_size, buffer); + } + + switch (chunk.encoding_state.encoding_type) { + case kUint32Constant: { + serialized_chunk.store_value_in_offset(chunk.encoder.uint32_constant); + break; + } + + case kFloat32Constant: { + serialized_chunk.store_value_in_offset(chunk.encoder.float32_constant); + break; + } + + case kDoubleConstant: { + serialized_chunk.set_offset(data_size); + buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::DoubleConstantEncoder)); + std::memcpy(buffer.control_block().data + data_size, &storage_.variant_encoders[chunk.encoder.external_index].double_constant, + sizeof(encoder::value::DoubleConstantEncoder)); + data_size += sizeof(encoder::value::DoubleConstantEncoder); + break; + } + + case kTwoDoubleConstant: { + serialized_chunk.set_offset(data_size); + buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::TwoDoubleConstantEncoder)); + std::memcpy(buffer.control_block().data + data_size, &storage_.variant_encoders[chunk.encoder.external_index].two_double_constant, + sizeof(encoder::value::TwoDoubleConstantEncoder)); + data_size += sizeof(encoder::value::TwoDoubleConstantEncoder); + break; + } + + case kAscInteger: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage_.get_asc_integer_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kAscIntegerThenValuesGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage_.get_asc_integer_then_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kValuesGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage_.get_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage_.get_gorilla_encoder_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + default: { + assert(chunk.encoding_state.encoding_type != kUnknown); + } + } + } + + template + [[nodiscard]] const chunk::DataChunk& get_chunk(const QueriedChunk& queried_chunk) const noexcept { + if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { + return storage_.open_chunks[queried_chunk.series_id()]; + } else { + auto finalized_chunk_it = storage_.finalized_chunks.find(queried_chunk.series_id())->second.begin(); + std::advance(finalized_chunk_it, queried_chunk.finalized_chunk_id); + return *finalized_chunk_it; + } + } + + template + [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage::SeriesChunkIterator::Data& chunk) noexcept { + return chunk.chunk(); + } + + template + void fill_timestamp_stream_offset(TimestampStreamsData& timestamp_streams_data, + encoder::timestamp::State::Id timestamp_stream_id, + SerializedChunk& serialized_chunk, + uint32_t& data_size, + BareBones::Memory& buffer) const noexcept { + if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { + if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) { + timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); + write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + } + + serialized_chunk.timestamps_offset = timestamp_streams_data.stream_offsets[timestamp_stream_id]; + } else { + if (const auto it = timestamp_streams_data.finalized_stream_offsets.find(timestamp_stream_id); + it == timestamp_streams_data.finalized_stream_offsets.end()) { + timestamp_streams_data.finalized_stream_offsets.emplace(timestamp_stream_id, data_size); + write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + + serialized_chunk.timestamps_offset = timestamp_streams_data.finalized_stream_offsets[timestamp_stream_id]; + } + } + } + + template + static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, + uint32_t& offset, + BareBones::Memory& buffer) { + const auto bytes_count = bit_sequence.size_in_bytes(); + buffer.grow_to_fit_at_least(offset + bytes_count); + std::memcpy(buffer.control_block().data + offset, bit_sequence.raw_bytes(), bytes_count); + offset += bytes_count; + } +}; +} // namespace new_ } // namespace series_data::serialization diff --git a/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp index 15b5d52ebd..b843655ac7 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp @@ -20,7 +20,7 @@ using series_data::encoder::SampleList; using series_data::querier::QueriedChunk; using series_data::querier::QueriedChunkList; using series_data::serialization::Deserializer; -using series_data::serialization::Serializer; +using series_data::serialization::old_::Serializer; class SerializerDeserializerTrait { protected: From 2a9a543baef1362b62db93cb7bfa60c3f61cf5cc Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Wed, 1 Oct 2025 16:00:55 +0300 Subject: [PATCH 03/17] new model bench --- .../benchmarks/serializer_benchmark.cpp | 37 +- pp/series_data/chunk/serialized_chunk.h | 60 ++ pp/series_data/serialization/deserializer.h | 16 + pp/series_data/serialization/serializer.h | 34 +- .../serializer_deserializer_new_tests.cpp | 763 ++++++++++++++++++ 5 files changed, 883 insertions(+), 27 deletions(-) create mode 100644 pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp diff --git a/pp/series_data/benchmarks/serializer_benchmark.cpp b/pp/series_data/benchmarks/serializer_benchmark.cpp index 8af0133a51..08fdf7c3bb 100644 --- a/pp/series_data/benchmarks/serializer_benchmark.cpp +++ b/pp/series_data/benchmarks/serializer_benchmark.cpp @@ -73,11 +73,19 @@ void BenchmarkWalSerializer(benchmark::State& state) { } for ([[maybe_unused]] auto _ : state) { - series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + series_data::serialization::new_::Serializer serializer_{storage}; - serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + const auto x = serializer_.serialize(chunk_list); + } + + { + series_data::serialization::new_::Serializer serializer_{storage}; + + const auto x = serializer_.serialize(chunk_list); + state.counters["total mem"] = + benchmark::Counter(x.chunks.allocated_memory() + x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["chunk mem"] = benchmark::Counter(x.chunks.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["stream mem"] = benchmark::Counter(x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } @@ -114,15 +122,24 @@ void BenchmarkWalConstantSerializer(benchmark::State& state) { } for ([[maybe_unused]] auto _ : state) { - series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + series_data::serialization::new_::Serializer serializer_{storage}; + // BareBones::ShrinkedToFitOStringStream stream; + + const auto x = serializer_.serialize(chunk_list); + } + + { + series_data::serialization::new_::Serializer serializer_{storage}; - serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + const auto x = serializer_.serialize(chunk_list); + state.counters["total mem"] = + benchmark::Counter(x.chunks.allocated_memory() + x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["chunk mem"] = benchmark::Counter(x.chunks.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["stream mem"] = benchmark::Counter(x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } -BENCHMARK(BenchmarkWalSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100)->Iterations(1); -BENCHMARK(BenchmarkWalConstantSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100)->Iterations(1); +BENCHMARK(BenchmarkWalSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100); +BENCHMARK(BenchmarkWalConstantSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100); } // namespace diff --git a/pp/series_data/chunk/serialized_chunk.h b/pp/series_data/chunk/serialized_chunk.h index bfa07be766..489a478f82 100644 --- a/pp/series_data/chunk/serialized_chunk.h +++ b/pp/series_data/chunk/serialized_chunk.h @@ -81,4 +81,64 @@ class SerializedChunkIterator { } }; +struct SerializedData { + BareBones::Vector chunks; + BareBones::Memory bytes_buffer_; +}; + +class SerializedChunkIteratorNew { + public: + class Data { + public: + Data(const SerializedData& serialized_data, SerializedChunkSpan chunks) + : serialized_data_(serialized_data), chunk_iterator_(chunks.begin()), chunk_end_iterator_(chunks.end()) {} + + [[nodiscard]] PROMPP_ALWAYS_INLINE const SerializedChunk& chunk() const noexcept { return *chunk_iterator_; } + [[nodiscard]] PROMPP_ALWAYS_INLINE const SerializedData& data() const noexcept { return serialized_data_; } + [[nodiscard]] PROMPP_ALWAYS_INLINE PromPP::Primitives::LabelSetID series_id() const noexcept { return chunk_iterator_->label_set_id; } + + private: + friend class SerializedChunkIteratorNew; + + const SerializedData& serialized_data_; + SerializedChunkSpan::iterator chunk_iterator_; + SerializedChunkSpan::iterator chunk_end_iterator_; + + PROMPP_ALWAYS_INLINE void next_value() noexcept { ++chunk_iterator_; } + + [[nodiscard]] PROMPP_ALWAYS_INLINE bool has_value() const noexcept { return chunk_iterator_ != chunk_end_iterator_; } + }; + + using iterator_category = std::forward_iterator_tag; + using value_type = Data; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit SerializedChunkIteratorNew(const SerializedData& serialized_data) : data_(serialized_data, get_chunks(serialized_data)) {} + + [[nodiscard]] PROMPP_ALWAYS_INLINE const Data& operator*() const noexcept { return data_; } + [[nodiscard]] PROMPP_ALWAYS_INLINE const Data* operator->() const noexcept { return &data_; } + + PROMPP_ALWAYS_INLINE SerializedChunkIteratorNew& operator++() noexcept { + data_.next_value(); + return *this; + } + + PROMPP_ALWAYS_INLINE SerializedChunkIteratorNew operator++(int) noexcept { + const auto it = *this; + ++*this; + return it; + } + + PROMPP_ALWAYS_INLINE bool operator==(const IteratorSentinel&) const noexcept { return !data_.has_value(); } + + private: + Data data_; + + [[nodiscard]] PROMPP_ALWAYS_INLINE static SerializedChunkSpan get_chunks(const SerializedData& serialized_data) noexcept { + return {serialized_data.chunks.data(), serialized_data.chunks.size()}; + } +}; + } // namespace series_data::chunk \ No newline at end of file diff --git a/pp/series_data/serialization/deserializer.h b/pp/series_data/serialization/deserializer.h index 952aacb17c..3520308323 100644 --- a/pp/series_data/serialization/deserializer.h +++ b/pp/series_data/serialization/deserializer.h @@ -23,6 +23,11 @@ class Deserializer { uint32_t chunks_count = *reinterpret_cast(buffer.data()); return {reinterpret_cast(buffer.data() + sizeof(uint32_t)), chunks_count}; } + + [[nodiscard]] PROMPP_ALWAYS_INLINE static chunk::SerializedChunkSpan get_chunks(const chunk::SerializedData& serialized_data) noexcept { + return {serialized_data.chunks.data(), serialized_data.chunks.size()}; + } + [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(std::span buffer, const chunk::SerializedChunk& chunk) { decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { @@ -30,6 +35,17 @@ class Deserializer { }); return iterator; } + + [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedData& serialized_data, + const chunk::SerializedChunk& chunk) { + decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); + std::span buffer{serialized_data.bytes_buffer_.control_block().data, serialized_data.bytes_buffer_.size()}; + Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { + iterator = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + return iterator; + } + [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedChunkIterator::Data& chunk) { return create_decode_iterator(chunk.buffer(), chunk.chunk()); } diff --git a/pp/series_data/serialization/serializer.h b/pp/series_data/serialization/serializer.h index ddf352dcae..23b481ddde 100644 --- a/pp/series_data/serialization/serializer.h +++ b/pp/series_data/serialization/serializer.h @@ -311,18 +311,14 @@ class Serializer { } // namespace old_ inline namespace new_ { -struct SerializedData { - BareBones::Vector chunks; - BareBones::Memory bytes_buffer_; -}; class Serializer { public: explicit Serializer(const DataStorage& storage) : storage_(storage) {} - SerializedData serialize(const querier::QueriedChunkList& queried_chunks) { return serialize_impl(queried_chunks); } + chunk::SerializedData serialize(const querier::QueriedChunkList& queried_chunks) { return serialize_impl(queried_chunks); } - SerializedData serialize() { return serialize_impl(storage_.chunks()); } + chunk::SerializedData serialize() { return serialize_impl(storage_.chunks()); } private: struct TimestampStreamsData { @@ -343,12 +339,12 @@ class Serializer { const DataStorage& storage_; template - SerializedData serialize_impl(const ChunkList& chunks) { + chunk::SerializedData serialize_impl(const ChunkList& chunks) { const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); const uint32_t chunk_count = get_chunk_count(chunks); - SerializedData result; + chunk::SerializedData result; result.chunks.reserve(chunk_count); uint32_t data_size = 0; @@ -392,7 +388,7 @@ class Serializer { serialized_chunk.encoding_state = chunk.encoding_state; - if (chunk.encoding_state.encoding_type != kGorilla) { + if (chunk.encoding_state.encoding_type != kGorilla) [[likely]] { fill_timestamp_stream_offset(timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, data_size, buffer); } @@ -478,31 +474,35 @@ class Serializer { uint32_t& data_size, BareBones::Memory& buffer) const noexcept { if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { - if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) { + if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) [[unlikely]] { timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); + serialized_chunk.timestamps_offset = data_size; write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + } else { + serialized_chunk.timestamps_offset = it->second; } serialized_chunk.timestamps_offset = timestamp_streams_data.stream_offsets[timestamp_stream_id]; } else { if (const auto it = timestamp_streams_data.finalized_stream_offsets.find(timestamp_stream_id); - it == timestamp_streams_data.finalized_stream_offsets.end()) { + it == timestamp_streams_data.finalized_stream_offsets.end()) [[unlikely]] { timestamp_streams_data.finalized_stream_offsets.emplace(timestamp_stream_id, data_size); + serialized_chunk.timestamps_offset = data_size; write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); - - serialized_chunk.timestamps_offset = timestamp_streams_data.finalized_stream_offsets[timestamp_stream_id]; + } else { + serialized_chunk.timestamps_offset = it->second; } } } template static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, - uint32_t& offset, + uint32_t& data_size, BareBones::Memory& buffer) { const auto bytes_count = bit_sequence.size_in_bytes(); - buffer.grow_to_fit_at_least(offset + bytes_count); - std::memcpy(buffer.control_block().data + offset, bit_sequence.raw_bytes(), bytes_count); - offset += bytes_count; + buffer.grow_to_fit_at_least(data_size + bytes_count); + std::memcpy(buffer.control_block().data + data_size, bit_sequence.raw_bytes(), bytes_count); + data_size += bytes_count; } }; } // namespace new_ diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp new file mode 100644 index 0000000000..de15914c46 --- /dev/null +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -0,0 +1,763 @@ +#include + +#include "bare_bones/streams.h" +#include "series_data/data_storage.h" +#include "series_data/encoder.h" +#include "series_data/encoder/bit_sequence.h" +#include "series_data/serialization/deserializer.h" +#include "series_data/serialization/serializer.h" + +namespace { + +using BareBones::Encoding::Gorilla::STALE_NAN; +using series_data::ChunkFinalizer; +using series_data::DataStorage; +using series_data::Encoder; +using series_data::EncodingType; +using series_data::chunk::DataChunk; +using series_data::decoder::DecodeIteratorSentinel; +using series_data::encoder::Sample; +using series_data::encoder::SampleList; +using series_data::querier::QueriedChunk; +using series_data::querier::QueriedChunkList; +using series_data::serialization::Deserializer; +using series_data::serialization::new_::Serializer; + +class SerializerDeserializerTrait { + protected: + DataStorage storage_; + Serializer serializer_{storage_}; + Encoder<> encoder_{storage_}; + + template + [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_chunk(DecodeIterator iterator) { + SampleList result; + std::ranges::copy(iterator, DecodeIteratorSentinel{}, std::back_insert_iterator(result)); + return result; + } +}; + +class SerializerDeserializerFixtureNew : public SerializerDeserializerTrait, public testing::Test {}; + +TEST_F(SerializerDeserializerFixtureNew, EmptyChunksList) { + // Arrange + + // Act + const auto serialized = serializer_.serialize({}); + // const Deserializer deserializer(serialized); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(0U, serialized.chunks.size()); + ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized.bytes_buffer_.size()); +} + +TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimestampStream) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(1, 1, 1.0); + + encoder_.encode(0, 2, 1.0); + encoder_.encode(1, 2, 1.0); + + encoder_.encode(0, 3, 1.0); + encoder_.encode(1, 3, 1.0); + + // Act + const auto serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}}); + // const Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(2U, Deserializer::get_chunks(serialized).size()); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + EXPECT_EQ(Deserializer::get_chunks(serialized)[0].timestamps_offset, Deserializer::get_chunks(serialized)[1].timestamps_offset); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); +} + +TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUniqueTimestampStream) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(1, 1, 1.0); + + encoder_.encode(0, 2, 1.0); + encoder_.encode(1, 2, 1.0); + + encoder_.encode(0, 3, 1.0); + encoder_.encode(1, 3, 1.0); + + encoder_.encode(2, 1, 2.0); + encoder_.encode(2, 2, 2.0); + encoder_.encode(2, 3, 2.0); + + // Act + const auto serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); + // const Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(3U, Deserializer::get_chunks(serialized).size()); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); + EXPECT_EQ(Deserializer::get_chunks(serialized)[0].timestamps_offset, Deserializer::get_chunks(serialized)[1].timestamps_offset); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 2.0}, + {.timestamp = 2, .value = 2.0}, + {.timestamp = 3, .value = 2.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); +} + +TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { + // Arrange + encoder_.encode(0, 100, 1.0); + + encoder_.encode(1, 101, 1.1); + + encoder_.encode(2, 102, 1.1); + encoder_.encode(2, 103, 1.2); + + encoder_.encode(3, 104, 1.0); + encoder_.encode(3, 105, 2.0); + encoder_.encode(3, 106, 3.0); + + encoder_.encode(4, 107, 1.1); + encoder_.encode(20, 107, 1.1); + encoder_.encode(4, 108, 2.1); + encoder_.encode(20, 108, 2.1); + encoder_.encode(4, 109, 3.1); + + encoder_.encode(5, 110, 1.1); + encoder_.encode(5, 111, 2.1); + encoder_.encode(5, 112, 3.1); + + encoder_.encode(6, 113, 2.0); + + encoder_.encode(7, 114, -1.0); + encoder_.encode(7, 115, -1.0); + + encoder_.encode(8, 120, 1.0); + encoder_.encode(8, 121, 2.0); + encoder_.encode(8, 122, 3.0); + encoder_.encode(8, 123, 4.1); + + // Act + const auto serialized = serializer_.serialize(); + // Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); + ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 101, .value = 1.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 102, .value = 1.1}, + {.timestamp = 103, .value = 1.2}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 104, .value = 1.0}, + {.timestamp = 105, .value = 2.0}, + {.timestamp = 106, .value = 3.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.1}, + {.timestamp = 108, .value = 2.1}, + {.timestamp = 109, .value = 3.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 110, .value = 1.1}, + {.timestamp = 111, .value = 2.1}, + {.timestamp = 112, .value = 3.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 113, .value = 2.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 114, .value = -1.0}, + {.timestamp = 115, .value = -1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 120, .value = 1.0}, + {.timestamp = 121, .value = 2.0}, + {.timestamp = 122, .value = 3.0}, + {.timestamp = 123, .value = 4.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.1}, + {.timestamp = 108, .value = 2.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); +} + +TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { + // Arrange + encoder_.encode(0, 100, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + + encoder_.encode(1, 101, 1.1); + ChunkFinalizer::finalize(storage_, 1, storage_.open_chunks[1]); + + encoder_.encode(2, 102, 1.1); + encoder_.encode(2, 103, 1.2); + ChunkFinalizer::finalize(storage_, 2, storage_.open_chunks[2]); + + encoder_.encode(3, 104, 1.0); + encoder_.encode(3, 105, 2.0); + encoder_.encode(3, 106, 3.0); + ChunkFinalizer::finalize(storage_, 3, storage_.open_chunks[3]); + + encoder_.encode(4, 107, 1.1); + encoder_.encode(20, 107, 1.1); + encoder_.encode(4, 108, 2.1); + encoder_.encode(20, 108, 2.1); + encoder_.encode(4, 109, 3.1); + ChunkFinalizer::finalize(storage_, 4, storage_.open_chunks[4]); + ChunkFinalizer::finalize(storage_, 20, storage_.open_chunks[20]); + + encoder_.encode(5, 110, 1.1); + encoder_.encode(5, 111, 2.1); + encoder_.encode(5, 112, 3.1); + ChunkFinalizer::finalize(storage_, 5, storage_.open_chunks[5]); + + encoder_.encode(6, 113, 2.0); + ChunkFinalizer::finalize(storage_, 6, storage_.open_chunks[6]); + + encoder_.encode(7, 114, -1.0); + encoder_.encode(7, 115, -1.0); + ChunkFinalizer::finalize(storage_, 7, storage_.open_chunks[7]); + + encoder_.encode(8, 120, 1.0); + encoder_.encode(8, 121, 2.0); + encoder_.encode(8, 122, 3.0); + encoder_.encode(8, 123, 4.1); + ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); + + // Act + const auto serialized = serializer_.serialize(); + // Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); + ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 101, .value = 1.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 102, .value = 1.1}, + {.timestamp = 103, .value = 1.2}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 104, .value = 1.0}, + {.timestamp = 105, .value = 2.0}, + {.timestamp = 106, .value = 3.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.1}, + {.timestamp = 108, .value = 2.1}, + {.timestamp = 109, .value = 3.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 110, .value = 1.1}, + {.timestamp = 111, .value = 2.1}, + {.timestamp = 112, .value = 3.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 113, .value = 2.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 114, .value = -1.0}, + {.timestamp = 115, .value = -1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 120, .value = 1.0}, + {.timestamp = 121, .value = 2.0}, + {.timestamp = 122, .value = 3.0}, + {.timestamp = 123, .value = 4.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.1}, + {.timestamp = 108, .value = 2.1}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); +} + +TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(1, 100, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + + // Act + const auto serialized = serializer_.serialize({QueriedChunk{1}}); + // const Deserializer deserializer(get_buffer()); + + // Assert + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); +} + +TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(0, 101, 1.0); + encoder_.encode(0, 102, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 103, 1.0); + + // Act + const auto serialized = serializer_.serialize(); + // const Deserializer deserializer(get_buffer()); + + // Assert + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + {.timestamp = 101, .value = 1.0}, + {.timestamp = 102, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 103, .value = 1.0}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); +} + +TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(0, 101, STALE_NAN); + + encoder_.encode(1, 102, 1.1); + encoder_.encode(1, 103, STALE_NAN); + + encoder_.encode(2, 104, 1.1); + encoder_.encode(2, 105, 1.2); + encoder_.encode(2, 106, STALE_NAN); + + encoder_.encode(3, 107, 1.0); + encoder_.encode(3, 108, 2.0); + encoder_.encode(3, 109, 3.0); + encoder_.encode(3, 110, STALE_NAN); + + encoder_.encode(4, 111, 1.1); + encoder_.encode(20, 111, 1.1); + encoder_.encode(4, 112, 2.1); + encoder_.encode(20, 112, 2.1); + encoder_.encode(4, 113, 3.1); + encoder_.encode(4, 114, STALE_NAN); + encoder_.encode(20, 113, STALE_NAN); + + encoder_.encode(5, 115, 1.1); + encoder_.encode(5, 116, 2.1); + encoder_.encode(5, 117, 3.1); + encoder_.encode(5, 118, STALE_NAN); + + encoder_.encode(6, 119, 2.0); + encoder_.encode(6, 120, STALE_NAN); + + encoder_.encode(7, 121, -1.0); + encoder_.encode(7, 122, -1.0); + encoder_.encode(7, 123, STALE_NAN); + + encoder_.encode(8, 130, 1.0); + encoder_.encode(8, 131, 2.0); + encoder_.encode(8, 132, 3.0); + encoder_.encode(8, 133, 4.1); + encoder_.encode(8, 134, STALE_NAN); + + // Act + const auto serialized = serializer_.serialize(); + // Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); + EXPECT_TRUE(std::ranges::all_of(Deserializer::get_chunks(serialized), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); + ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + {.timestamp = 101, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 102, .value = 1.1}, + {.timestamp = 103, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 104, .value = 1.1}, + {.timestamp = 105, .value = 1.2}, + {.timestamp = 106, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.0}, + {.timestamp = 108, .value = 2.0}, + {.timestamp = 109, .value = 3.0}, + {.timestamp = 110, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 111, .value = 1.1}, + {.timestamp = 112, .value = 2.1}, + {.timestamp = 113, .value = 3.1}, + {.timestamp = 114, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 115, .value = 1.1}, + {.timestamp = 116, .value = 2.1}, + {.timestamp = 117, .value = 3.1}, + {.timestamp = 118, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 119, .value = 2.0}, + {.timestamp = 120, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 121, .value = -1.0}, + {.timestamp = 122, .value = -1.0}, + {.timestamp = 123, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 130, .value = 1.0}, + {.timestamp = 131, .value = 2.0}, + {.timestamp = 132, .value = 3.0}, + {.timestamp = 133, .value = 4.1}, + {.timestamp = 134, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 111, .value = 1.1}, + {.timestamp = 112, .value = 2.1}, + {.timestamp = 113, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); +} + +TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(0, 101, STALE_NAN); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + + encoder_.encode(1, 102, 1.1); + encoder_.encode(1, 103, STALE_NAN); + ChunkFinalizer::finalize(storage_, 1, storage_.open_chunks[1]); + + encoder_.encode(2, 104, 1.1); + encoder_.encode(2, 105, 1.2); + encoder_.encode(2, 106, STALE_NAN); + ChunkFinalizer::finalize(storage_, 2, storage_.open_chunks[2]); + + encoder_.encode(3, 107, 1.0); + encoder_.encode(3, 108, 2.0); + encoder_.encode(3, 109, 3.0); + encoder_.encode(3, 110, STALE_NAN); + ChunkFinalizer::finalize(storage_, 3, storage_.open_chunks[3]); + + encoder_.encode(4, 111, 1.1); + encoder_.encode(20, 111, 1.1); + encoder_.encode(4, 112, 2.1); + encoder_.encode(20, 112, 2.1); + encoder_.encode(4, 113, 3.1); + encoder_.encode(4, 114, STALE_NAN); + encoder_.encode(20, 113, STALE_NAN); + ChunkFinalizer::finalize(storage_, 4, storage_.open_chunks[4]); + ChunkFinalizer::finalize(storage_, 20, storage_.open_chunks[20]); + + encoder_.encode(5, 115, 1.1); + encoder_.encode(5, 116, 2.1); + encoder_.encode(5, 117, 3.1); + encoder_.encode(5, 118, STALE_NAN); + ChunkFinalizer::finalize(storage_, 5, storage_.open_chunks[5]); + + encoder_.encode(6, 119, 2.0); + encoder_.encode(6, 120, STALE_NAN); + ChunkFinalizer::finalize(storage_, 6, storage_.open_chunks[6]); + + encoder_.encode(7, 121, -1.0); + encoder_.encode(7, 122, -1.0); + encoder_.encode(7, 123, STALE_NAN); + ChunkFinalizer::finalize(storage_, 7, storage_.open_chunks[7]); + + encoder_.encode(8, 130, 1.0); + encoder_.encode(8, 131, 2.0); + encoder_.encode(8, 132, 3.0); + encoder_.encode(8, 133, 4.1); + encoder_.encode(8, 134, STALE_NAN); + ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); + + // Act + const auto serialized = serializer_.serialize(); + // Deserializer deserializer(get_buffer()); + + // Assert + // ASSERT_TRUE(deserializer.is_valid()); + ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); + EXPECT_TRUE(std::ranges::all_of(Deserializer::get_chunks(serialized), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); + ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + {.timestamp = 101, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 102, .value = 1.1}, + {.timestamp = 103, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 104, .value = 1.1}, + {.timestamp = 105, .value = 1.2}, + {.timestamp = 106, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 107, .value = 1.0}, + {.timestamp = 108, .value = 2.0}, + {.timestamp = 109, .value = 3.0}, + {.timestamp = 110, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 111, .value = 1.1}, + {.timestamp = 112, .value = 2.1}, + {.timestamp = 113, .value = 3.1}, + {.timestamp = 114, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 115, .value = 1.1}, + {.timestamp = 116, .value = 2.1}, + {.timestamp = 117, .value = 3.1}, + {.timestamp = 118, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 119, .value = 2.0}, + {.timestamp = 120, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 121, .value = -1.0}, + {.timestamp = 122, .value = -1.0}, + {.timestamp = 123, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 130, .value = 1.0}, + {.timestamp = 131, .value = 2.0}, + {.timestamp = 132, .value = 3.0}, + {.timestamp = 133, .value = 4.1}, + {.timestamp = 134, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 111, .value = 1.1}, + {.timestamp = 112, .value = 2.1}, + {.timestamp = 113, .value = STALE_NAN}, + }, + decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); +} + +/*class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, public testing::Test { + protected: + using DecodedChunks = std::vector; + + DecodedChunks decode_chunks() const { + DecodedChunks result; + for (auto& chunk : Deserializer{get_buffer()}) { + result.emplace_back(decode_chunk(Deserializer::create_decode_iterator(chunk))); + } + return result; + } +}; + +TEST_F(DeserializerIteratorFixtureNew, EmptyChunksList) { + // Arrange + + // Act + serializer_.serialize({}, stream_); + auto decoded_chunks = decode_chunks(); + + // Assert + EXPECT_TRUE(std::ranges::equal(DecodedChunks{}, decoded_chunks)); +} + +TEST_F(DeserializerIteratorFixtureNew, OneChunk) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(0, 2, 1.0); + + // Act + serializer_.serialize({QueriedChunk{0}}, stream_); + auto decoded_chunks = decode_chunks(); + + // Assert + EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}}}, decoded_chunks)); +} + +TEST_F(DeserializerIteratorFixtureNew, TwoChunks) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(1, 2, 1.0); + + // Act + serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}}, stream_); + auto decoded_chunks = decode_chunks(); + + // Assert + EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}}, SampleList{{.timestamp = 2, .value = 1.0}}}, decoded_chunks)); +}*/ + +} // namespace \ No newline at end of file From 5a85f53fde8bc8207a8d25a88fab5e23e495f25f Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Thu, 2 Oct 2025 20:22:32 +0300 Subject: [PATCH 04/17] SerializedData separate class --- pp/series_data/chunk/serialized_chunk.h | 60 ---- pp/series_data/serialization/deserializer.h | 14 - .../serialization/serialized_data.h | 209 ++++++++++++++ pp/series_data/serialization/serializer.h | 199 ------------- .../serializer_deserializer_new_tests.cpp | 263 +++++++++--------- .../serializer_deserializer_tests.cpp | 2 +- 6 files changed, 341 insertions(+), 406 deletions(-) create mode 100644 pp/series_data/serialization/serialized_data.h diff --git a/pp/series_data/chunk/serialized_chunk.h b/pp/series_data/chunk/serialized_chunk.h index 489a478f82..bfa07be766 100644 --- a/pp/series_data/chunk/serialized_chunk.h +++ b/pp/series_data/chunk/serialized_chunk.h @@ -81,64 +81,4 @@ class SerializedChunkIterator { } }; -struct SerializedData { - BareBones::Vector chunks; - BareBones::Memory bytes_buffer_; -}; - -class SerializedChunkIteratorNew { - public: - class Data { - public: - Data(const SerializedData& serialized_data, SerializedChunkSpan chunks) - : serialized_data_(serialized_data), chunk_iterator_(chunks.begin()), chunk_end_iterator_(chunks.end()) {} - - [[nodiscard]] PROMPP_ALWAYS_INLINE const SerializedChunk& chunk() const noexcept { return *chunk_iterator_; } - [[nodiscard]] PROMPP_ALWAYS_INLINE const SerializedData& data() const noexcept { return serialized_data_; } - [[nodiscard]] PROMPP_ALWAYS_INLINE PromPP::Primitives::LabelSetID series_id() const noexcept { return chunk_iterator_->label_set_id; } - - private: - friend class SerializedChunkIteratorNew; - - const SerializedData& serialized_data_; - SerializedChunkSpan::iterator chunk_iterator_; - SerializedChunkSpan::iterator chunk_end_iterator_; - - PROMPP_ALWAYS_INLINE void next_value() noexcept { ++chunk_iterator_; } - - [[nodiscard]] PROMPP_ALWAYS_INLINE bool has_value() const noexcept { return chunk_iterator_ != chunk_end_iterator_; } - }; - - using iterator_category = std::forward_iterator_tag; - using value_type = Data; - using difference_type = ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - - explicit SerializedChunkIteratorNew(const SerializedData& serialized_data) : data_(serialized_data, get_chunks(serialized_data)) {} - - [[nodiscard]] PROMPP_ALWAYS_INLINE const Data& operator*() const noexcept { return data_; } - [[nodiscard]] PROMPP_ALWAYS_INLINE const Data* operator->() const noexcept { return &data_; } - - PROMPP_ALWAYS_INLINE SerializedChunkIteratorNew& operator++() noexcept { - data_.next_value(); - return *this; - } - - PROMPP_ALWAYS_INLINE SerializedChunkIteratorNew operator++(int) noexcept { - const auto it = *this; - ++*this; - return it; - } - - PROMPP_ALWAYS_INLINE bool operator==(const IteratorSentinel&) const noexcept { return !data_.has_value(); } - - private: - Data data_; - - [[nodiscard]] PROMPP_ALWAYS_INLINE static SerializedChunkSpan get_chunks(const SerializedData& serialized_data) noexcept { - return {serialized_data.chunks.data(), serialized_data.chunks.size()}; - } -}; - } // namespace series_data::chunk \ No newline at end of file diff --git a/pp/series_data/serialization/deserializer.h b/pp/series_data/serialization/deserializer.h index 3520308323..06725e7988 100644 --- a/pp/series_data/serialization/deserializer.h +++ b/pp/series_data/serialization/deserializer.h @@ -24,10 +24,6 @@ class Deserializer { return {reinterpret_cast(buffer.data() + sizeof(uint32_t)), chunks_count}; } - [[nodiscard]] PROMPP_ALWAYS_INLINE static chunk::SerializedChunkSpan get_chunks(const chunk::SerializedData& serialized_data) noexcept { - return {serialized_data.chunks.data(), serialized_data.chunks.size()}; - } - [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(std::span buffer, const chunk::SerializedChunk& chunk) { decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { @@ -36,16 +32,6 @@ class Deserializer { return iterator; } - [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedData& serialized_data, - const chunk::SerializedChunk& chunk) { - decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); - std::span buffer{serialized_data.bytes_buffer_.control_block().data, serialized_data.bytes_buffer_.size()}; - Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { - iterator = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; - }); - return iterator; - } - [[nodiscard]] static decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedChunkIterator::Data& chunk) { return create_decode_iterator(chunk.buffer(), chunk.chunk()); } diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h new file mode 100644 index 0000000000..333df6ca0c --- /dev/null +++ b/pp/series_data/serialization/serialized_data.h @@ -0,0 +1,209 @@ +#pragma once +#include "bare_bones/memory.h" +#include "series_data/chunk/serialized_chunk.h" +#include "series_data/data_storage.h" +#include "series_data/querier/query.h" + +namespace series_data::serialization { +class SerializedData { + public: + explicit SerializedData(const DataStorage& storage, const querier::QueriedChunkList& queried_chunks) noexcept { serialize_internal(storage, queried_chunks); } + explicit SerializedData(const DataStorage& storage) noexcept { serialize_internal(storage, storage.chunks()); } + + [[nodiscard]] PROMPP_ALWAYS_INLINE chunk::SerializedChunkSpan get_chunks() const noexcept { return {chunks_.data(), chunks_.size()}; } + [[nodiscard]] PROMPP_ALWAYS_INLINE std::span get_buffer() const noexcept { + return {bytes_buffer_.control_block().data, bytes_buffer_.size()}; + } + + [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t allocated_memory() const noexcept { return chunks_.allocated_memory() + bytes_buffer_.allocated_memory(); } + + [[nodiscard]] decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedChunk& chunk) const noexcept { + decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); + std::span buffer{bytes_buffer_.control_block().data, bytes_buffer_.size()}; + Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { + iterator = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + return iterator; + } + + private: + struct TimestampStreamsData { + using TimestampId = uint32_t; + using Offset = uint32_t; + + static constexpr Offset kInvalidOffset = std::numeric_limits::max(); + + phmap::flat_hash_map stream_offsets; + phmap::flat_hash_map finalized_stream_offsets; + }; + + template + void serialize_internal(const DataStorage& storage, const ChunkList& chunks) noexcept { + const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); + + const uint32_t chunk_count = get_chunk_count(chunks); + + chunks_.reserve(chunk_count); + + uint32_t data_size = 0; + + TimestampStreamsData timestamp_streams_data; + for (auto& chunk_data : chunks) { + using enum chunk::DataChunk::Type; + + if (chunk_data.is_open()) [[likely]] { + if (const auto& chunk = get_chunk(storage, chunk_data); !chunk.is_empty()) [[likely]] { + fill_serialized_chunk(storage, chunk, chunks_.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, bytes_buffer_); + } + } else { + fill_serialized_chunk(storage, get_chunk(storage, chunk_data), chunks_.emplace_back(chunk_data.series_id()), + timestamp_streams_data, data_size, bytes_buffer_); + } + } + + bytes_buffer_.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); + std::memcpy(bytes_buffer_.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); + } + + template + PROMPP_ALWAYS_INLINE static uint32_t get_chunk_count(const ChunkList& chunks) noexcept { + if constexpr (std::is_same_v) { + return chunks.non_empty_chunk_count(); + } else { + return chunks.size(); + } + } + + template + void fill_serialized_chunk(const DataStorage& storage, + const chunk::DataChunk& chunk, + chunk::SerializedChunk& serialized_chunk, + TimestampStreamsData& timestamp_streams_data, + uint32_t& data_size, + BareBones::Memory& buffer) const noexcept { + using enum EncodingType; + + serialized_chunk.encoding_state = chunk.encoding_state; + + if (chunk.encoding_state.encoding_type != kGorilla) [[likely]] { + fill_timestamp_stream_offset(storage, timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, data_size, buffer); + } + + switch (chunk.encoding_state.encoding_type) { + case kUint32Constant: { + serialized_chunk.store_value_in_offset(chunk.encoder.uint32_constant); + break; + } + + case kFloat32Constant: { + serialized_chunk.store_value_in_offset(chunk.encoder.float32_constant); + break; + } + + case kDoubleConstant: { + serialized_chunk.set_offset(data_size); + buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::DoubleConstantEncoder)); + std::memcpy(buffer.control_block().data + data_size, &storage.variant_encoders[chunk.encoder.external_index].double_constant, + sizeof(encoder::value::DoubleConstantEncoder)); + data_size += sizeof(encoder::value::DoubleConstantEncoder); + break; + } + + case kTwoDoubleConstant: { + serialized_chunk.set_offset(data_size); + buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::TwoDoubleConstantEncoder)); + std::memcpy(buffer.control_block().data + data_size, &storage.variant_encoders[chunk.encoder.external_index].two_double_constant, + sizeof(encoder::value::TwoDoubleConstantEncoder)); + data_size += sizeof(encoder::value::TwoDoubleConstantEncoder); + break; + } + + case kAscInteger: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage.get_asc_integer_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kAscIntegerThenValuesGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage.get_asc_integer_then_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kValuesGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage.get_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + case kGorilla: { + serialized_chunk.set_offset(data_size); + write_compact_bit_sequence(storage.get_gorilla_encoder_stream(chunk.encoder.external_index), data_size, buffer); + break; + } + + default: { + assert(chunk.encoding_state.encoding_type != kUnknown); + } + } + } + + template + [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage& storage, const querier::QueriedChunk& queried_chunk) noexcept { + if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { + return storage.open_chunks[queried_chunk.series_id()]; + } else { + auto finalized_chunk_it = storage.finalized_chunks.find(queried_chunk.series_id())->second.begin(); + std::advance(finalized_chunk_it, queried_chunk.finalized_chunk_id); + return *finalized_chunk_it; + } + } + + template + [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage&, const DataStorage::SeriesChunkIterator::Data& chunk) noexcept { + return chunk.chunk(); + } + + template + void fill_timestamp_stream_offset(const DataStorage& storage, + TimestampStreamsData& timestamp_streams_data, + encoder::timestamp::State::Id timestamp_stream_id, + chunk::SerializedChunk& serialized_chunk, + uint32_t& data_size, + BareBones::Memory& buffer) const noexcept { + if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { + if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) [[unlikely]] { + timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); + serialized_chunk.timestamps_offset = data_size; + write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + } else { + serialized_chunk.timestamps_offset = it->second; + } + + serialized_chunk.timestamps_offset = timestamp_streams_data.stream_offsets[timestamp_stream_id]; + } else { + if (const auto it = timestamp_streams_data.finalized_stream_offsets.find(timestamp_stream_id); + it == timestamp_streams_data.finalized_stream_offsets.end()) [[unlikely]] { + timestamp_streams_data.finalized_stream_offsets.emplace(timestamp_stream_id, data_size); + serialized_chunk.timestamps_offset = data_size; + write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + } else { + serialized_chunk.timestamps_offset = it->second; + } + } + } + + template + static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, + uint32_t& data_size, + BareBones::Memory& buffer) { + const auto bytes_count = bit_sequence.size_in_bytes(); + buffer.grow_to_fit_at_least(data_size + bytes_count); + std::memcpy(buffer.control_block().data + data_size, bit_sequence.raw_bytes(), bytes_count); + data_size += bytes_count; + } + + BareBones::Vector chunks_; + BareBones::Memory bytes_buffer_; +}; +} // namespace series_data::serialization \ No newline at end of file diff --git a/pp/series_data/serialization/serializer.h b/pp/series_data/serialization/serializer.h index 23b481ddde..c0a357bf5f 100644 --- a/pp/series_data/serialization/serializer.h +++ b/pp/series_data/serialization/serializer.h @@ -7,7 +7,6 @@ #include "series_data/querier/query.h" namespace series_data::serialization { -inline namespace old_ { class Serializer { public: explicit Serializer(const DataStorage& storage) : storage_(storage) {} @@ -308,203 +307,5 @@ class Serializer { } } }; -} // namespace old_ - -inline namespace new_ { - -class Serializer { - public: - explicit Serializer(const DataStorage& storage) : storage_(storage) {} - - chunk::SerializedData serialize(const querier::QueriedChunkList& queried_chunks) { return serialize_impl(queried_chunks); } - - chunk::SerializedData serialize() { return serialize_impl(storage_.chunks()); } - - private: - struct TimestampStreamsData { - using TimestampId = uint32_t; - using Offset = uint32_t; - - static constexpr Offset kInvalidOffset = std::numeric_limits::max(); - - phmap::flat_hash_map stream_offsets; - phmap::flat_hash_map finalized_stream_offsets; - }; - - using QueriedChunk = querier::QueriedChunk; - using QueriedChunkList = querier::QueriedChunkList; - using SerializedChunk = chunk::SerializedChunk; - using SerializedChunkList = chunk::SerializedChunkList; - - const DataStorage& storage_; - - template - chunk::SerializedData serialize_impl(const ChunkList& chunks) { - const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); - - const uint32_t chunk_count = get_chunk_count(chunks); - - chunk::SerializedData result; - result.chunks.reserve(chunk_count); - - uint32_t data_size = 0; - - TimestampStreamsData timestamp_streams_data; - for (auto& chunk_data : chunks) { - using enum chunk::DataChunk::Type; - - if (chunk_data.is_open()) [[likely]] { - if (const auto& chunk = get_chunk(chunk_data); !chunk.is_empty()) [[likely]] { - fill_serialized_chunk(chunk, result.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, result.bytes_buffer_); - } - } else { - fill_serialized_chunk(get_chunk(chunk_data), result.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, - data_size, result.bytes_buffer_); - } - } - - result.bytes_buffer_.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); - std::memcpy(result.bytes_buffer_.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); - - return result; - } - - template - PROMPP_ALWAYS_INLINE static uint32_t get_chunk_count(const ChunkList& chunks) noexcept { - if constexpr (std::is_same_v) { - return chunks.non_empty_chunk_count(); - } else { - return chunks.size(); - } - } - - template - void fill_serialized_chunk(const chunk::DataChunk& chunk, - SerializedChunk& serialized_chunk, - TimestampStreamsData& timestamp_streams_data, - uint32_t& data_size, - BareBones::Memory& buffer) const noexcept { - using enum EncodingType; - - serialized_chunk.encoding_state = chunk.encoding_state; - - if (chunk.encoding_state.encoding_type != kGorilla) [[likely]] { - fill_timestamp_stream_offset(timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, data_size, buffer); - } - - switch (chunk.encoding_state.encoding_type) { - case kUint32Constant: { - serialized_chunk.store_value_in_offset(chunk.encoder.uint32_constant); - break; - } - - case kFloat32Constant: { - serialized_chunk.store_value_in_offset(chunk.encoder.float32_constant); - break; - } - - case kDoubleConstant: { - serialized_chunk.set_offset(data_size); - buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::DoubleConstantEncoder)); - std::memcpy(buffer.control_block().data + data_size, &storage_.variant_encoders[chunk.encoder.external_index].double_constant, - sizeof(encoder::value::DoubleConstantEncoder)); - data_size += sizeof(encoder::value::DoubleConstantEncoder); - break; - } - - case kTwoDoubleConstant: { - serialized_chunk.set_offset(data_size); - buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::TwoDoubleConstantEncoder)); - std::memcpy(buffer.control_block().data + data_size, &storage_.variant_encoders[chunk.encoder.external_index].two_double_constant, - sizeof(encoder::value::TwoDoubleConstantEncoder)); - data_size += sizeof(encoder::value::TwoDoubleConstantEncoder); - break; - } - - case kAscInteger: { - serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage_.get_asc_integer_stream(chunk.encoder.external_index), data_size, buffer); - break; - } - - case kAscIntegerThenValuesGorilla: { - serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage_.get_asc_integer_then_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); - break; - } - - case kValuesGorilla: { - serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage_.get_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); - break; - } - - case kGorilla: { - serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage_.get_gorilla_encoder_stream(chunk.encoder.external_index), data_size, buffer); - break; - } - - default: { - assert(chunk.encoding_state.encoding_type != kUnknown); - } - } - } - - template - [[nodiscard]] const chunk::DataChunk& get_chunk(const QueriedChunk& queried_chunk) const noexcept { - if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { - return storage_.open_chunks[queried_chunk.series_id()]; - } else { - auto finalized_chunk_it = storage_.finalized_chunks.find(queried_chunk.series_id())->second.begin(); - std::advance(finalized_chunk_it, queried_chunk.finalized_chunk_id); - return *finalized_chunk_it; - } - } - - template - [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage::SeriesChunkIterator::Data& chunk) noexcept { - return chunk.chunk(); - } - - template - void fill_timestamp_stream_offset(TimestampStreamsData& timestamp_streams_data, - encoder::timestamp::State::Id timestamp_stream_id, - SerializedChunk& serialized_chunk, - uint32_t& data_size, - BareBones::Memory& buffer) const noexcept { - if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { - if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) [[unlikely]] { - timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); - serialized_chunk.timestamps_offset = data_size; - write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); - } else { - serialized_chunk.timestamps_offset = it->second; - } - - serialized_chunk.timestamps_offset = timestamp_streams_data.stream_offsets[timestamp_stream_id]; - } else { - if (const auto it = timestamp_streams_data.finalized_stream_offsets.find(timestamp_stream_id); - it == timestamp_streams_data.finalized_stream_offsets.end()) [[unlikely]] { - timestamp_streams_data.finalized_stream_offsets.emplace(timestamp_stream_id, data_size); - serialized_chunk.timestamps_offset = data_size; - write_compact_bit_sequence(storage_.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); - } else { - serialized_chunk.timestamps_offset = it->second; - } - } - } - - template - static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, - uint32_t& data_size, - BareBones::Memory& buffer) { - const auto bytes_count = bit_sequence.size_in_bytes(); - buffer.grow_to_fit_at_least(data_size + bytes_count); - std::memcpy(buffer.control_block().data + data_size, bit_sequence.raw_bytes(), bytes_count); - data_size += bytes_count; - } -}; -} // namespace new_ } // namespace series_data::serialization diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index de15914c46..3e6cb9717c 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -5,6 +5,7 @@ #include "series_data/encoder.h" #include "series_data/encoder/bit_sequence.h" #include "series_data/serialization/deserializer.h" +#include "series_data/serialization/serialized_data.h" #include "series_data/serialization/serializer.h" namespace { @@ -21,12 +22,11 @@ using series_data::encoder::SampleList; using series_data::querier::QueriedChunk; using series_data::querier::QueriedChunkList; using series_data::serialization::Deserializer; -using series_data::serialization::new_::Serializer; +using series_data::serialization::SerializedData; class SerializerDeserializerTrait { protected: DataStorage storage_; - Serializer serializer_{storage_}; Encoder<> encoder_{storage_}; template @@ -43,13 +43,13 @@ TEST_F(SerializerDeserializerFixtureNew, EmptyChunksList) { // Arrange // Act - const auto serialized = serializer_.serialize({}); + const SerializedData serialized(storage_, {}); // const Deserializer deserializer(serialized); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(0U, serialized.chunks.size()); - ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized.bytes_buffer_.size()); + ASSERT_EQ(0U, serialized.get_chunks().size()); + ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized.get_buffer().size()); } TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimestampStream) { @@ -64,22 +64,22 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta encoder_.encode(1, 3, 1.0); // Act - const auto serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}}); + const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); // const Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(2U, Deserializer::get_chunks(serialized).size()); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - EXPECT_EQ(Deserializer::get_chunks(serialized)[0].timestamps_offset, Deserializer::get_chunks(serialized)[1].timestamps_offset); + ASSERT_EQ(2U, serialized.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); + EXPECT_EQ(serialized.get_chunks()[0].timestamps_offset, serialized.get_chunks()[1].timestamps_offset); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ @@ -87,7 +87,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); } TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUniqueTimestampStream) { @@ -106,37 +106,37 @@ TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUn encoder_.encode(2, 3, 2.0); // Act - const auto serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); + const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); // const Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(3U, Deserializer::get_chunks(serialized).size()); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); - EXPECT_EQ(Deserializer::get_chunks(serialized)[0].timestamps_offset, Deserializer::get_chunks(serialized)[1].timestamps_offset); + ASSERT_EQ(3U, serialized.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[2].encoding_state.encoding_type); + EXPECT_EQ(serialized.get_chunks()[0].timestamps_offset, serialized.get_chunks()[1].timestamps_offset); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 2.0}, {.timestamp = 2, .value = 2.0}, {.timestamp = 3, .value = 2.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { @@ -173,72 +173,72 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { encoder_.encode(8, 123, 4.1); // Act - const auto serialized = serializer_.serialize(); + const SerializedData serialized(storage_); // Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); - ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + ASSERT_EQ(10U, serialized.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -246,13 +246,13 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { @@ -299,72 +299,72 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - const auto serialized = serializer_.serialize(); + const SerializedData serialized(storage_); // Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); - ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + ASSERT_EQ(10U, serialized.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -372,13 +372,13 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); } TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { @@ -388,7 +388,7 @@ TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); // Act - const auto serialized = serializer_.serialize({QueriedChunk{1}}); + const SerializedData serialized(storage_, {QueriedChunk{1}}); // const Deserializer deserializer(get_buffer()); // Assert @@ -396,7 +396,7 @@ TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); } TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { @@ -408,7 +408,7 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { encoder_.encode(0, 103, 1.0); // Act - const auto serialized = serializer_.serialize(); + const SerializedData serialized(storage_); // const Deserializer deserializer(get_buffer()); // Assert @@ -418,12 +418,12 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { {.timestamp = 101, .value = 1.0}, {.timestamp = 102, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 103, .value = 1.0}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { @@ -470,44 +470,44 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { encoder_.encode(8, 134, STALE_NAN); // Act - const auto serialized = serializer_.serialize(); + const SerializedData serialized(storage_); // Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); - EXPECT_TRUE(std::ranges::all_of(Deserializer::get_chunks(serialized), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); - ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + ASSERT_EQ(10U, serialized.get_chunks().size()); + EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -515,7 +515,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -523,7 +523,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -531,20 +531,20 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -553,14 +553,14 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { @@ -617,44 +617,44 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - const auto serialized = serializer_.serialize(); + const SerializedData serialized(storage_); // Deserializer deserializer(get_buffer()); // Assert // ASSERT_TRUE(deserializer.is_valid()); - ASSERT_EQ(10U, Deserializer::get_chunks(serialized).size()); - EXPECT_TRUE(std::ranges::all_of(Deserializer::get_chunks(serialized), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, Deserializer::get_chunks(serialized)[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, Deserializer::get_chunks(serialized)[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, Deserializer::get_chunks(serialized)[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, Deserializer::get_chunks(serialized)[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, Deserializer::get_chunks(serialized)[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, Deserializer::get_chunks(serialized)[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, Deserializer::get_chunks(serialized)[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, Deserializer::get_chunks(serialized)[9].encoding_state.encoding_type); - ASSERT_EQ(20U, Deserializer::get_chunks(serialized)[9].label_set_id); + ASSERT_EQ(10U, serialized.get_chunks().size()); + EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[0])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[1])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[2])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -662,7 +662,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[3])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -670,7 +670,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[4])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -678,20 +678,20 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[5])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[6])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[7])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -700,24 +700,24 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[8])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_chunk(Deserializer::create_decode_iterator(serialized, Deserializer::get_chunks(serialized)[9])))); + decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); } -/*class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, public testing::Test { +class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, public testing::Test { protected: using DecodedChunks = std::vector; - DecodedChunks decode_chunks() const { + DecodedChunks decode_chunks(const SerializedData& serialized_data) const { DecodedChunks result; - for (auto& chunk : Deserializer{get_buffer()}) { - result.emplace_back(decode_chunk(Deserializer::create_decode_iterator(chunk))); + for (auto& chunk : serialized_data.get_chunks()) { + result.emplace_back(decode_chunk(serialized_data.create_decode_iterator(chunk))); } return result; } @@ -727,8 +727,8 @@ TEST_F(DeserializerIteratorFixtureNew, EmptyChunksList) { // Arrange // Act - serializer_.serialize({}, stream_); - auto decoded_chunks = decode_chunks(); + const SerializedData serialized({}); + auto decoded_chunks = decode_chunks(serialized); // Assert EXPECT_TRUE(std::ranges::equal(DecodedChunks{}, decoded_chunks)); @@ -740,8 +740,8 @@ TEST_F(DeserializerIteratorFixtureNew, OneChunk) { encoder_.encode(0, 2, 1.0); // Act - serializer_.serialize({QueriedChunk{0}}, stream_); - auto decoded_chunks = decode_chunks(); + const SerializedData serialized(storage_, {QueriedChunk{0}}); + auto decoded_chunks = decode_chunks(serialized); // Assert EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}}}, decoded_chunks)); @@ -753,11 +753,10 @@ TEST_F(DeserializerIteratorFixtureNew, TwoChunks) { encoder_.encode(1, 2, 1.0); // Act - serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}}, stream_); - auto decoded_chunks = decode_chunks(); + const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); + auto decoded_chunks = decode_chunks(serialized); // Assert EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}}, SampleList{{.timestamp = 2, .value = 1.0}}}, decoded_chunks)); -}*/ - +} } // namespace \ No newline at end of file diff --git a/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp index b843655ac7..15b5d52ebd 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_tests.cpp @@ -20,7 +20,7 @@ using series_data::encoder::SampleList; using series_data::querier::QueriedChunk; using series_data::querier::QueriedChunkList; using series_data::serialization::Deserializer; -using series_data::serialization::old_::Serializer; +using series_data::serialization::Serializer; class SerializerDeserializerTrait { protected: From 02619ba82c21c1f8981e6c2e1449cb5bfe7e239d Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Thu, 2 Oct 2025 21:26:37 +0300 Subject: [PATCH 05/17] update benchmark --- .../benchmarks/serializer_benchmark.cpp | 120 +++++++++++++++--- .../serialization/serialized_data.h | 3 +- 2 files changed, 104 insertions(+), 19 deletions(-) diff --git a/pp/series_data/benchmarks/serializer_benchmark.cpp b/pp/series_data/benchmarks/serializer_benchmark.cpp index 08fdf7c3bb..62cab803a8 100644 --- a/pp/series_data/benchmarks/serializer_benchmark.cpp +++ b/pp/series_data/benchmarks/serializer_benchmark.cpp @@ -9,6 +9,7 @@ #include "bare_bones/preprocess.h" #include "series_data/encoder.h" #include "series_data/querier/query.h" +#include "series_data/serialization/serialized_data.h" #include "series_data/serialization/serializer.h" namespace { @@ -73,19 +74,18 @@ void BenchmarkWalSerializer(benchmark::State& state) { } for ([[maybe_unused]] auto _ : state) { - series_data::serialization::new_::Serializer serializer_{storage}; + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; - const auto x = serializer_.serialize(chunk_list); + serializer_.serialize(chunk_list, stream); } { - series_data::serialization::new_::Serializer serializer_{storage}; + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; - const auto x = serializer_.serialize(chunk_list); - state.counters["total mem"] = - benchmark::Counter(x.chunks.allocated_memory() + x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); - state.counters["chunk mem"] = benchmark::Counter(x.chunks.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); - state.counters["stream mem"] = benchmark::Counter(x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + serializer_.serialize(chunk_list, stream); + state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } @@ -122,24 +122,110 @@ void BenchmarkWalConstantSerializer(benchmark::State& state) { } for ([[maybe_unused]] auto _ : state) { - series_data::serialization::new_::Serializer serializer_{storage}; - // BareBones::ShrinkedToFitOStringStream stream; + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; - const auto x = serializer_.serialize(chunk_list); + serializer_.serialize(chunk_list, stream); } { - series_data::serialization::new_::Serializer serializer_{storage}; + series_data::serialization::Serializer serializer_{storage}; + BareBones::ShrinkedToFitOStringStream stream; - const auto x = serializer_.serialize(chunk_list); - state.counters["total mem"] = - benchmark::Counter(x.chunks.allocated_memory() + x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); - state.counters["chunk mem"] = benchmark::Counter(x.chunks.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); - state.counters["stream mem"] = benchmark::Counter(x.bytes_buffer_.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + serializer_.serialize(chunk_list, stream); + state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + } +} + +void BenchmarkWalSerializedData(benchmark::State& state) { + const auto& samples = get_samples_for_benchmark(); + const double percent = state.range(0) / 100.0; + const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); + const auto min_ts = min->timestamp; + const auto max_ts = max->timestamp; + const auto delta_ts = max_ts - min_ts; + + series_data::DataStorage storage; + series_data::Encoder encoder{storage}; + + for (const auto& sample : samples) { + if (sample.timestamp < min_ts + delta_ts * percent) { + encoder.encode(sample.series_id, sample.timestamp, sample.value); + } + } + + series_data::querier::QueriedChunkList chunk_list; + { + std::vector v(storage.open_chunks.size()); + std::iota(v.begin(), v.end(), 0); + + std::mt19937 g(42); + std::ranges::shuffle(v, g); + v.resize(v.size() / 10); + + chunk_list.reserve(v.size()); + for (uint32_t ls_id : v) { + chunk_list.emplace_back(ls_id); + } + } + + for ([[maybe_unused]] auto _ : state) { + series_data::serialization::SerializedData serialized(storage, chunk_list); + benchmark::DoNotOptimize(serialized); + } + + { + series_data::serialization::SerializedData serialized(storage, chunk_list); + state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + } +} + +void BenchmarkWalConstantSerializedData(benchmark::State& state) { + const auto& samples = get_samples_for_benchmark(); + const double percent = state.range(0) / 100.0; + const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); + const auto min_ts = min->timestamp; + const auto max_ts = max->timestamp; + const auto delta_ts = max_ts - min_ts; + + series_data::DataStorage storage; + series_data::Encoder encoder{storage}; + + for (const auto& sample : samples) { + if (sample.timestamp <= min_ts + delta_ts * percent) { + encoder.encode(sample.series_id, sample.timestamp, sample.series_id); + } + } + + series_data::querier::QueriedChunkList chunk_list; + { + std::vector v(storage.open_chunks.size()); + std::iota(v.begin(), v.end(), 0); + + std::mt19937 g(42); + std::ranges::shuffle(v, g); + v.resize(v.size() / 10); + + chunk_list.reserve(v.size()); + for (uint32_t ls_id : v) { + chunk_list.emplace_back(ls_id); + } + } + + for ([[maybe_unused]] auto _ : state) { + series_data::serialization::SerializedData serialized(storage, chunk_list); + benchmark::DoNotOptimize(serialized); + } + + { + series_data::serialization::SerializedData serialized(storage, chunk_list); + state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } BENCHMARK(BenchmarkWalSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100); +BENCHMARK(BenchmarkWalSerializedData)->Arg(25)->Arg(50)->Arg(75)->Arg(100); BENCHMARK(BenchmarkWalConstantSerializer)->Arg(25)->Arg(50)->Arg(75)->Arg(100); +BENCHMARK(BenchmarkWalConstantSerializedData)->Arg(25)->Arg(50)->Arg(75)->Arg(100); } // namespace diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 333df6ca0c..431caa28cd 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -2,6 +2,7 @@ #include "bare_bones/memory.h" #include "series_data/chunk/serialized_chunk.h" #include "series_data/data_storage.h" +#include "series_data/decoder/universal_decode_iterator.h" #include "series_data/querier/query.h" namespace series_data::serialization { @@ -179,8 +180,6 @@ class SerializedData { } else { serialized_chunk.timestamps_offset = it->second; } - - serialized_chunk.timestamps_offset = timestamp_streams_data.stream_offsets[timestamp_stream_id]; } else { if (const auto it = timestamp_streams_data.finalized_stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.finalized_stream_offsets.end()) [[unlikely]] { From c6f562d98dee67d415ef58fe0cd20fcf8d3818ff Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 3 Oct 2025 15:42:27 +0300 Subject: [PATCH 06/17] interface --- .../serialization/serialized_data.h | 90 +++++++++++++++++++ .../serializer_deserializer_new_tests.cpp | 32 +++++-- 2 files changed, 116 insertions(+), 6 deletions(-) diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 431caa28cd..4ec65095aa 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -8,6 +8,65 @@ namespace series_data::serialization { class SerializedData { public: + static constexpr uint32_t kNoMoreSeries = std::numeric_limits::max(); + + class SerializedSeriesIterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = encoder::Sample; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + SerializedSeriesIterator(const BareBones::Memory& buffer, + chunk::SerializedChunkSpan chunks, + uint32_t chunk_id) + : decode_iter_(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false), + chunk_iter_(chunks.begin() + chunk_id), + series_id_(chunk_iter_->label_set_id), + buffer_(buffer.control_block().data, buffer.size()), + chunks_(chunks) { + Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { + decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + } + + [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample& operator*() const noexcept { return *decode_iter_; } + [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample* operator->() const noexcept { return decode_iter_.operator->(); } + + PROMPP_ALWAYS_INLINE SerializedSeriesIterator& operator++() noexcept { + ++decode_iter_; + if (decode_iter_ == decoder::DecodeIteratorSentinel{}) [[unlikely]] { + if (std::next(chunk_iter_) != chunks_.end() && series_id_ == std::next(chunk_iter_)->label_set_id) { + ++chunk_iter_; + Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { + decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + } + } + return *this; + } + + PROMPP_ALWAYS_INLINE SerializedSeriesIterator operator++(int) noexcept { + const auto it = *this; + ++*this; + return it; + } + + PROMPP_ALWAYS_INLINE bool operator==(const decoder::DecodeIteratorSentinel&) const noexcept { + return (decode_iter_ == decoder::DecodeIteratorSentinel{}) && + (std::next(chunk_iter_) == chunks_.end() || series_id_ != std::next(chunk_iter_)->label_set_id); + } + + private: + decoder::UniversalDecodeIterator decode_iter_; + chunk::SerializedChunkSpan::const_iterator chunk_iter_; + uint32_t series_id_; + + std::span buffer_; + chunk::SerializedChunkSpan chunks_; + }; + explicit SerializedData(const DataStorage& storage, const querier::QueriedChunkList& queried_chunks) noexcept { serialize_internal(storage, queried_chunks); } explicit SerializedData(const DataStorage& storage) noexcept { serialize_internal(storage, storage.chunks()); } @@ -27,6 +86,36 @@ class SerializedData { return iterator; } + [[nodiscard]] uint32_t next_series() noexcept { + if (internal_index_ == kNoMoreSeries) [[unlikely]] { + if (chunks_.empty()) [[unlikely]] { + return kNoMoreSeries; + } + internal_index_ = 0; + return chunks_[0].label_set_id; + } + + if (internal_index_ == chunks_.size()) [[unlikely]] { + return kNoMoreSeries; + } + + const uint32_t current_series_id = chunks_[internal_index_].label_set_id; + while (internal_index_ < chunks_.size() && current_series_id == chunks_[internal_index_].label_set_id) { + ++internal_index_; + } + + if (internal_index_ == chunks_.size()) [[unlikely]] { + return kNoMoreSeries; + } + + return chunks_[internal_index_].label_set_id; + } + + [[nodiscard]] SerializedSeriesIterator create_current_series_iterator() const noexcept { return {bytes_buffer_, get_chunks(), internal_index_}; } + [[nodiscard]] auto create_current_series_range() const noexcept { + return std::ranges::subrange(SerializedSeriesIterator{bytes_buffer_, get_chunks(), internal_index_}, decoder::DecodeIteratorSentinel{}); + } + private: struct TimestampStreamsData { using TimestampId = uint32_t; @@ -204,5 +293,6 @@ class SerializedData { BareBones::Vector chunks_; BareBones::Memory bytes_buffer_; + uint32_t internal_index_ = std::numeric_limits::max(); }; } // namespace series_data::serialization \ No newline at end of file diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index 3e6cb9717c..33c64be062 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -714,10 +714,12 @@ class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, publi protected: using DecodedChunks = std::vector; - DecodedChunks decode_chunks(const SerializedData& serialized_data) const { + DecodedChunks decode_chunks(SerializedData& serialized_data) const { DecodedChunks result; - for (auto& chunk : serialized_data.get_chunks()) { - result.emplace_back(decode_chunk(serialized_data.create_decode_iterator(chunk))); + while (serialized_data.next_series() != SerializedData::kNoMoreSeries) { + SampleList samples; + std::ranges::copy(serialized_data.create_current_series_range(), std::back_insert_iterator(samples)); + result.emplace_back(samples); } return result; } @@ -727,7 +729,7 @@ TEST_F(DeserializerIteratorFixtureNew, EmptyChunksList) { // Arrange // Act - const SerializedData serialized({}); + SerializedData serialized({}); auto decoded_chunks = decode_chunks(serialized); // Assert @@ -740,20 +742,38 @@ TEST_F(DeserializerIteratorFixtureNew, OneChunk) { encoder_.encode(0, 2, 1.0); // Act - const SerializedData serialized(storage_, {QueriedChunk{0}}); + SerializedData serialized(storage_, {QueriedChunk{0}}); auto decoded_chunks = decode_chunks(serialized); // Assert EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}}}, decoded_chunks)); } +TEST_F(DeserializerIteratorFixtureNew, OneChunkFinalized) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(0, 2, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 3, 1.0); + encoder_.encode(0, 4, 1.0); + + // Act + SerializedData serialized(storage_); + auto decoded_chunks = decode_chunks(serialized); + + // Assert + EXPECT_TRUE(std::ranges::equal( + DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, {.timestamp = 4, .value = 1.0}}}, + decoded_chunks)); +} + TEST_F(DeserializerIteratorFixtureNew, TwoChunks) { // Arrange encoder_.encode(0, 1, 1.0); encoder_.encode(1, 2, 1.0); // Act - const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); + SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); auto decoded_chunks = decode_chunks(serialized); // Assert From 74c7eacb7e20271b86191a939faf06dad8cd08da Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 3 Oct 2025 17:07:15 +0300 Subject: [PATCH 07/17] entrypoint --- pp/bare_bones/memory.h | 1 + pp/entrypoint/head/serialization.h | 13 +++ pp/entrypoint/series_data/querier.h | 41 +++++++++- pp/entrypoint/series_data_data_storage.cpp | 32 ++++++++ pp/entrypoint/series_data_data_storage.h | 16 ++++ ...ies_data_serialization_serializes_data.cpp | 79 ++++++++++++++++++ ...eries_data_serialization_serializes_data.h | 80 +++++++++++++++++++ 7 files changed, 258 insertions(+), 4 deletions(-) create mode 100644 pp/entrypoint/head/serialization.h create mode 100644 pp/entrypoint/series_data_serialization_serializes_data.cpp create mode 100644 pp/entrypoint/series_data_serialization_serializes_data.h diff --git a/pp/bare_bones/memory.h b/pp/bare_bones/memory.h index 4c55789f89..367bbc7198 100644 --- a/pp/bare_bones/memory.h +++ b/pp/bare_bones/memory.h @@ -2,6 +2,7 @@ #include #include +#include #include "preprocess.h" #include "type_traits.h" diff --git a/pp/entrypoint/head/serialization.h b/pp/entrypoint/head/serialization.h new file mode 100644 index 0000000000..6ac36a2479 --- /dev/null +++ b/pp/entrypoint/head/serialization.h @@ -0,0 +1,13 @@ +#pragma once + +#include "series_data/serialization/serialized_data.h" + +namespace entrypoint::head { + +using SerializedDataPtr = std::unique_ptr; +using SerializedDataIteratorPtr = std::unique_ptr; + +static_assert(sizeof(SerializedDataPtr) == sizeof(void*)); +static_assert(sizeof(SerializedDataIteratorPtr) == sizeof(void*)); + +} // namespace entrypoint::head \ No newline at end of file diff --git a/pp/entrypoint/series_data/querier.h b/pp/entrypoint/series_data/querier.h index 60c03b0a32..08b0434b73 100644 --- a/pp/entrypoint/series_data/querier.h +++ b/pp/entrypoint/series_data/querier.h @@ -5,6 +5,7 @@ #include "primitives/primitives.h" #include "series_data/querier/instant_querier.h" #include "series_data/querier/querier.h" +#include "series_data/serialization/serialized_data.h" #include "series_data/serialization/serializer.h" namespace entrypoint::series_data { @@ -87,15 +88,47 @@ class RangeQuerierWithArgumentsWrapper { } }; -enum class QuerierType : uint8_t { - kInstantQuerier = 0, - kRangeQuerier, +class RangeQuerierWithArgumentsWrapperNew { + using DataStorage = ::series_data::DataStorage; + using LabelSetID = PromPP::Primitives::LabelSetID; + template + using Slice = PromPP::Primitives::Go::Slice; + using Query = ::series_data::querier::Query>; + using Serializer = ::series_data::serialization::Serializer; + using BytesStream = PromPP::Primitives::Go::BytesStream; + + public: + RangeQuerierWithArgumentsWrapperNew(DataStorage& storage, const Query& query, ::series_data::serialization::SerializedData* serialized_data) + : querier_(storage), query_(&query), serialized_data_(serialized_data) {} + + void query() noexcept { + querier_.query(*query_); + if (!querier_.need_loading()) { + serialize_chunks(); + } + } + + PROMPP_ALWAYS_INLINE void query_finalize() const noexcept { serialize_chunks(); } + + [[nodiscard]] const BareBones::Bitset& series_to_load() const noexcept { return querier_.get_series_to_load(); } + [[nodiscard]] bool need_loading() const noexcept { return querier_.need_loading(); } + [[nodiscard]] DataStorage& storage() noexcept { return querier_.get_storage(); } + + private: + ::series_data::querier::Querier querier_; + const Query* query_; + ::series_data::serialization::SerializedData* serialized_data_; + + PROMPP_ALWAYS_INLINE void serialize_chunks() const noexcept { std::construct_at(serialized_data_, querier_.get_storage(), querier_.chunks()); } }; -using QuerierVariant = std::variant; +enum class QuerierType : uint8_t { kInstantQuerier = 0, kRangeQuerier, kRangeQuerierNew }; + +using QuerierVariant = std::variant; using QuerierVariantPtr = std::unique_ptr; } // namespace entrypoint::series_data static_assert(entrypoint::series_data::QuerierInterface); static_assert(entrypoint::series_data::QuerierInterface); +static_assert(entrypoint::series_data::QuerierInterface); \ No newline at end of file diff --git a/pp/entrypoint/series_data_data_storage.cpp b/pp/entrypoint/series_data_data_storage.cpp index acaf85bbfc..c46baafd80 100644 --- a/pp/entrypoint/series_data_data_storage.cpp +++ b/pp/entrypoint/series_data_data_storage.cpp @@ -5,6 +5,7 @@ #include "head/chunk_recoder.h" #include "head/data_storage.h" #include "head/lss.h" +#include "head/serialization.h" #include "primitives/go_slice.h" #include "series_data/data_storage.h" #include "series_data/loader.h" @@ -142,6 +143,37 @@ extern "C" void prompp_series_data_data_storage_query(void* args, void* res) { } } +extern "C" void prompp_series_data_data_storage_query_new(void* args, void* res) { + using Query = series_data::querier::Query>; + using entrypoint::series_data::RangeQuerierWithArgumentsWrapperNew; + using series_data::querier::Querier; + + struct Arguments { + DataStoragePtr data_storage; + Query query; + entrypoint::head::SerializedDataPtr serialized_data; + }; + + struct Result { + QuerierVariantPtr querier{}; + QueryStatus status; + }; + + const auto in = static_cast(args); + + RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, in->serialized_data.get()); + querier.query(); + + if (querier.need_loading()) { + new (res) Result{ + .querier = std::make_unique(std::in_place_index<2>, std::move(querier)), + .status = QueryStatus::kNeedDataLoad, + }; + } else { + new (res) Result{.status = QueryStatus::kSuccess}; + } +} + extern "C" void prompp_series_data_data_storage_instant_query(void* args, void* res) { using entrypoint::series_data::InstantQuerierWithArgumentsWrapperEntrypoint; using PromPP::Primitives::Timestamp; diff --git a/pp/entrypoint/series_data_data_storage.h b/pp/entrypoint/series_data_data_storage.h index a6c47cd756..647a1e7046 100644 --- a/pp/entrypoint/series_data_data_storage.h +++ b/pp/entrypoint/series_data_data_storage.h @@ -108,6 +108,22 @@ void prompp_series_data_data_storage_allocated_memory(void* args, void* res); */ void prompp_series_data_data_storage_query(void* args, void* res); +/** + * @brief Queries data storage and serializes result (new serialization model). + * + * @param args { + * dataStorage uintptr // pointer to constructed data storage + * query DataStorageQuery // query + * serializedData uintptr // pointer to serialized data + * } + * + * @param res { + * Querier uintptr // pointer to constructed Querier if data loading is needed + * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * } + */ +void prompp_series_data_data_storage_query_new(void* args, void* res); + /** * @brief return samples at given timestamp for label sets. * diff --git a/pp/entrypoint/series_data_serialization_serializes_data.cpp b/pp/entrypoint/series_data_serialization_serializes_data.cpp new file mode 100644 index 0000000000..eaa053b707 --- /dev/null +++ b/pp/entrypoint/series_data_serialization_serializes_data.cpp @@ -0,0 +1,79 @@ +#include "series_data_serialization_serializes_data.h" + +#include "head/serialization.h" + +extern "C" void prompp_series_data_serialization_serialized_data_next(void* args, void* res) { + struct Arguments { + entrypoint::head::SerializedDataPtr serialized_data; + }; + + using Result = struct { + uint32_t series_id; + }; + + new (res) Result{.series_id = reinterpret_cast(args)->serialized_data->next_series()}; +} + +extern "C" void prompp_series_data_serialization_serialized_data_iterator(void* args, void* res) { + struct Arguments { + entrypoint::head::SerializedDataPtr serialized_data; + }; + + using Result = struct { + entrypoint::head::SerializedDataIteratorPtr iterator; + }; + + new (res) Result{.iterator = std::make_unique( + static_cast(args)->serialized_data->create_current_series_iterator())}; +} + +extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res) { + using series_data::decoder::DecodeIteratorSentinel; + + struct Arguments { + entrypoint::head::SerializedDataIteratorPtr iterator; + }; + + using Result = struct { + bool has_value; + }; + + Arguments* in = reinterpret_cast(args); + Result* out = new (res) Result(); + + ++(*in->iterator); + out->has_value = (*in->iterator) != DecodeIteratorSentinel{}; +} + +extern "C" void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res) { + struct Arguments { + entrypoint::head::SerializedDataIteratorPtr iterator; + }; + using Result = struct { + int64_t timestamp; + double value; + }; + + Arguments* in = reinterpret_cast(args); + Result* out = new (res) Result(); + + const auto sample = **(in->iterator); + out->timestamp = sample.timestamp; + out->value = sample.value; +} + +extern "C" void prompp_series_data_serialization_serialized_data_iterator_dtor(void* args) { + struct Arguments { + entrypoint::head::SerializedDataIteratorPtr iterator; + }; + + static_cast(args)->~Arguments(); +} + +extern "C" void prompp_series_data_serialization_serialized_data_dtor(void* args) { + struct Arguments { + entrypoint::head::SerializedDataPtr serialized_data; + }; + + static_cast(args)->~Arguments(); +} \ No newline at end of file diff --git a/pp/entrypoint/series_data_serialization_serializes_data.h b/pp/entrypoint/series_data_serialization_serializes_data.h new file mode 100644 index 0000000000..3849fde946 --- /dev/null +++ b/pp/entrypoint/series_data_serialization_serializes_data.h @@ -0,0 +1,80 @@ +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Get next series_id in serialized data. + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + * @param res { + * series_id uint32 // series id (UINT32_MAX if no more series). + * } + */ +void prompp_series_data_serialization_serialized_data_next(void* args, void* res); + +/** + * @brief Create a decode iterator for current series_id (returned by the last call of _next()) + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + * @param res { + * iterator uintptr // pointer to constructed decode iterator. + * } + */ +void prompp_series_data_serialization_serialized_data_iterator(void* args, void* res); + +/** + * @brief Advance decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + * @param res { + * has_data bool // is iterator has more data to decode. + * } + */ +void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); + +/** + * @brief Extract sample form decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + * @param res { + * timestamp int64 // sample timestamp + * value float64 // sample value + * } + */ +void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res); + +/** + * @brief Destroy decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + */ +void prompp_series_data_serialization_serialized_data_iterator_dtor(void* args); + +/** + * @brief Destroy serialized data object. + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + */ +void prompp_series_data_serialization_serialized_data_dtor(void* args); + +#ifdef __cplusplus +} // extern "C" +#endif From 68f7513dc80f37218b21ea8b281688c9701f5810 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 3 Oct 2025 17:12:52 +0300 Subject: [PATCH 08/17] includes fix --- pp/go/cppbridge/entrypoint.h | 96 +++++++++++++++++++ .../serialization/serialized_data.h | 1 + 2 files changed, 97 insertions(+) diff --git a/pp/go/cppbridge/entrypoint.h b/pp/go/cppbridge/entrypoint.h index a59048052c..d68740072e 100755 --- a/pp/go/cppbridge/entrypoint.h +++ b/pp/go/cppbridge/entrypoint.h @@ -1200,6 +1200,22 @@ void prompp_series_data_data_storage_allocated_memory(void* args, void* res); */ void prompp_series_data_data_storage_query(void* args, void* res); +/** + * @brief Queries data storage and serializes result (new serialization model). + * + * @param args { + * dataStorage uintptr // pointer to constructed data storage + * query DataStorageQuery // query + * serializedData uintptr // pointer to serialized data + * } + * + * @param res { + * Querier uintptr // pointer to constructed Querier if data loading is needed + * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * } + */ +void prompp_series_data_data_storage_query_new(void* args, void* res); + /** * @brief return samples at given timestamp for label sets. * @@ -1540,6 +1556,86 @@ void prompp_series_data_encoder_dtor(void* args); extern "C" { #endif +/** + * @brief Get next series_id in serialized data. + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + * @param res { + * series_id uint32 // series id (UINT32_MAX if no more series). + * } + */ +void prompp_series_data_serialization_serialized_data_next(void* args, void* res); + +/** + * @brief Create a decode iterator for current series_id (returned by the last call of _next()) + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + * @param res { + * iterator uintptr // pointer to constructed decode iterator. + * } + */ +void prompp_series_data_serialization_serialized_data_iterator(void* args, void* res); + +/** + * @brief Advance decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + * @param res { + * has_data bool // is iterator has more data to decode. + * } + */ +void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); + +/** + * @brief Extract sample form decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + * @param res { + * timestamp int64 // sample timestamp + * value float64 // sample value + * } + */ +void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res); + +/** + * @brief Destroy decode iterator. + * + * @param args { + * iterator uintptr // pointer to decode iterator + * } + * + */ +void prompp_series_data_serialization_serialized_data_iterator_dtor(void* args); + +/** + * @brief Destroy serialized data object. + * + * @param args { + * serializedData uintptr // pointer to serialized data. + * } + * + */ +void prompp_series_data_serialization_serialized_data_dtor(void* args); + +#ifdef __cplusplus +} // extern "C" +#endif +#ifdef __cplusplus +extern "C" { +#endif + /** * @brief Construct a new WAL Decoder * diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 4ec65095aa..a2cec1c4c2 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -2,6 +2,7 @@ #include "bare_bones/memory.h" #include "series_data/chunk/serialized_chunk.h" #include "series_data/data_storage.h" +#include "series_data/decoder.h" #include "series_data/decoder/universal_decode_iterator.h" #include "series_data/querier/query.h" From 9c5056fbeea74ff6a0b411ba143f375e4576445a Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Mon, 6 Oct 2025 14:37:53 +0300 Subject: [PATCH 09/17] entrypoint fix --- pp/entrypoint/series_data_data_storage.cpp | 13 ++++----- pp/entrypoint/series_data_data_storage.h | 6 ++-- ...ies_data_serialization_serializes_data.cpp | 29 ++++++------------- ...eries_data_serialization_serializes_data.h | 18 ++---------- 4 files changed, 21 insertions(+), 45 deletions(-) diff --git a/pp/entrypoint/series_data_data_storage.cpp b/pp/entrypoint/series_data_data_storage.cpp index c46baafd80..1d41b14c86 100644 --- a/pp/entrypoint/series_data_data_storage.cpp +++ b/pp/entrypoint/series_data_data_storage.cpp @@ -151,26 +151,25 @@ extern "C" void prompp_series_data_data_storage_query_new(void* args, void* res) struct Arguments { DataStoragePtr data_storage; Query query; - entrypoint::head::SerializedDataPtr serialized_data; }; struct Result { QuerierVariantPtr querier{}; QueryStatus status; + entrypoint::head::SerializedDataPtr serialized_data{}; }; const auto in = static_cast(args); + Result* out = new (res) Result(); - RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, in->serialized_data.get()); + RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, out->serialized_data.get()); querier.query(); if (querier.need_loading()) { - new (res) Result{ - .querier = std::make_unique(std::in_place_index<2>, std::move(querier)), - .status = QueryStatus::kNeedDataLoad, - }; + out->querier = std::make_unique(std::in_place_index<2>, std::move(querier)); + out->status = QueryStatus::kNeedDataLoad; } else { - new (res) Result{.status = QueryStatus::kSuccess}; + out->status = QueryStatus::kSuccess; } } diff --git a/pp/entrypoint/series_data_data_storage.h b/pp/entrypoint/series_data_data_storage.h index 647a1e7046..c90ca89c44 100644 --- a/pp/entrypoint/series_data_data_storage.h +++ b/pp/entrypoint/series_data_data_storage.h @@ -114,12 +114,12 @@ void prompp_series_data_data_storage_query(void* args, void* res); * @param args { * dataStorage uintptr // pointer to constructed data storage * query DataStorageQuery // query - * serializedData uintptr // pointer to serialized data * } * * @param res { - * Querier uintptr // pointer to constructed Querier if data loading is needed - * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * Querier uintptr // pointer to constructed Querier if data loading is needed + * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * serializedData uintptr // pointer to serialized data * } */ void prompp_series_data_data_storage_query_new(void* args, void* res); diff --git a/pp/entrypoint/series_data_serialization_serializes_data.cpp b/pp/entrypoint/series_data_serialization_serializes_data.cpp index eaa053b707..da8bdc9b6d 100644 --- a/pp/entrypoint/series_data_serialization_serializes_data.cpp +++ b/pp/entrypoint/series_data_serialization_serializes_data.cpp @@ -35,31 +35,20 @@ extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(v }; using Result = struct { + int64_t timestamp{}; + double value{}; bool has_value; }; Arguments* in = reinterpret_cast(args); - Result* out = new (res) Result(); - ++(*in->iterator); - out->has_value = (*in->iterator) != DecodeIteratorSentinel{}; -} - -extern "C" void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res) { - struct Arguments { - entrypoint::head::SerializedDataIteratorPtr iterator; - }; - using Result = struct { - int64_t timestamp; - double value; - }; - - Arguments* in = reinterpret_cast(args); - Result* out = new (res) Result(); - - const auto sample = **(in->iterator); - out->timestamp = sample.timestamp; - out->value = sample.value; + if (*in->iterator == DecodeIteratorSentinel{}) { + new (res) Result{.has_value = false}; + } else { + const auto sample = **(in->iterator); + new (res) Result{.timestamp = sample.timestamp, .value = sample.value, .has_value = true}; + ++(*in->iterator); + } } extern "C" void prompp_series_data_serialization_serialized_data_iterator_dtor(void* args) { diff --git a/pp/entrypoint/series_data_serialization_serializes_data.h b/pp/entrypoint/series_data_serialization_serializes_data.h index 3849fde946..6530bcb310 100644 --- a/pp/entrypoint/series_data_serialization_serializes_data.h +++ b/pp/entrypoint/series_data_serialization_serializes_data.h @@ -36,24 +36,12 @@ void prompp_series_data_serialization_serialized_data_iterator(void* args, void* * } * * @param res { - * has_data bool // is iterator has more data to decode. - * } - */ -void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); - -/** - * @brief Extract sample form decode iterator. - * - * @param args { - * iterator uintptr // pointer to decode iterator - * } - * - * @param res { + * has_data bool // is iterator has more data to decode. * timestamp int64 // sample timestamp - * value float64 // sample value + * value float64 // sample value * } */ -void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res); +void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); /** * @brief Destroy decode iterator. From ac2350772337a40e80ef449b6376896c2e5c41af Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Mon, 6 Oct 2025 16:24:01 +0300 Subject: [PATCH 10/17] tidy fix --- pp/entrypoint/series_data_serialization_serializes_data.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pp/entrypoint/series_data_serialization_serializes_data.cpp b/pp/entrypoint/series_data_serialization_serializes_data.cpp index da8bdc9b6d..dd7f9e7290 100644 --- a/pp/entrypoint/series_data_serialization_serializes_data.cpp +++ b/pp/entrypoint/series_data_serialization_serializes_data.cpp @@ -34,7 +34,7 @@ extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(v entrypoint::head::SerializedDataIteratorPtr iterator; }; - using Result = struct { + struct Result { int64_t timestamp{}; double value{}; bool has_value; From 110df4c6ddec3c84876ac20a7a6b515a03ff79a2 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Mon, 6 Oct 2025 19:13:37 +0300 Subject: [PATCH 11/17] new recoder --- pp/entrypoint/series_data_data_storage.cpp | 17 +++++++++++++++++ pp/entrypoint/series_data_data_storage.h | 16 ++++++++++++++++ pp/series_data/chunk/serialized_chunk.h | 1 + 3 files changed, 34 insertions(+) diff --git a/pp/entrypoint/series_data_data_storage.cpp b/pp/entrypoint/series_data_data_storage.cpp index 1d41b14c86..0ed84c7dc0 100644 --- a/pp/entrypoint/series_data_data_storage.cpp +++ b/pp/entrypoint/series_data_data_storage.cpp @@ -280,6 +280,23 @@ extern "C" void prompp_series_data_serialized_chunk_recoder_ctor(void* args, voi }; } +extern "C" void prompp_series_data_serialized_chunk_recoder_new_ctor(void* args, void* res) { + struct Arguments { + entrypoint::head::SerializedDataPtr serialized_data; + PromPP::Primitives::TimeInterval time_interval; + }; + struct Result { + ChunkRecoderVariantPtr chunk_recoder; + }; + + const auto in = static_cast(args); + new (res) Result{ + .chunk_recoder = std::make_unique( + std::in_place_type, + series_data::chunk::SerializedChunkIterator{in->serialized_data->get_buffer(), in->serialized_data->get_chunks()}, in->time_interval), + }; +} + extern "C" void prompp_series_data_chunk_recoder_recode_next_chunk(void* args, void* res) { struct Arguments { ChunkRecoderVariantPtr chunk_recoder; diff --git a/pp/entrypoint/series_data_data_storage.h b/pp/entrypoint/series_data_data_storage.h index c90ca89c44..e4c6df4167 100644 --- a/pp/entrypoint/series_data_data_storage.h +++ b/pp/entrypoint/series_data_data_storage.h @@ -194,6 +194,22 @@ void prompp_series_data_chunk_recoder_ctor(void* args, void* res); */ void prompp_series_data_serialized_chunk_recoder_ctor(void* args, void* res); +/** + * @brief Construct a new ChunkRecoder object to recode all serialized chunks (new model) + * + * @param args { + * serializedData uintptr // pointer to serialized data + * time_interval struct { // closed interval [min, max] + * min int64 + * max int64 + * } + * } + * @param res { + * chunk_recoder uintptr // pointer to chunk recoder + * } + */ +void prompp_series_data_serialized_chunk_recoder_new_ctor(void* args, void* res); + /** * @brief Get chunk encoded in prometheus format * diff --git a/pp/series_data/chunk/serialized_chunk.h b/pp/series_data/chunk/serialized_chunk.h index bfa07be766..f0637d5eb2 100644 --- a/pp/series_data/chunk/serialized_chunk.h +++ b/pp/series_data/chunk/serialized_chunk.h @@ -55,6 +55,7 @@ class SerializedChunkIterator { using reference = value_type&; explicit SerializedChunkIterator(std::span buffer) : data_(buffer, get_chunks(buffer)) {} + explicit SerializedChunkIterator(std::span buffer, SerializedChunkSpan chunks) : data_(buffer, chunks) {} [[nodiscard]] PROMPP_ALWAYS_INLINE const Data& operator*() const noexcept { return data_; } [[nodiscard]] PROMPP_ALWAYS_INLINE const Data* operator->() const noexcept { return &data_; } From 8346a7b72f4aa6ad65a1c9c27e02222acf13d035 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Tue, 7 Oct 2025 11:49:25 +0300 Subject: [PATCH 12/17] serializer new model tests update --- .../serialization/serialized_data.h | 9 - .../serializer_deserializer_new_tests.cpp | 171 ++++++++++-------- 2 files changed, 92 insertions(+), 88 deletions(-) diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index a2cec1c4c2..5e6655d1f8 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -78,15 +78,6 @@ class SerializedData { [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t allocated_memory() const noexcept { return chunks_.allocated_memory() + bytes_buffer_.allocated_memory(); } - [[nodiscard]] decoder::UniversalDecodeIterator create_decode_iterator(const chunk::SerializedChunk& chunk) const noexcept { - decoder::UniversalDecodeIterator iterator(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false); - std::span buffer{bytes_buffer_.control_block().data, bytes_buffer_.size()}; - Decoder::create_decode_iterator(buffer, chunk, [&iterator](Iterator&& begin, auto&&) { - iterator = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; - }); - return iterator; - } - [[nodiscard]] uint32_t next_series() noexcept { if (internal_index_ == kNoMoreSeries) [[unlikely]] { if (chunks_.empty()) [[unlikely]] { diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index 33c64be062..e4cdc78f0f 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -29,10 +29,13 @@ class SerializerDeserializerTrait { DataStorage storage_; Encoder<> encoder_{storage_}; - template - [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_chunk(DecodeIterator iterator) { + [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_current_chunk(SerializedData& data, uint32_t series_id) { SampleList result; - std::ranges::copy(iterator, DecodeIteratorSentinel{}, std::back_insert_iterator(result)); + + EXPECT_EQ(series_id, data.next_series()); + + std::ranges::copy(data.create_current_series_iterator(), DecodeIteratorSentinel{}, std::back_insert_iterator(result)); + return result; } }; @@ -43,11 +46,9 @@ TEST_F(SerializerDeserializerFixtureNew, EmptyChunksList) { // Arrange // Act - const SerializedData serialized(storage_, {}); - // const Deserializer deserializer(serialized); + SerializedData serialized(storage_, {}); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(0U, serialized.get_chunks().size()); ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized.get_buffer().size()); } @@ -64,11 +65,9 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta encoder_.encode(1, 3, 1.0); // Act - const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); - // const Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(2U, serialized.get_chunks().size()); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); @@ -79,7 +78,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ @@ -87,7 +86,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); } TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUniqueTimestampStream) { @@ -106,11 +105,9 @@ TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUn encoder_.encode(2, 3, 2.0); // Act - const SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); - // const Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(3U, serialized.get_chunks().size()); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); @@ -122,21 +119,21 @@ TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUn {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 2.0}, {.timestamp = 2, .value = 2.0}, {.timestamp = 3, .value = 2.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); + decode_current_chunk(serialized, 2))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { @@ -173,11 +170,9 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { encoder_.encode(8, 123, 4.1); // Act - const SerializedData serialized(storage_); - // Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(10U, serialized.get_chunks().size()); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); @@ -195,50 +190,50 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); + decode_current_chunk(serialized, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); + decode_current_chunk(serialized, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); + decode_current_chunk(serialized, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); + decode_current_chunk(serialized, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); + decode_current_chunk(serialized, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); + decode_current_chunk(serialized, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -246,13 +241,13 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); + decode_current_chunk(serialized, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); + decode_current_chunk(serialized, 20))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { @@ -299,11 +294,9 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - const SerializedData serialized(storage_); - // Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(10U, serialized.get_chunks().size()); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); @@ -321,50 +314,50 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); + decode_current_chunk(serialized, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); + decode_current_chunk(serialized, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); + decode_current_chunk(serialized, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); + decode_current_chunk(serialized, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); + decode_current_chunk(serialized, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); + decode_current_chunk(serialized, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -372,13 +365,13 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); + decode_current_chunk(serialized, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); + decode_current_chunk(serialized, 20))); } TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { @@ -388,15 +381,14 @@ TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); // Act - const SerializedData serialized(storage_, {QueriedChunk{1}}); - // const Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_, {QueriedChunk{1}}); // Assert EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 1))); } TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { @@ -408,8 +400,7 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { encoder_.encode(0, 103, 1.0); // Act - const SerializedData serialized(storage_); - // const Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_); // Assert EXPECT_TRUE(std::ranges::equal( @@ -417,13 +408,9 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = 1.0}, {.timestamp = 102, .value = 1.0}, - }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); - EXPECT_TRUE(std::ranges::equal( - SampleList{ {.timestamp = 103, .value = 1.0}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 0))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { @@ -470,11 +457,9 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { encoder_.encode(8, 134, STALE_NAN); // Act - const SerializedData serialized(storage_); - // Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(10U, serialized.get_chunks().size()); EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); @@ -494,20 +479,20 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); + decode_current_chunk(serialized, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -515,7 +500,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); + decode_current_chunk(serialized, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -523,7 +508,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); + decode_current_chunk(serialized, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -531,20 +516,20 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); + decode_current_chunk(serialized, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); + decode_current_chunk(serialized, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); + decode_current_chunk(serialized, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -553,14 +538,14 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); + decode_current_chunk(serialized, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); + decode_current_chunk(serialized, 20))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { @@ -617,11 +602,9 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - const SerializedData serialized(storage_); - // Deserializer deserializer(get_buffer()); + SerializedData serialized(storage_); // Assert - // ASSERT_TRUE(deserializer.is_valid()); ASSERT_EQ(10U, serialized.get_chunks().size()); EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); @@ -641,20 +624,20 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[0])))); + decode_current_chunk(serialized, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[1])))); + decode_current_chunk(serialized, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[2])))); + decode_current_chunk(serialized, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -662,7 +645,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[3])))); + decode_current_chunk(serialized, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -670,7 +653,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[4])))); + decode_current_chunk(serialized, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -678,20 +661,20 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[5])))); + decode_current_chunk(serialized, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[6])))); + decode_current_chunk(serialized, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[7])))); + decode_current_chunk(serialized, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -700,14 +683,14 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[8])))); + decode_current_chunk(serialized, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_chunk(serialized.create_decode_iterator(serialized.get_chunks()[9])))); + decode_current_chunk(serialized, 20))); } class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, public testing::Test { @@ -767,6 +750,36 @@ TEST_F(DeserializerIteratorFixtureNew, OneChunkFinalized) { decoded_chunks)); } +TEST_F(DeserializerIteratorFixtureNew, OneChunkSeveralFinalized) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(0, 2, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 3, 1.0); + encoder_.encode(0, 4, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 5, 1.0); + encoder_.encode(0, 6, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 7, 1.0); + encoder_.encode(0, 8, 1.0); + + // Act + SerializedData serialized(storage_); + auto decoded_chunks = decode_chunks(serialized); + + // Assert + EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + {.timestamp = 4, .value = 1.0}, + {.timestamp = 5, .value = 1.0}, + {.timestamp = 6, .value = 1.0}, + {.timestamp = 7, .value = 1.0}, + {.timestamp = 8, .value = 1.0}}}, + decoded_chunks)); +} + TEST_F(DeserializerIteratorFixtureNew, TwoChunks) { // Arrange encoder_.encode(0, 1, 1.0); From c7b82164f11e38a3daa1b6d52fb200f969022532 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Thu, 9 Oct 2025 20:44:09 +0300 Subject: [PATCH 13/17] review fixes --- pp/entrypoint/head/serialization.h | 20 +- pp/entrypoint/series_data/querier.h | 8 +- pp/entrypoint/series_data_data_storage.cpp | 10 +- ...es_data_serialization_serialized_data.cpp} | 15 +- ...ries_data_serialization_serialized_data.h} | 0 .../benchmarks/serializer_benchmark.cpp | 106 ++---- .../serialization/serialized_data.h | 264 ++++++------- .../serializer_deserializer_new_tests.cpp | 348 +++++++----------- 8 files changed, 343 insertions(+), 428 deletions(-) rename pp/entrypoint/{series_data_serialization_serializes_data.cpp => series_data_serialization_serialized_data.cpp} (79%) rename pp/entrypoint/{series_data_serialization_serializes_data.h => series_data_serialization_serialized_data.h} (100%) diff --git a/pp/entrypoint/head/serialization.h b/pp/entrypoint/head/serialization.h index 6ac36a2479..eb732d1943 100644 --- a/pp/entrypoint/head/serialization.h +++ b/pp/entrypoint/head/serialization.h @@ -4,8 +4,24 @@ namespace entrypoint::head { -using SerializedDataPtr = std::unique_ptr; -using SerializedDataIteratorPtr = std::unique_ptr; +class SerializedDataGo { + public: + explicit SerializedDataGo(const series_data::DataStorage& storage, const series_data::querier::QueriedChunkList& queried_chunks) + : data_{series_data::serialization::DataSerializer::serialize(storage, queried_chunks)} {} + + [[nodiscard]] PROMPP_ALWAYS_INLINE auto get_buffer() const noexcept { return data_view_.get_buffer(); } + [[nodiscard]] PROMPP_ALWAYS_INLINE auto get_chunks() const noexcept { return data_view_.get_chunks(); } + + [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t next() noexcept { return data_view_.next_series(); } + [[nodiscard]] PROMPP_ALWAYS_INLINE auto iterator() const noexcept { return data_view_.create_current_series_iterator(); } + + private: + series_data::serialization::SerializedData data_; + series_data::serialization::SerializedDataView data_view_{data_}; +}; + +using SerializedDataPtr = std::unique_ptr; +using SerializedDataIteratorPtr = std::unique_ptr; static_assert(sizeof(SerializedDataPtr) == sizeof(void*)); static_assert(sizeof(SerializedDataIteratorPtr) == sizeof(void*)); diff --git a/pp/entrypoint/series_data/querier.h b/pp/entrypoint/series_data/querier.h index 08b0434b73..76d2f05fd8 100644 --- a/pp/entrypoint/series_data/querier.h +++ b/pp/entrypoint/series_data/querier.h @@ -98,7 +98,7 @@ class RangeQuerierWithArgumentsWrapperNew { using BytesStream = PromPP::Primitives::Go::BytesStream; public: - RangeQuerierWithArgumentsWrapperNew(DataStorage& storage, const Query& query, ::series_data::serialization::SerializedData* serialized_data) + RangeQuerierWithArgumentsWrapperNew(DataStorage& storage, const Query& query, head::SerializedDataPtr* serialized_data) : querier_(storage), query_(&query), serialized_data_(serialized_data) {} void query() noexcept { @@ -117,9 +117,11 @@ class RangeQuerierWithArgumentsWrapperNew { private: ::series_data::querier::Querier querier_; const Query* query_; - ::series_data::serialization::SerializedData* serialized_data_; + head::SerializedDataPtr* serialized_data_; - PROMPP_ALWAYS_INLINE void serialize_chunks() const noexcept { std::construct_at(serialized_data_, querier_.get_storage(), querier_.chunks()); } + PROMPP_ALWAYS_INLINE void serialize_chunks() const noexcept { + std::construct_at(serialized_data_, std::make_unique(querier_.get_storage(), querier_.chunks())); + } }; enum class QuerierType : uint8_t { kInstantQuerier = 0, kRangeQuerier, kRangeQuerierNew }; diff --git a/pp/entrypoint/series_data_data_storage.cpp b/pp/entrypoint/series_data_data_storage.cpp index 0ed84c7dc0..40bb15bced 100644 --- a/pp/entrypoint/series_data_data_storage.cpp +++ b/pp/entrypoint/series_data_data_storage.cpp @@ -156,13 +156,13 @@ extern "C" void prompp_series_data_data_storage_query_new(void* args, void* res) struct Result { QuerierVariantPtr querier{}; QueryStatus status; - entrypoint::head::SerializedDataPtr serialized_data{}; + entrypoint::head::SerializedDataPtr* serialized_data{}; }; const auto in = static_cast(args); - Result* out = new (res) Result(); + auto* out = new (res) Result(); - RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, out->serialized_data.get()); + RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, out->serialized_data); querier.query(); if (querier.need_loading()) { @@ -282,7 +282,7 @@ extern "C" void prompp_series_data_serialized_chunk_recoder_ctor(void* args, voi extern "C" void prompp_series_data_serialized_chunk_recoder_new_ctor(void* args, void* res) { struct Arguments { - entrypoint::head::SerializedDataPtr serialized_data; + entrypoint::head::SerializedDataPtr* serialized_data; PromPP::Primitives::TimeInterval time_interval; }; struct Result { @@ -293,7 +293,7 @@ extern "C" void prompp_series_data_serialized_chunk_recoder_new_ctor(void* args, new (res) Result{ .chunk_recoder = std::make_unique( std::in_place_type, - series_data::chunk::SerializedChunkIterator{in->serialized_data->get_buffer(), in->serialized_data->get_chunks()}, in->time_interval), + series_data::chunk::SerializedChunkIterator{in->serialized_data->get()->get_buffer(), in->serialized_data->get()->get_chunks()}, in->time_interval), }; } diff --git a/pp/entrypoint/series_data_serialization_serializes_data.cpp b/pp/entrypoint/series_data_serialization_serialized_data.cpp similarity index 79% rename from pp/entrypoint/series_data_serialization_serializes_data.cpp rename to pp/entrypoint/series_data_serialization_serialized_data.cpp index dd7f9e7290..28e4f4e8bf 100644 --- a/pp/entrypoint/series_data_serialization_serializes_data.cpp +++ b/pp/entrypoint/series_data_serialization_serialized_data.cpp @@ -1,30 +1,30 @@ -#include "series_data_serialization_serializes_data.h" +#include "series_data_serialization_serialized_data.h" #include "head/serialization.h" extern "C" void prompp_series_data_serialization_serialized_data_next(void* args, void* res) { struct Arguments { - entrypoint::head::SerializedDataPtr serialized_data; + entrypoint::head::SerializedDataPtr* serialized_data; }; using Result = struct { uint32_t series_id; }; - new (res) Result{.series_id = reinterpret_cast(args)->serialized_data->next_series()}; + new (res) Result{.series_id = reinterpret_cast(args)->serialized_data->get()->next()}; } extern "C" void prompp_series_data_serialization_serialized_data_iterator(void* args, void* res) { struct Arguments { - entrypoint::head::SerializedDataPtr serialized_data; + entrypoint::head::SerializedDataPtr* serialized_data; }; using Result = struct { entrypoint::head::SerializedDataIteratorPtr iterator; }; - new (res) Result{.iterator = std::make_unique( - static_cast(args)->serialized_data->create_current_series_iterator())}; + new (res) Result{.iterator = std::make_unique( + static_cast(args)->serialized_data->get()->iterator())}; } extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res) { @@ -61,8 +61,9 @@ extern "C" void prompp_series_data_serialization_serialized_data_iterator_dtor(v extern "C" void prompp_series_data_serialization_serialized_data_dtor(void* args) { struct Arguments { - entrypoint::head::SerializedDataPtr serialized_data; + entrypoint::head::SerializedDataPtr* serialized_data; }; + std::destroy_at(static_cast(args)->serialized_data); static_cast(args)->~Arguments(); } \ No newline at end of file diff --git a/pp/entrypoint/series_data_serialization_serializes_data.h b/pp/entrypoint/series_data_serialization_serialized_data.h similarity index 100% rename from pp/entrypoint/series_data_serialization_serializes_data.h rename to pp/entrypoint/series_data_serialization_serialized_data.h diff --git a/pp/series_data/benchmarks/serializer_benchmark.cpp b/pp/series_data/benchmarks/serializer_benchmark.cpp index 62cab803a8..6f54fd9052 100644 --- a/pp/series_data/benchmarks/serializer_benchmark.cpp +++ b/pp/series_data/benchmarks/serializer_benchmark.cpp @@ -7,6 +7,7 @@ #include #include "bare_bones/preprocess.h" +#include "primitives/go_slice.h" #include "series_data/encoder.h" #include "series_data/querier/query.h" #include "series_data/serialization/serialized_data.h" @@ -16,6 +17,9 @@ namespace { using BareBones::StreamVByte::CompactSequence; using BareBones::StreamVByte::Sequence; +using series_data::serialization::DataSerializer; +using series_data::serialization::SerializedData; +using series_data::serialization::SerializedDataView; struct PROMPP_ATTRIBUTE_PACKED SeriesSample { uint32_t series_id; @@ -41,6 +45,24 @@ const BareBones::Vector& get_samples_for_benchmark() { return samples_from_file; } +series_data::querier::QueriedChunkList generate_query(uint32_t size) { + series_data::querier::QueriedChunkList chunk_list; + + std::vector v(size); + std::iota(v.begin(), v.end(), 0); + + std::mt19937 g(42); + std::ranges::shuffle(v, g); + v.resize(v.size() / 10); + + chunk_list.reserve(v.size()); + for (uint32_t ls_id : v) { + chunk_list.emplace_back(ls_id); + } + + return chunk_list; +} + void BenchmarkWalSerializer(benchmark::State& state) { const auto& samples = get_samples_for_benchmark(); const double percent = state.range(0) / 100.0; @@ -58,34 +80,23 @@ void BenchmarkWalSerializer(benchmark::State& state) { } } - series_data::querier::QueriedChunkList chunk_list; - { - std::vector v(storage.open_chunks.size()); - std::iota(v.begin(), v.end(), 0); - - std::mt19937 g(42); - std::ranges::shuffle(v, g); - v.resize(v.size() / 10); - - chunk_list.reserve(v.size()); - for (uint32_t ls_id : v) { - chunk_list.emplace_back(ls_id); - } - } + const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + PromPP::Primitives::Go::Slice slice; + PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); } { series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + PromPP::Primitives::Go::Slice slice; + PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["Stream Size"] = benchmark::Counter(slice.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } @@ -106,34 +117,23 @@ void BenchmarkWalConstantSerializer(benchmark::State& state) { } } - series_data::querier::QueriedChunkList chunk_list; - { - std::vector v(storage.open_chunks.size()); - std::iota(v.begin(), v.end(), 0); - - std::mt19937 g(42); - std::ranges::shuffle(v, g); - v.resize(v.size() / 10); - - chunk_list.reserve(v.size()); - for (uint32_t ls_id : v) { - chunk_list.emplace_back(ls_id); - } - } + const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + PromPP::Primitives::Go::Slice slice; + PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); } { series_data::serialization::Serializer serializer_{storage}; - BareBones::ShrinkedToFitOStringStream stream; + PromPP::Primitives::Go::Slice slice; + PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(stream.view().size(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["Stream Size"] = benchmark::Counter(slice.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } @@ -154,28 +154,15 @@ void BenchmarkWalSerializedData(benchmark::State& state) { } } - series_data::querier::QueriedChunkList chunk_list; - { - std::vector v(storage.open_chunks.size()); - std::iota(v.begin(), v.end(), 0); - - std::mt19937 g(42); - std::ranges::shuffle(v, g); - v.resize(v.size() / 10); - - chunk_list.reserve(v.size()); - for (uint32_t ls_id : v) { - chunk_list.emplace_back(ls_id); - } - } + const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { - series_data::serialization::SerializedData serialized(storage, chunk_list); + SerializedData serialized = DataSerializer::serialize(storage, chunk_list); benchmark::DoNotOptimize(serialized); } { - series_data::serialization::SerializedData serialized(storage, chunk_list); + SerializedData serialized = DataSerializer::serialize(storage, chunk_list); state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } @@ -197,28 +184,15 @@ void BenchmarkWalConstantSerializedData(benchmark::State& state) { } } - series_data::querier::QueriedChunkList chunk_list; - { - std::vector v(storage.open_chunks.size()); - std::iota(v.begin(), v.end(), 0); - - std::mt19937 g(42); - std::ranges::shuffle(v, g); - v.resize(v.size() / 10); - - chunk_list.reserve(v.size()); - for (uint32_t ls_id : v) { - chunk_list.emplace_back(ls_id); - } - } + const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { - series_data::serialization::SerializedData serialized(storage, chunk_list); + SerializedData serialized = DataSerializer::serialize(storage, chunk_list); benchmark::DoNotOptimize(serialized); } { - series_data::serialization::SerializedData serialized(storage, chunk_list); + SerializedData serialized = DataSerializer::serialize(storage, chunk_list); state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 5e6655d1f8..7e6f7ede6c 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -7,106 +7,20 @@ #include "series_data/querier/query.h" namespace series_data::serialization { -class SerializedData { - public: - static constexpr uint32_t kNoMoreSeries = std::numeric_limits::max(); - class SerializedSeriesIterator { - public: - using iterator_category = std::forward_iterator_tag; - using value_type = encoder::Sample; - using difference_type = ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - - SerializedSeriesIterator(const BareBones::Memory& buffer, - chunk::SerializedChunkSpan chunks, - uint32_t chunk_id) - : decode_iter_(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false), - chunk_iter_(chunks.begin() + chunk_id), - series_id_(chunk_iter_->label_set_id), - buffer_(buffer.control_block().data, buffer.size()), - chunks_(chunks) { - Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { - decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; - }); - } +struct SerializedData { + BareBones::Vector chunks; + BareBones::Memory bytes_buffer; - [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample& operator*() const noexcept { return *decode_iter_; } - [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample* operator->() const noexcept { return decode_iter_.operator->(); } - - PROMPP_ALWAYS_INLINE SerializedSeriesIterator& operator++() noexcept { - ++decode_iter_; - if (decode_iter_ == decoder::DecodeIteratorSentinel{}) [[unlikely]] { - if (std::next(chunk_iter_) != chunks_.end() && series_id_ == std::next(chunk_iter_)->label_set_id) { - ++chunk_iter_; - Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { - decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; - }); - } - } - return *this; - } - - PROMPP_ALWAYS_INLINE SerializedSeriesIterator operator++(int) noexcept { - const auto it = *this; - ++*this; - return it; - } - - PROMPP_ALWAYS_INLINE bool operator==(const decoder::DecodeIteratorSentinel&) const noexcept { - return (decode_iter_ == decoder::DecodeIteratorSentinel{}) && - (std::next(chunk_iter_) == chunks_.end() || series_id_ != std::next(chunk_iter_)->label_set_id); - } - - private: - decoder::UniversalDecodeIterator decode_iter_; - chunk::SerializedChunkSpan::const_iterator chunk_iter_; - uint32_t series_id_; - - std::span buffer_; - chunk::SerializedChunkSpan chunks_; - }; - - explicit SerializedData(const DataStorage& storage, const querier::QueriedChunkList& queried_chunks) noexcept { serialize_internal(storage, queried_chunks); } - explicit SerializedData(const DataStorage& storage) noexcept { serialize_internal(storage, storage.chunks()); } - - [[nodiscard]] PROMPP_ALWAYS_INLINE chunk::SerializedChunkSpan get_chunks() const noexcept { return {chunks_.data(), chunks_.size()}; } - [[nodiscard]] PROMPP_ALWAYS_INLINE std::span get_buffer() const noexcept { - return {bytes_buffer_.control_block().data, bytes_buffer_.size()}; - } - - [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t allocated_memory() const noexcept { return chunks_.allocated_memory() + bytes_buffer_.allocated_memory(); } - - [[nodiscard]] uint32_t next_series() noexcept { - if (internal_index_ == kNoMoreSeries) [[unlikely]] { - if (chunks_.empty()) [[unlikely]] { - return kNoMoreSeries; - } - internal_index_ = 0; - return chunks_[0].label_set_id; - } - - if (internal_index_ == chunks_.size()) [[unlikely]] { - return kNoMoreSeries; - } - - const uint32_t current_series_id = chunks_[internal_index_].label_set_id; - while (internal_index_ < chunks_.size() && current_series_id == chunks_[internal_index_].label_set_id) { - ++internal_index_; - } - - if (internal_index_ == chunks_.size()) [[unlikely]] { - return kNoMoreSeries; - } - - return chunks_[internal_index_].label_set_id; - } + [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t allocated_memory() const noexcept { return chunks.allocated_memory() + bytes_buffer.allocated_memory(); } +}; - [[nodiscard]] SerializedSeriesIterator create_current_series_iterator() const noexcept { return {bytes_buffer_, get_chunks(), internal_index_}; } - [[nodiscard]] auto create_current_series_range() const noexcept { - return std::ranges::subrange(SerializedSeriesIterator{bytes_buffer_, get_chunks(), internal_index_}, decoder::DecodeIteratorSentinel{}); +class DataSerializer { + public: + static SerializedData serialize(const DataStorage& storage, const querier::QueriedChunkList& queried_chunks) noexcept { + return serialize_internal(storage, queried_chunks); } + static SerializedData serialize(const DataStorage& storage) noexcept { return serialize_internal(storage, storage.chunks()); } private: struct TimestampStreamsData { @@ -120,12 +34,11 @@ class SerializedData { }; template - void serialize_internal(const DataStorage& storage, const ChunkList& chunks) noexcept { + static SerializedData serialize_internal(const DataStorage& storage, const ChunkList& chunks) noexcept { const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); - const uint32_t chunk_count = get_chunk_count(chunks); - - chunks_.reserve(chunk_count); + SerializedData serialized_data; + serialized_data.chunks.reserve(get_chunk_count(chunks)); uint32_t data_size = 0; @@ -135,16 +48,19 @@ class SerializedData { if (chunk_data.is_open()) [[likely]] { if (const auto& chunk = get_chunk(storage, chunk_data); !chunk.is_empty()) [[likely]] { - fill_serialized_chunk(storage, chunk, chunks_.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, bytes_buffer_); + fill_serialized_chunk(storage, chunk, serialized_data.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, + serialized_data.bytes_buffer); } } else { - fill_serialized_chunk(storage, get_chunk(storage, chunk_data), chunks_.emplace_back(chunk_data.series_id()), - timestamp_streams_data, data_size, bytes_buffer_); + fill_serialized_chunk(storage, get_chunk(storage, chunk_data), serialized_data.chunks.emplace_back(chunk_data.series_id()), + timestamp_streams_data, data_size, serialized_data.bytes_buffer); } } - bytes_buffer_.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); - std::memcpy(bytes_buffer_.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); + serialized_data.bytes_buffer.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); + std::memcpy(serialized_data.bytes_buffer.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); + + return serialized_data; } template @@ -157,12 +73,12 @@ class SerializedData { } template - void fill_serialized_chunk(const DataStorage& storage, - const chunk::DataChunk& chunk, - chunk::SerializedChunk& serialized_chunk, - TimestampStreamsData& timestamp_streams_data, - uint32_t& data_size, - BareBones::Memory& buffer) const noexcept { + static void fill_serialized_chunk(const DataStorage& storage, + const chunk::DataChunk& chunk, + chunk::SerializedChunk& serialized_chunk, + TimestampStreamsData& timestamp_streams_data, + uint32_t& data_size, + BareBones::Memory& buffer) noexcept { using enum EncodingType; serialized_chunk.encoding_state = chunk.encoding_state; @@ -185,8 +101,7 @@ class SerializedData { case kDoubleConstant: { serialized_chunk.set_offset(data_size); buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::DoubleConstantEncoder)); - std::memcpy(buffer.control_block().data + data_size, &storage.variant_encoders[chunk.encoder.external_index].double_constant, - sizeof(encoder::value::DoubleConstantEncoder)); + std::memcpy(buffer + data_size, &storage.variant_encoders[chunk.encoder.external_index].double_constant, sizeof(encoder::value::DoubleConstantEncoder)); data_size += sizeof(encoder::value::DoubleConstantEncoder); break; } @@ -194,7 +109,7 @@ class SerializedData { case kTwoDoubleConstant: { serialized_chunk.set_offset(data_size); buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::TwoDoubleConstantEncoder)); - std::memcpy(buffer.control_block().data + data_size, &storage.variant_encoders[chunk.encoder.external_index].two_double_constant, + std::memcpy(buffer + data_size, &storage.variant_encoders[chunk.encoder.external_index].two_double_constant, sizeof(encoder::value::TwoDoubleConstantEncoder)); data_size += sizeof(encoder::value::TwoDoubleConstantEncoder); break; @@ -247,12 +162,12 @@ class SerializedData { } template - void fill_timestamp_stream_offset(const DataStorage& storage, - TimestampStreamsData& timestamp_streams_data, - encoder::timestamp::State::Id timestamp_stream_id, - chunk::SerializedChunk& serialized_chunk, - uint32_t& data_size, - BareBones::Memory& buffer) const noexcept { + static void fill_timestamp_stream_offset(const DataStorage& storage, + TimestampStreamsData& timestamp_streams_data, + encoder::timestamp::State::Id timestamp_stream_id, + chunk::SerializedChunk& serialized_chunk, + uint32_t& data_size, + BareBones::Memory& buffer) noexcept { if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) [[unlikely]] { timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); @@ -276,15 +191,112 @@ class SerializedData { template static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, uint32_t& data_size, - BareBones::Memory& buffer) { + BareBones::Memory& buffer) noexcept { const auto bytes_count = bit_sequence.size_in_bytes(); buffer.grow_to_fit_at_least(data_size + bytes_count); - std::memcpy(buffer.control_block().data + data_size, bit_sequence.raw_bytes(), bytes_count); + std::memcpy(buffer + data_size, bit_sequence.raw_bytes(), bytes_count); data_size += bytes_count; } +}; + +class SerializedDataView { + public: + static constexpr uint32_t kNoMoreSeries = std::numeric_limits::max(); + + class SerializedSeriesIterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = encoder::Sample; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + SerializedSeriesIterator(const BareBones::Memory& buffer, + chunk::SerializedChunkSpan chunks, + uint32_t chunk_id) + : decode_iter_(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false), + chunk_iter_(chunks.begin() + chunk_id), + series_id_(chunk_iter_->label_set_id), + buffer_(buffer.control_block().data, buffer.size()), + chunks_(chunks) { + Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { + decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + } + + [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample& operator*() const noexcept { return *decode_iter_; } + [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample* operator->() const noexcept { return decode_iter_.operator->(); } + + PROMPP_ALWAYS_INLINE SerializedSeriesIterator& operator++() noexcept { + ++decode_iter_; + if (decode_iter_ == decoder::DecodeIteratorSentinel{}) [[unlikely]] { + if (std::next(chunk_iter_) != chunks_.end() && series_id_ == std::next(chunk_iter_)->label_set_id) { + ++chunk_iter_; + Decoder::create_decode_iterator(buffer_, *chunk_iter_, [&](Iterator&& begin, auto&&) { + decode_iter_ = decoder::UniversalDecodeIterator{std::in_place_type, std::forward(begin)}; + }); + } + } + return *this; + } + + PROMPP_ALWAYS_INLINE SerializedSeriesIterator operator++(int) noexcept { + const auto it = *this; + ++*this; + return it; + } + + PROMPP_ALWAYS_INLINE bool operator==(const decoder::DecodeIteratorSentinel&) const noexcept { + return (decode_iter_ == decoder::DecodeIteratorSentinel{}) && + (std::next(chunk_iter_) == chunks_.end() || series_id_ != std::next(chunk_iter_)->label_set_id); + } + + private: + decoder::UniversalDecodeIterator decode_iter_; + chunk::SerializedChunkSpan::const_iterator chunk_iter_; + uint32_t series_id_; + + std::span buffer_; + chunk::SerializedChunkSpan chunks_; + }; + + explicit SerializedDataView(const SerializedData& serialized_data) : data_(serialized_data), series_index_{kNoMoreSeries} {} + + [[nodiscard]] PROMPP_ALWAYS_INLINE chunk::SerializedChunkSpan get_chunks() const noexcept { return {data_.chunks.data(), data_.chunks.size()}; } + [[nodiscard]] PROMPP_ALWAYS_INLINE std::span get_buffer() const noexcept { + return {data_.bytes_buffer.control_block().data, data_.bytes_buffer.size()}; + } - BareBones::Vector chunks_; - BareBones::Memory bytes_buffer_; - uint32_t internal_index_ = std::numeric_limits::max(); + [[nodiscard]] uint32_t next_series() noexcept { + const auto& chunks = data_.chunks; + if (series_index_ == kNoMoreSeries) [[unlikely]] { + if (chunks.empty()) [[unlikely]] { + return kNoMoreSeries; + } + series_index_ = 0; + return chunks[0].label_set_id; + } + + if (series_index_ == chunks.size()) [[unlikely]] { + return kNoMoreSeries; + } + + const uint32_t current_series_id = chunks[series_index_].label_set_id; + while (series_index_ < chunks.size() && current_series_id == chunks[series_index_].label_set_id) { + ++series_index_; + } + + if (series_index_ == chunks.size()) [[unlikely]] { + return kNoMoreSeries; + } + + return chunks[series_index_].label_set_id; + } + + [[nodiscard]] SerializedSeriesIterator create_current_series_iterator() const noexcept { return {data_.bytes_buffer, get_chunks(), series_index_}; } + + private: + const SerializedData& data_; + uint32_t series_index_; }; -} // namespace series_data::serialization \ No newline at end of file +}; // namespace series_data::serialization \ No newline at end of file diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index e4cdc78f0f..9b7c806b5c 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -21,15 +21,16 @@ using series_data::encoder::Sample; using series_data::encoder::SampleList; using series_data::querier::QueriedChunk; using series_data::querier::QueriedChunkList; -using series_data::serialization::Deserializer; +using series_data::serialization::DataSerializer; using series_data::serialization::SerializedData; +using series_data::serialization::SerializedDataView; class SerializerDeserializerTrait { protected: DataStorage storage_; Encoder<> encoder_{storage_}; - [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_current_chunk(SerializedData& data, uint32_t series_id) { + [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_current_chunk(SerializedDataView& data, uint32_t series_id) { SampleList result; EXPECT_EQ(series_id, data.next_series()); @@ -46,11 +47,12 @@ TEST_F(SerializerDeserializerFixtureNew, EmptyChunksList) { // Arrange // Act - SerializedData serialized(storage_, {}); + SerializedData serialized = DataSerializer::serialize(storage_, {}); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(0U, serialized.get_chunks().size()); - ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized.get_buffer().size()); + ASSERT_EQ(0U, serialized_view.get_chunks().size()); + ASSERT_EQ(series_data::encoder::CompactBitSequence::reserved_bytes_for_reader().size(), serialized_view.get_buffer().size()); } TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimestampStream) { @@ -65,20 +67,21 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta encoder_.encode(1, 3, 1.0); // Act - SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); + SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0}, QueriedChunk{1}}); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(2U, serialized.get_chunks().size()); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); - EXPECT_EQ(serialized.get_chunks()[0].timestamps_offset, serialized.get_chunks()[1].timestamps_offset); + ASSERT_EQ(2U, serialized_view.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + EXPECT_EQ(serialized_view.get_chunks()[0].timestamps_offset, serialized_view.get_chunks()[1].timestamps_offset); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ @@ -86,7 +89,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); } TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUniqueTimestampStream) { @@ -105,35 +108,36 @@ TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUn encoder_.encode(2, 3, 2.0); // Act - SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); + SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(3U, serialized.get_chunks().size()); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[2].encoding_state.encoding_type); - EXPECT_EQ(serialized.get_chunks()[0].timestamps_offset, serialized.get_chunks()[1].timestamps_offset); + ASSERT_EQ(3U, serialized_view.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + EXPECT_EQ(serialized_view.get_chunks()[0].timestamps_offset, serialized_view.get_chunks()[1].timestamps_offset); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 1, .value = 2.0}, {.timestamp = 2, .value = 2.0}, {.timestamp = 3, .value = 2.0}, }, - decode_current_chunk(serialized, 2))); + decode_current_chunk(serialized_view, 2))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { @@ -170,70 +174,71 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { encoder_.encode(8, 123, 4.1); // Act - SerializedData serialized(storage_); + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(10U, serialized.get_chunks().size()); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); - ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); + ASSERT_EQ(10U, serialized_view.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized_view.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized_view.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized_view.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized_view.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized_view.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized_view.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_current_chunk(serialized, 2))); + decode_current_chunk(serialized_view, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_current_chunk(serialized, 3))); + decode_current_chunk(serialized_view, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_current_chunk(serialized, 4))); + decode_current_chunk(serialized_view, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_current_chunk(serialized, 5))); + decode_current_chunk(serialized_view, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_current_chunk(serialized, 6))); + decode_current_chunk(serialized_view, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_current_chunk(serialized, 7))); + decode_current_chunk(serialized_view, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -241,13 +246,13 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_current_chunk(serialized, 8))); + decode_current_chunk(serialized_view, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_current_chunk(serialized, 20))); + decode_current_chunk(serialized_view, 20))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { @@ -294,70 +299,71 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - SerializedData serialized(storage_); + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(10U, serialized.get_chunks().size()); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); - ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); + ASSERT_EQ(10U, serialized_view.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized_view.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized_view.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized_view.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized_view.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized_view.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized_view.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 101, .value = 1.1}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = 1.2}, }, - decode_current_chunk(serialized, 2))); + decode_current_chunk(serialized_view, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.0}, {.timestamp = 105, .value = 2.0}, {.timestamp = 106, .value = 3.0}, }, - decode_current_chunk(serialized, 3))); + decode_current_chunk(serialized_view, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, {.timestamp = 109, .value = 3.1}, }, - decode_current_chunk(serialized, 4))); + decode_current_chunk(serialized_view, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 110, .value = 1.1}, {.timestamp = 111, .value = 2.1}, {.timestamp = 112, .value = 3.1}, }, - decode_current_chunk(serialized, 5))); + decode_current_chunk(serialized_view, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 113, .value = 2.0}, }, - decode_current_chunk(serialized, 6))); + decode_current_chunk(serialized_view, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 114, .value = -1.0}, {.timestamp = 115, .value = -1.0}, }, - decode_current_chunk(serialized, 7))); + decode_current_chunk(serialized_view, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 120, .value = 1.0}, @@ -365,13 +371,13 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { {.timestamp = 122, .value = 3.0}, {.timestamp = 123, .value = 4.1}, }, - decode_current_chunk(serialized, 8))); + decode_current_chunk(serialized_view, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.1}, {.timestamp = 108, .value = 2.1}, }, - decode_current_chunk(serialized, 20))); + decode_current_chunk(serialized_view, 20))); } TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { @@ -381,14 +387,15 @@ TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); // Act - SerializedData serialized(storage_, {QueriedChunk{1}}); + SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{1}}); + SerializedDataView serialized_view(serialized); // Assert EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); } TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { @@ -400,7 +407,8 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { encoder_.encode(0, 103, 1.0); // Act - SerializedData serialized(storage_); + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); // Assert EXPECT_TRUE(std::ranges::equal( @@ -410,7 +418,7 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { {.timestamp = 102, .value = 1.0}, {.timestamp = 103, .value = 1.0}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); } TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { @@ -457,42 +465,43 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { encoder_.encode(8, 134, STALE_NAN); // Act - SerializedData serialized(storage_); + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(10U, serialized.get_chunks().size()); - EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); - ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); + ASSERT_EQ(10U, serialized_view.get_chunks().size()); + EXPECT_TRUE(std::ranges::all_of(serialized_view.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized_view.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized_view.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized_view.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized_view.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized_view.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized_view.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 2))); + decode_current_chunk(serialized_view, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -500,7 +509,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 3))); + decode_current_chunk(serialized_view, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -508,7 +517,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 4))); + decode_current_chunk(serialized_view, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -516,20 +525,20 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 5))); + decode_current_chunk(serialized_view, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 6))); + decode_current_chunk(serialized_view, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 7))); + decode_current_chunk(serialized_view, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -538,14 +547,14 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 8))); + decode_current_chunk(serialized_view, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 20))); + decode_current_chunk(serialized_view, 20))); } TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { @@ -602,42 +611,43 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - SerializedData serialized(storage_); + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); // Assert - ASSERT_EQ(10U, serialized.get_chunks().size()); - EXPECT_TRUE(std::ranges::all_of(serialized.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[0].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kDoubleConstant, serialized.get_chunks()[1].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[2].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscInteger, serialized.get_chunks()[3].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kValuesGorilla, serialized.get_chunks()[4].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kGorilla, serialized.get_chunks()[5].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kUint32Constant, serialized.get_chunks()[6].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kFloat32Constant, serialized.get_chunks()[7].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized.get_chunks()[8].encoding_state.encoding_type); - ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized.get_chunks()[9].encoding_state.encoding_type); - ASSERT_EQ(20U, serialized.get_chunks()[9].label_set_id); + ASSERT_EQ(10U, serialized_view.get_chunks().size()); + EXPECT_TRUE(std::ranges::all_of(serialized_view.get_chunks(), [](const auto& chunk) { return chunk.encoding_state.has_last_stalenan; })); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kDoubleConstant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscInteger, serialized_view.get_chunks()[3].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kValuesGorilla, serialized_view.get_chunks()[4].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kGorilla, serialized_view.get_chunks()[5].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[6].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kFloat32Constant, serialized_view.get_chunks()[7].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kAscIntegerThenValuesGorilla, serialized_view.get_chunks()[8].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kTwoDoubleConstant, serialized_view.get_chunks()[9].encoding_state.encoding_type); + ASSERT_EQ(20U, serialized_view.get_chunks()[9].label_set_id); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 100, .value = 1.0}, {.timestamp = 101, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 0))); + decode_current_chunk(serialized_view, 0))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 102, .value = 1.1}, {.timestamp = 103, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 1))); + decode_current_chunk(serialized_view, 1))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 104, .value = 1.1}, {.timestamp = 105, .value = 1.2}, {.timestamp = 106, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 2))); + decode_current_chunk(serialized_view, 2))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 107, .value = 1.0}, @@ -645,7 +655,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 109, .value = 3.0}, {.timestamp = 110, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 3))); + decode_current_chunk(serialized_view, 3))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, @@ -653,7 +663,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 113, .value = 3.1}, {.timestamp = 114, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 4))); + decode_current_chunk(serialized_view, 4))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 115, .value = 1.1}, @@ -661,20 +671,20 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 117, .value = 3.1}, {.timestamp = 118, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 5))); + decode_current_chunk(serialized_view, 5))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 119, .value = 2.0}, {.timestamp = 120, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 6))); + decode_current_chunk(serialized_view, 6))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 121, .value = -1.0}, {.timestamp = 122, .value = -1.0}, {.timestamp = 123, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 7))); + decode_current_chunk(serialized_view, 7))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 130, .value = 1.0}, @@ -683,113 +693,13 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { {.timestamp = 133, .value = 4.1}, {.timestamp = 134, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 8))); + decode_current_chunk(serialized_view, 8))); EXPECT_TRUE(std::ranges::equal( SampleList{ {.timestamp = 111, .value = 1.1}, {.timestamp = 112, .value = 2.1}, {.timestamp = 113, .value = STALE_NAN}, }, - decode_current_chunk(serialized, 20))); -} - -class DeserializerIteratorFixtureNew : public SerializerDeserializerTrait, public testing::Test { - protected: - using DecodedChunks = std::vector; - - DecodedChunks decode_chunks(SerializedData& serialized_data) const { - DecodedChunks result; - while (serialized_data.next_series() != SerializedData::kNoMoreSeries) { - SampleList samples; - std::ranges::copy(serialized_data.create_current_series_range(), std::back_insert_iterator(samples)); - result.emplace_back(samples); - } - return result; - } -}; - -TEST_F(DeserializerIteratorFixtureNew, EmptyChunksList) { - // Arrange - - // Act - SerializedData serialized({}); - auto decoded_chunks = decode_chunks(serialized); - - // Assert - EXPECT_TRUE(std::ranges::equal(DecodedChunks{}, decoded_chunks)); -} - -TEST_F(DeserializerIteratorFixtureNew, OneChunk) { - // Arrange - encoder_.encode(0, 1, 1.0); - encoder_.encode(0, 2, 1.0); - - // Act - SerializedData serialized(storage_, {QueriedChunk{0}}); - auto decoded_chunks = decode_chunks(serialized); - - // Assert - EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}}}, decoded_chunks)); -} - -TEST_F(DeserializerIteratorFixtureNew, OneChunkFinalized) { - // Arrange - encoder_.encode(0, 1, 1.0); - encoder_.encode(0, 2, 1.0); - ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); - encoder_.encode(0, 3, 1.0); - encoder_.encode(0, 4, 1.0); - - // Act - SerializedData serialized(storage_); - auto decoded_chunks = decode_chunks(serialized); - - // Assert - EXPECT_TRUE(std::ranges::equal( - DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, {.timestamp = 2, .value = 1.0}, {.timestamp = 3, .value = 1.0}, {.timestamp = 4, .value = 1.0}}}, - decoded_chunks)); -} - -TEST_F(DeserializerIteratorFixtureNew, OneChunkSeveralFinalized) { - // Arrange - encoder_.encode(0, 1, 1.0); - encoder_.encode(0, 2, 1.0); - ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); - encoder_.encode(0, 3, 1.0); - encoder_.encode(0, 4, 1.0); - ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); - encoder_.encode(0, 5, 1.0); - encoder_.encode(0, 6, 1.0); - ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); - encoder_.encode(0, 7, 1.0); - encoder_.encode(0, 8, 1.0); - - // Act - SerializedData serialized(storage_); - auto decoded_chunks = decode_chunks(serialized); - - // Assert - EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}, - {.timestamp = 2, .value = 1.0}, - {.timestamp = 3, .value = 1.0}, - {.timestamp = 4, .value = 1.0}, - {.timestamp = 5, .value = 1.0}, - {.timestamp = 6, .value = 1.0}, - {.timestamp = 7, .value = 1.0}, - {.timestamp = 8, .value = 1.0}}}, - decoded_chunks)); -} - -TEST_F(DeserializerIteratorFixtureNew, TwoChunks) { - // Arrange - encoder_.encode(0, 1, 1.0); - encoder_.encode(1, 2, 1.0); - - // Act - SerializedData serialized(storage_, {QueriedChunk{0}, QueriedChunk{1}}); - auto decoded_chunks = decode_chunks(serialized); - - // Assert - EXPECT_TRUE(std::ranges::equal(DecodedChunks{SampleList{{.timestamp = 1, .value = 1.0}}, SampleList{{.timestamp = 2, .value = 1.0}}}, decoded_chunks)); + decode_current_chunk(serialized_view, 20))); } } // namespace \ No newline at end of file From 4930677f0f15e2a006d1928ebd81bc3fa671fc7f Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Thu, 9 Oct 2025 21:20:24 +0300 Subject: [PATCH 14/17] tests fix + coverage --- .../serialization/serialized_data.h | 2 +- .../serializer_deserializer_new_tests.cpp | 199 ++++++++++++++++++ 2 files changed, 200 insertions(+), 1 deletion(-) diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 7e6f7ede6c..3b5735e0f6 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -299,4 +299,4 @@ class SerializedDataView { const SerializedData& data_; uint32_t series_index_; }; -}; // namespace series_data::serialization \ No newline at end of file +} // namespace series_data::serialization \ No newline at end of file diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index 9b7c806b5c..a95f070302 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -92,6 +92,53 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta decode_current_chunk(serialized_view, 1))); } +TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantFinalizedChunkWithCommonTimestampStream) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(1, 1, 1.0); + + encoder_.encode(0, 2, 1.0); + encoder_.encode(1, 2, 1.0); + + encoder_.encode(0, 3, 1.0); + encoder_.encode(1, 3, 1.0); + + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + ChunkFinalizer::finalize(storage_, 1, storage_.open_chunks[1]); + encoder_.encode(0, 4, 1.0); + encoder_.encode(1, 4, 1.0); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + // Assert + ASSERT_EQ(4U, serialized_view.get_chunks().size()); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[0].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[1].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[2].encoding_state.encoding_type); + ASSERT_EQ(EncodingType::kUint32Constant, serialized_view.get_chunks()[3].encoding_state.encoding_type); + EXPECT_EQ(serialized_view.get_chunks()[0].timestamps_offset, serialized_view.get_chunks()[2].timestamps_offset); + EXPECT_EQ(serialized_view.get_chunks()[1].timestamps_offset, serialized_view.get_chunks()[3].timestamps_offset); + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + {.timestamp = 4, .value = 1.0}, + }, + decode_current_chunk(serialized_view, 0))); + + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 1, .value = 1.0}, + {.timestamp = 2, .value = 1.0}, + {.timestamp = 3, .value = 1.0}, + {.timestamp = 4, .value = 1.0}, + }, + decode_current_chunk(serialized_view, 1))); +} + TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUniqueTimestampStream) { // Arrange encoder_.encode(0, 1, 1.0); @@ -405,6 +452,8 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { encoder_.encode(0, 102, 1.0); ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); encoder_.encode(0, 103, 1.0); + encoder_.encode(0, 104, 1.0); + encoder_.encode(0, 105, 1.0); // Act SerializedData serialized = DataSerializer::serialize(storage_); @@ -417,10 +466,67 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { {.timestamp = 101, .value = 1.0}, {.timestamp = 102, .value = 1.0}, {.timestamp = 103, .value = 1.0}, + {.timestamp = 104, .value = 1.0}, + {.timestamp = 105, .value = 1.0}, + }, + decode_current_chunk(serialized_view, 0))); +} + +TEST_F(SerializerDeserializerFixtureNew, QueryFinalizedOnly) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(0, 101, 1.0); + encoder_.encode(0, 102, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 103, 1.0); + encoder_.encode(0, 104, 1.0); + encoder_.encode(0, 105, 1.0); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0, 0}}); + SerializedDataView serialized_view(serialized); + + // Assert + EXPECT_TRUE(std::ranges::equal( + SampleList{ + {.timestamp = 100, .value = 1.0}, + {.timestamp = 101, .value = 1.0}, + {.timestamp = 102, .value = 1.0}, }, decode_current_chunk(serialized_view, 0))); } +TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesIdWithSeveralFinalized) { + // Arrange + encoder_.encode(0, 100, 1.0); + encoder_.encode(0, 101, 2.0); + encoder_.encode(0, 102, 3.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 103, 4.0); + encoder_.encode(0, 104, 5.0); + encoder_.encode(0, 105, 6.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 106, 7.0); + encoder_.encode(0, 107, 8.0); + encoder_.encode(0, 108, 9.0); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + // Assert + EXPECT_TRUE(std::ranges::equal(SampleList{{.timestamp = 100, .value = 1.0}, + {.timestamp = 101, .value = 2.0}, + {.timestamp = 102, .value = 3.0}, + {.timestamp = 103, .value = 4.0}, + {.timestamp = 104, .value = 5.0}, + {.timestamp = 105, .value = 6.0}, + {.timestamp = 106, .value = 7.0}, + {.timestamp = 107, .value = 8.0}, + {.timestamp = 108, .value = 9.0}}, + decode_current_chunk(serialized_view, 0))); +} + TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { // Arrange encoder_.encode(0, 100, 1.0); @@ -702,4 +808,97 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { }, decode_current_chunk(serialized_view, 20))); } + +class SerializedDataNextIterFixture : public SerializerDeserializerTrait, public testing::Test { + protected: + std::vector get_chunks_ids(SerializedDataView& view) const { + std::vector ans{}; + uint32_t id = view.next_series(); + while (id != SerializedDataView::kNoMoreSeries) { + ans.push_back(id); + id = view.next_series(); + } + return ans; + } +}; + +TEST_F(SerializedDataNextIterFixture, EmptyChunksList) { + // Arrange + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + auto ids = get_chunks_ids(serialized_view); + + // Assert + EXPECT_TRUE(ids.empty()); + EXPECT_EQ(SerializedDataView::kNoMoreSeries, serialized_view.next_series()); +} + +TEST_F(SerializedDataNextIterFixture, OneChunk) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(0, 2, 1.0); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + auto ids = get_chunks_ids(serialized_view); + + // Assert + EXPECT_TRUE(std::ranges::equal(ids, std::initializer_list{0u})); + EXPECT_EQ(SerializedDataView::kNoMoreSeries, serialized_view.next_series()); +} + +TEST_F(SerializedDataNextIterFixture, OneChunkFinalized) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(0, 2, 1.0); + ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); + encoder_.encode(0, 3, 1.0); + encoder_.encode(0, 4, 1.0); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + auto ids = get_chunks_ids(serialized_view); + + // Assert + EXPECT_TRUE(std::ranges::equal(ids, std::initializer_list{0u})); + EXPECT_EQ(SerializedDataView::kNoMoreSeries, serialized_view.next_series()); +} + +TEST_F(SerializedDataNextIterFixture, SeveralChunks) { + // Arrange + encoder_.encode(0, 1, 1.0); + encoder_.encode(1, 1, 1.0); + + encoder_.encode(0, 2, 1.0); + encoder_.encode(1, 2, 1.0); + + encoder_.encode(0, 3, 1.0); + encoder_.encode(1, 3, 1.0); + + encoder_.encode(2, 1, 2.0); + encoder_.encode(2, 2, 2.0); + encoder_.encode(2, 3, 2.0); + + encoder_.encode(100, 4, 2.1); + encoder_.encode(100, 5, 2.2); + encoder_.encode(100, 7, 2.3); + + // Act + SerializedData serialized = DataSerializer::serialize(storage_); + SerializedDataView serialized_view(serialized); + + auto ids = get_chunks_ids(serialized_view); + + // Assert + EXPECT_TRUE(std::ranges::equal(ids, std::initializer_list{0u, 1u, 2u, 100u})); + EXPECT_EQ(SerializedDataView::kNoMoreSeries, serialized_view.next_series()); +} + } // namespace \ No newline at end of file From 1e18c7c95e1da649ca0defc4ca36440199048e93 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Thu, 9 Oct 2025 21:23:13 +0300 Subject: [PATCH 15/17] fix --- pp/series_data/serialization/serialized_data.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 3b5735e0f6..04067c33bf 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -282,9 +282,9 @@ class SerializedDataView { } const uint32_t current_series_id = chunks[series_index_].label_set_id; - while (series_index_ < chunks.size() && current_series_id == chunks[series_index_].label_set_id) { + do { ++series_index_; - } + } while (series_index_ < chunks.size() && chunks[series_index_].label_set_id == current_series_id); if (series_index_ == chunks.size()) [[unlikely]] { return kNoMoreSeries; From 06796b24db95e92806c6bc082f13f706eed40aa4 Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 10 Oct 2025 16:15:19 +0300 Subject: [PATCH 16/17] review fixes --- pp/entrypoint/head/serialization.h | 4 +- pp/entrypoint/series_data_data_storage.cpp | 4 +- ...ies_data_serialization_serialized_data.cpp | 15 ++- pp/go/cppbridge/entrypoint.h | 40 ++++---- .../serialization/serialized_data.h | 91 ++++++++++--------- .../serializer_deserializer_new_tests.cpp | 39 ++++---- 6 files changed, 99 insertions(+), 94 deletions(-) diff --git a/pp/entrypoint/head/serialization.h b/pp/entrypoint/head/serialization.h index eb732d1943..e94de76244 100644 --- a/pp/entrypoint/head/serialization.h +++ b/pp/entrypoint/head/serialization.h @@ -7,7 +7,7 @@ namespace entrypoint::head { class SerializedDataGo { public: explicit SerializedDataGo(const series_data::DataStorage& storage, const series_data::querier::QueriedChunkList& queried_chunks) - : data_{series_data::serialization::DataSerializer::serialize(storage, queried_chunks)} {} + : data_{series_data::serialization::DataSerializer{storage}.serialize(queried_chunks)} {} [[nodiscard]] PROMPP_ALWAYS_INLINE auto get_buffer() const noexcept { return data_view_.get_buffer(); } [[nodiscard]] PROMPP_ALWAYS_INLINE auto get_chunks() const noexcept { return data_view_.get_chunks(); } @@ -21,7 +21,7 @@ class SerializedDataGo { }; using SerializedDataPtr = std::unique_ptr; -using SerializedDataIteratorPtr = std::unique_ptr; +using SerializedDataIteratorPtr = std::unique_ptr; static_assert(sizeof(SerializedDataPtr) == sizeof(void*)); static_assert(sizeof(SerializedDataIteratorPtr) == sizeof(void*)); diff --git a/pp/entrypoint/series_data_data_storage.cpp b/pp/entrypoint/series_data_data_storage.cpp index 40bb15bced..c473d8a172 100644 --- a/pp/entrypoint/series_data_data_storage.cpp +++ b/pp/entrypoint/series_data_data_storage.cpp @@ -155,12 +155,12 @@ extern "C" void prompp_series_data_data_storage_query_new(void* args, void* res) struct Result { QuerierVariantPtr querier{}; - QueryStatus status; + QueryStatus status{}; entrypoint::head::SerializedDataPtr* serialized_data{}; }; const auto in = static_cast(args); - auto* out = new (res) Result(); + const auto out = static_cast(res); RangeQuerierWithArgumentsWrapperNew querier(*in->data_storage, in->query, out->serialized_data); querier.query(); diff --git a/pp/entrypoint/series_data_serialization_serialized_data.cpp b/pp/entrypoint/series_data_serialization_serialized_data.cpp index 28e4f4e8bf..222ad64b12 100644 --- a/pp/entrypoint/series_data_serialization_serialized_data.cpp +++ b/pp/entrypoint/series_data_serialization_serialized_data.cpp @@ -4,27 +4,27 @@ extern "C" void prompp_series_data_serialization_serialized_data_next(void* args, void* res) { struct Arguments { - entrypoint::head::SerializedDataPtr* serialized_data; + entrypoint::head::SerializedDataPtr serialized_data; }; using Result = struct { uint32_t series_id; }; - new (res) Result{.series_id = reinterpret_cast(args)->serialized_data->get()->next()}; + new (res) Result{.series_id = static_cast(args)->serialized_data->next()}; } extern "C" void prompp_series_data_serialization_serialized_data_iterator(void* args, void* res) { struct Arguments { - entrypoint::head::SerializedDataPtr* serialized_data; + entrypoint::head::SerializedDataPtr serialized_data; }; using Result = struct { entrypoint::head::SerializedDataIteratorPtr iterator; }; - new (res) Result{.iterator = std::make_unique( - static_cast(args)->serialized_data->get()->iterator())}; + new (res) Result{ + .iterator = std::make_unique(static_cast(args)->serialized_data->iterator())}; } extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res) { @@ -40,7 +40,7 @@ extern "C" void prompp_series_data_serialization_serialized_data_iterator_next(v bool has_value; }; - Arguments* in = reinterpret_cast(args); + const Arguments* in = static_cast(args); if (*in->iterator == DecodeIteratorSentinel{}) { new (res) Result{.has_value = false}; @@ -61,9 +61,8 @@ extern "C" void prompp_series_data_serialization_serialized_data_iterator_dtor(v extern "C" void prompp_series_data_serialization_serialized_data_dtor(void* args) { struct Arguments { - entrypoint::head::SerializedDataPtr* serialized_data; + entrypoint::head::SerializedDataPtr serialized_data; }; - std::destroy_at(static_cast(args)->serialized_data); static_cast(args)->~Arguments(); } \ No newline at end of file diff --git a/pp/go/cppbridge/entrypoint.h b/pp/go/cppbridge/entrypoint.h index d68740072e..e4710654d7 100755 --- a/pp/go/cppbridge/entrypoint.h +++ b/pp/go/cppbridge/entrypoint.h @@ -1206,12 +1206,12 @@ void prompp_series_data_data_storage_query(void* args, void* res); * @param args { * dataStorage uintptr // pointer to constructed data storage * query DataStorageQuery // query - * serializedData uintptr // pointer to serialized data * } * * @param res { - * Querier uintptr // pointer to constructed Querier if data loading is needed - * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * Querier uintptr // pointer to constructed Querier if data loading is needed + * Status uint8 // status of a query (0 - Success, 1 - Data loading is needed) + * serializedData uintptr // pointer to serialized data * } */ void prompp_series_data_data_storage_query_new(void* args, void* res); @@ -1286,6 +1286,22 @@ void prompp_series_data_chunk_recoder_ctor(void* args, void* res); */ void prompp_series_data_serialized_chunk_recoder_ctor(void* args, void* res); +/** + * @brief Construct a new ChunkRecoder object to recode all serialized chunks (new model) + * + * @param args { + * serializedData uintptr // pointer to serialized data + * time_interval struct { // closed interval [min, max] + * min int64 + * max int64 + * } + * } + * @param res { + * chunk_recoder uintptr // pointer to chunk recoder + * } + */ +void prompp_series_data_serialized_chunk_recoder_new_ctor(void* args, void* res); + /** * @brief Get chunk encoded in prometheus format * @@ -1590,24 +1606,12 @@ void prompp_series_data_serialization_serialized_data_iterator(void* args, void* * } * * @param res { - * has_data bool // is iterator has more data to decode. - * } - */ -void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); - -/** - * @brief Extract sample form decode iterator. - * - * @param args { - * iterator uintptr // pointer to decode iterator - * } - * - * @param res { + * has_data bool // is iterator has more data to decode. * timestamp int64 // sample timestamp - * value float64 // sample value + * value float64 // sample value * } */ -void prompp_series_data_serialization_serialized_data_iterator_sample(void* args, void* res); +void prompp_series_data_serialization_serialized_data_iterator_next(void* args, void* res); /** * @brief Destroy decode iterator. diff --git a/pp/series_data/serialization/serialized_data.h b/pp/series_data/serialization/serialized_data.h index 04067c33bf..0515caf8d2 100644 --- a/pp/series_data/serialization/serialized_data.h +++ b/pp/series_data/serialization/serialized_data.h @@ -9,18 +9,20 @@ namespace series_data::serialization { struct SerializedData { + using Memory = BareBones::Memory; + BareBones::Vector chunks; - BareBones::Memory bytes_buffer; + Memory bytes_buffer; [[nodiscard]] PROMPP_ALWAYS_INLINE uint32_t allocated_memory() const noexcept { return chunks.allocated_memory() + bytes_buffer.allocated_memory(); } }; class DataSerializer { public: - static SerializedData serialize(const DataStorage& storage, const querier::QueriedChunkList& queried_chunks) noexcept { - return serialize_internal(storage, queried_chunks); - } - static SerializedData serialize(const DataStorage& storage) noexcept { return serialize_internal(storage, storage.chunks()); } + explicit DataSerializer(const DataStorage& storage) : storage_(storage) {} + + SerializedData serialize(const querier::QueriedChunkList& queried_chunks) noexcept { return serialize_internal(queried_chunks); } + SerializedData serialize() noexcept { return serialize_internal(storage_.chunks()); } private: struct TimestampStreamsData { @@ -34,31 +36,30 @@ class DataSerializer { }; template - static SerializedData serialize_internal(const DataStorage& storage, const ChunkList& chunks) noexcept { + SerializedData serialize_internal(const ChunkList& chunks) noexcept { const auto& kReservedBytesForReader = encoder::CompactBitSequence::reserved_bytes_for_reader(); SerializedData serialized_data; serialized_data.chunks.reserve(get_chunk_count(chunks)); - uint32_t data_size = 0; - TimestampStreamsData timestamp_streams_data; for (auto& chunk_data : chunks) { using enum chunk::DataChunk::Type; if (chunk_data.is_open()) [[likely]] { - if (const auto& chunk = get_chunk(storage, chunk_data); !chunk.is_empty()) [[likely]] { - fill_serialized_chunk(storage, chunk, serialized_data.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, data_size, + if (const auto& chunk = get_chunk(chunk_data); !chunk.is_empty()) [[likely]] { + fill_serialized_chunk(chunk, serialized_data.chunks.emplace_back(chunk_data.series_id()), timestamp_streams_data, serialized_data.bytes_buffer); } } else { - fill_serialized_chunk(storage, get_chunk(storage, chunk_data), serialized_data.chunks.emplace_back(chunk_data.series_id()), - timestamp_streams_data, data_size, serialized_data.bytes_buffer); + fill_serialized_chunk(get_chunk(chunk_data), serialized_data.chunks.emplace_back(chunk_data.series_id()), + timestamp_streams_data, serialized_data.bytes_buffer); } } - serialized_data.bytes_buffer.grow_to_fit_at_least(data_size + kReservedBytesForReader.size()); - std::memcpy(serialized_data.bytes_buffer.control_block().data + data_size, kReservedBytesForReader.data(), kReservedBytesForReader.size()); + serialized_data.bytes_buffer.grow_to_fit_at_least(serialized_data.bytes_buffer.control_block().items_count + kReservedBytesForReader.size()); + std::memcpy(serialized_data.bytes_buffer + serialized_data.bytes_buffer.control_block().items_count, kReservedBytesForReader.data(), + kReservedBytesForReader.size()); return serialized_data; } @@ -73,18 +74,18 @@ class DataSerializer { } template - static void fill_serialized_chunk(const DataStorage& storage, - const chunk::DataChunk& chunk, - chunk::SerializedChunk& serialized_chunk, - TimestampStreamsData& timestamp_streams_data, - uint32_t& data_size, - BareBones::Memory& buffer) noexcept { + void fill_serialized_chunk(const chunk::DataChunk& chunk, + chunk::SerializedChunk& serialized_chunk, + TimestampStreamsData& timestamp_streams_data, + SerializedData::Memory& buffer) noexcept { using enum EncodingType; serialized_chunk.encoding_state = chunk.encoding_state; + uint32_t& data_size = buffer.control_block().items_count; + if (chunk.encoding_state.encoding_type != kGorilla) [[likely]] { - fill_timestamp_stream_offset(storage, timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, data_size, buffer); + fill_timestamp_stream_offset(storage_, timestamp_streams_data, chunk.timestamp_encoder_state_id, serialized_chunk, buffer); } switch (chunk.encoding_state.encoding_type) { @@ -101,7 +102,8 @@ class DataSerializer { case kDoubleConstant: { serialized_chunk.set_offset(data_size); buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::DoubleConstantEncoder)); - std::memcpy(buffer + data_size, &storage.variant_encoders[chunk.encoder.external_index].double_constant, sizeof(encoder::value::DoubleConstantEncoder)); + std::memcpy(buffer + data_size, &storage_.variant_encoders[chunk.encoder.external_index].double_constant, + sizeof(encoder::value::DoubleConstantEncoder)); data_size += sizeof(encoder::value::DoubleConstantEncoder); break; } @@ -109,7 +111,7 @@ class DataSerializer { case kTwoDoubleConstant: { serialized_chunk.set_offset(data_size); buffer.grow_to_fit_at_least(data_size + sizeof(encoder::value::TwoDoubleConstantEncoder)); - std::memcpy(buffer + data_size, &storage.variant_encoders[chunk.encoder.external_index].two_double_constant, + std::memcpy(buffer + data_size, &storage_.variant_encoders[chunk.encoder.external_index].two_double_constant, sizeof(encoder::value::TwoDoubleConstantEncoder)); data_size += sizeof(encoder::value::TwoDoubleConstantEncoder); break; @@ -117,25 +119,25 @@ class DataSerializer { case kAscInteger: { serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage.get_asc_integer_stream(chunk.encoder.external_index), data_size, buffer); + write_compact_bit_sequence(storage_.get_asc_integer_stream(chunk.encoder.external_index), buffer); break; } case kAscIntegerThenValuesGorilla: { serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage.get_asc_integer_then_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + write_compact_bit_sequence(storage_.get_asc_integer_then_values_gorilla_stream(chunk.encoder.external_index), buffer); break; } case kValuesGorilla: { serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage.get_values_gorilla_stream(chunk.encoder.external_index), data_size, buffer); + write_compact_bit_sequence(storage_.get_values_gorilla_stream(chunk.encoder.external_index), buffer); break; } case kGorilla: { serialized_chunk.set_offset(data_size); - write_compact_bit_sequence(storage.get_gorilla_encoder_stream(chunk.encoder.external_index), data_size, buffer); + write_compact_bit_sequence(storage_.get_gorilla_encoder_stream(chunk.encoder.external_index), buffer); break; } @@ -146,18 +148,18 @@ class DataSerializer { } template - [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage& storage, const querier::QueriedChunk& queried_chunk) noexcept { + [[nodiscard]] const chunk::DataChunk& get_chunk(const querier::QueriedChunk& queried_chunk) const noexcept { if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { - return storage.open_chunks[queried_chunk.series_id()]; + return storage_.open_chunks[queried_chunk.series_id()]; } else { - auto finalized_chunk_it = storage.finalized_chunks.find(queried_chunk.series_id())->second.begin(); + auto finalized_chunk_it = storage_.finalized_chunks.find(queried_chunk.series_id())->second.begin(); std::advance(finalized_chunk_it, queried_chunk.finalized_chunk_id); return *finalized_chunk_it; } } template - [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage&, const DataStorage::SeriesChunkIterator::Data& chunk) noexcept { + [[nodiscard]] static const chunk::DataChunk& get_chunk(const DataStorage::SeriesChunkIterator::Data& chunk) noexcept { return chunk.chunk(); } @@ -166,13 +168,13 @@ class DataSerializer { TimestampStreamsData& timestamp_streams_data, encoder::timestamp::State::Id timestamp_stream_id, chunk::SerializedChunk& serialized_chunk, - uint32_t& data_size, - BareBones::Memory& buffer) noexcept { + SerializedData::Memory& buffer) noexcept { + uint32_t data_size = buffer.control_block().items_count; if constexpr (chunk_type == chunk::DataChunk::Type::kOpen) { if (const auto it = timestamp_streams_data.stream_offsets.find(timestamp_stream_id); it == timestamp_streams_data.stream_offsets.end()) [[unlikely]] { timestamp_streams_data.stream_offsets.emplace(timestamp_stream_id, data_size); serialized_chunk.timestamps_offset = data_size; - write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, buffer); } else { serialized_chunk.timestamps_offset = it->second; } @@ -181,7 +183,7 @@ class DataSerializer { it == timestamp_streams_data.finalized_stream_offsets.end()) [[unlikely]] { timestamp_streams_data.finalized_stream_offsets.emplace(timestamp_stream_id, data_size); serialized_chunk.timestamps_offset = data_size; - write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, data_size, buffer); + write_compact_bit_sequence(storage.get_timestamp_stream(timestamp_stream_id).stream, buffer); } else { serialized_chunk.timestamps_offset = it->second; } @@ -189,21 +191,22 @@ class DataSerializer { } template - static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, - uint32_t& data_size, - BareBones::Memory& buffer) noexcept { + static void write_compact_bit_sequence(const CompactBitSequence& bit_sequence, SerializedData::Memory& buffer) noexcept { const auto bytes_count = bit_sequence.size_in_bytes(); + uint32_t& data_size = buffer.control_block().items_count; buffer.grow_to_fit_at_least(data_size + bytes_count); std::memcpy(buffer + data_size, bit_sequence.raw_bytes(), bytes_count); data_size += bytes_count; } + + const DataStorage& storage_; }; class SerializedDataView { public: static constexpr uint32_t kNoMoreSeries = std::numeric_limits::max(); - class SerializedSeriesIterator { + class SeriesIterator { public: using iterator_category = std::forward_iterator_tag; using value_type = encoder::Sample; @@ -211,9 +214,7 @@ class SerializedDataView { using pointer = value_type*; using reference = value_type&; - SerializedSeriesIterator(const BareBones::Memory& buffer, - chunk::SerializedChunkSpan chunks, - uint32_t chunk_id) + SeriesIterator(const SerializedData::Memory& buffer, chunk::SerializedChunkSpan chunks, uint32_t chunk_id) : decode_iter_(std::in_place_type, 0, BareBones::BitSequenceReader(nullptr, 0), 0, false), chunk_iter_(chunks.begin() + chunk_id), series_id_(chunk_iter_->label_set_id), @@ -227,7 +228,7 @@ class SerializedDataView { [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample& operator*() const noexcept { return *decode_iter_; } [[nodiscard]] PROMPP_ALWAYS_INLINE const encoder::Sample* operator->() const noexcept { return decode_iter_.operator->(); } - PROMPP_ALWAYS_INLINE SerializedSeriesIterator& operator++() noexcept { + PROMPP_ALWAYS_INLINE SeriesIterator& operator++() noexcept { ++decode_iter_; if (decode_iter_ == decoder::DecodeIteratorSentinel{}) [[unlikely]] { if (std::next(chunk_iter_) != chunks_.end() && series_id_ == std::next(chunk_iter_)->label_set_id) { @@ -240,7 +241,7 @@ class SerializedDataView { return *this; } - PROMPP_ALWAYS_INLINE SerializedSeriesIterator operator++(int) noexcept { + PROMPP_ALWAYS_INLINE SeriesIterator operator++(int) noexcept { const auto it = *this; ++*this; return it; @@ -293,7 +294,7 @@ class SerializedDataView { return chunks[series_index_].label_set_id; } - [[nodiscard]] SerializedSeriesIterator create_current_series_iterator() const noexcept { return {data_.bytes_buffer, get_chunks(), series_index_}; } + [[nodiscard]] SeriesIterator create_current_series_iterator() const noexcept { return {data_.bytes_buffer, get_chunks(), series_index_}; } private: const SerializedData& data_; diff --git a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp index a95f070302..ddf88047d1 100644 --- a/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp +++ b/pp/series_data/tests/serialization/serializer_deserializer_new_tests.cpp @@ -29,6 +29,7 @@ class SerializerDeserializerTrait { protected: DataStorage storage_; Encoder<> encoder_{storage_}; + DataSerializer serializer_{storage_}; [[nodiscard]] PROMPP_ALWAYS_INLINE static SampleList decode_current_chunk(SerializedDataView& data, uint32_t series_id) { SampleList result; @@ -47,8 +48,8 @@ TEST_F(SerializerDeserializerFixtureNew, EmptyChunksList) { // Arrange // Act - SerializedData serialized = DataSerializer::serialize(storage_, {}); - SerializedDataView serialized_view(serialized); + const SerializedData serialized = serializer_.serialize({}); + const SerializedDataView serialized_view(serialized); // Assert ASSERT_EQ(0U, serialized_view.get_chunks().size()); @@ -67,7 +68,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantChunkWithCommonTimesta encoder_.encode(1, 3, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0}, QueriedChunk{1}}); + const SerializedData serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}}); SerializedDataView serialized_view(serialized); // Assert @@ -109,7 +110,7 @@ TEST_F(SerializerDeserializerFixtureNew, TwoUint32ConstantFinalizedChunkWithComm encoder_.encode(1, 4, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -155,7 +156,7 @@ TEST_F(SerializerDeserializerFixtureNew, ThreeUint32ConstantChunkWithCommonAndUn encoder_.encode(2, 3, 2.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); + const SerializedData serialized = serializer_.serialize({QueriedChunk{0}, QueriedChunk{1}, QueriedChunk{2}}); SerializedDataView serialized_view(serialized); // Assert @@ -221,7 +222,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypes) { encoder_.encode(8, 123, 4.1); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -346,7 +347,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypes) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -434,7 +435,7 @@ TEST_F(SerializerDeserializerFixtureNew, ChunkWithFinalizedTimestampStream) { ChunkFinalizer::finalize(storage_, 0, storage_.open_chunks[0]); // Act - SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{1}}); + const SerializedData serialized = serializer_.serialize({QueriedChunk{1}}); SerializedDataView serialized_view(serialized); // Assert @@ -456,7 +457,7 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesId) { encoder_.encode(0, 105, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -483,7 +484,7 @@ TEST_F(SerializerDeserializerFixtureNew, QueryFinalizedOnly) { encoder_.encode(0, 105, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_, {QueriedChunk{0, 0}}); + const SerializedData serialized = serializer_.serialize({QueriedChunk{0, 0}}); SerializedDataView serialized_view(serialized); // Assert @@ -511,7 +512,7 @@ TEST_F(SerializerDeserializerFixtureNew, MultipleChunksOnOneSeriesIdWithSeveralF encoder_.encode(0, 108, 9.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -571,7 +572,7 @@ TEST_F(SerializerDeserializerFixtureNew, AllChunkTypesWithStalenan) { encoder_.encode(8, 134, STALE_NAN); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -717,7 +718,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { ChunkFinalizer::finalize(storage_, 8, storage_.open_chunks[8]); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); // Assert @@ -811,7 +812,7 @@ TEST_F(SerializerDeserializerFixtureNew, FinalizedAllChunkTypesWithStalenan) { class SerializedDataNextIterFixture : public SerializerDeserializerTrait, public testing::Test { protected: - std::vector get_chunks_ids(SerializedDataView& view) const { + static std::vector get_chunks_ids(SerializedDataView& view) { std::vector ans{}; uint32_t id = view.next_series(); while (id != SerializedDataView::kNoMoreSeries) { @@ -826,10 +827,10 @@ TEST_F(SerializedDataNextIterFixture, EmptyChunksList) { // Arrange // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); - auto ids = get_chunks_ids(serialized_view); + const auto ids = get_chunks_ids(serialized_view); // Assert EXPECT_TRUE(ids.empty()); @@ -842,7 +843,7 @@ TEST_F(SerializedDataNextIterFixture, OneChunk) { encoder_.encode(0, 2, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); auto ids = get_chunks_ids(serialized_view); @@ -861,7 +862,7 @@ TEST_F(SerializedDataNextIterFixture, OneChunkFinalized) { encoder_.encode(0, 4, 1.0); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); auto ids = get_chunks_ids(serialized_view); @@ -891,7 +892,7 @@ TEST_F(SerializedDataNextIterFixture, SeveralChunks) { encoder_.encode(100, 7, 2.3); // Act - SerializedData serialized = DataSerializer::serialize(storage_); + const SerializedData serialized = serializer_.serialize(); SerializedDataView serialized_view(serialized); auto ids = get_chunks_ids(serialized_view); From 172458778ab20151205b92af2dff45bf4201a91f Mon Sep 17 00:00:00 2001 From: Gleb Shigin Date: Fri, 10 Oct 2025 16:26:33 +0300 Subject: [PATCH 17/17] benchmark fix --- .../benchmarks/serializer_benchmark.cpp | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pp/series_data/benchmarks/serializer_benchmark.cpp b/pp/series_data/benchmarks/serializer_benchmark.cpp index 6f54fd9052..07b10ad05f 100644 --- a/pp/series_data/benchmarks/serializer_benchmark.cpp +++ b/pp/series_data/benchmarks/serializer_benchmark.cpp @@ -65,7 +65,7 @@ series_data::querier::QueriedChunkList generate_query(uint32_t size) { void BenchmarkWalSerializer(benchmark::State& state) { const auto& samples = get_samples_for_benchmark(); - const double percent = state.range(0) / 100.0; + const double percent = static_cast(state.range(0)) / 100.0; const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); const auto min_ts = min->timestamp; const auto max_ts = max->timestamp; @@ -75,7 +75,7 @@ void BenchmarkWalSerializer(benchmark::State& state) { series_data::Encoder encoder{storage}; for (const auto& sample : samples) { - if (sample.timestamp < min_ts + delta_ts * percent) { + if (sample.timestamp <= min_ts + static_cast(static_cast(delta_ts) * percent)) { encoder.encode(sample.series_id, sample.timestamp, sample.value); } } @@ -96,13 +96,14 @@ void BenchmarkWalSerializer(benchmark::State& state) { PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(slice.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["Stream Size"] = + benchmark::Counter(static_cast(slice.allocated_memory()), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } void BenchmarkWalConstantSerializer(benchmark::State& state) { const auto& samples = get_samples_for_benchmark(); - const double percent = state.range(0) / 100.0; + const double percent = static_cast(state.range(0)) / 100.0; const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); const auto min_ts = min->timestamp; const auto max_ts = max->timestamp; @@ -112,7 +113,7 @@ void BenchmarkWalConstantSerializer(benchmark::State& state) { series_data::Encoder encoder{storage}; for (const auto& sample : samples) { - if (sample.timestamp <= min_ts + delta_ts * percent) { + if (sample.timestamp <= min_ts + static_cast(static_cast(delta_ts) * percent)) { encoder.encode(sample.series_id, sample.timestamp, sample.series_id); } } @@ -133,13 +134,14 @@ void BenchmarkWalConstantSerializer(benchmark::State& state) { PromPP::Primitives::Go::BytesStream stream{&slice}; serializer_.serialize(chunk_list, stream); - state.counters["Stream Size"] = benchmark::Counter(slice.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); + state.counters["Stream Size"] = + benchmark::Counter(static_cast(slice.allocated_memory()), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } void BenchmarkWalSerializedData(benchmark::State& state) { const auto& samples = get_samples_for_benchmark(); - const double percent = state.range(0) / 100.0; + const double percent = static_cast(state.range(0)) / 100.0; const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); const auto min_ts = min->timestamp; const auto max_ts = max->timestamp; @@ -149,7 +151,7 @@ void BenchmarkWalSerializedData(benchmark::State& state) { series_data::Encoder encoder{storage}; for (const auto& sample : samples) { - if (sample.timestamp < min_ts + delta_ts * percent) { + if (sample.timestamp <= min_ts + static_cast(static_cast(delta_ts) * percent)) { encoder.encode(sample.series_id, sample.timestamp, sample.value); } } @@ -157,19 +159,19 @@ void BenchmarkWalSerializedData(benchmark::State& state) { const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { - SerializedData serialized = DataSerializer::serialize(storage, chunk_list); + SerializedData serialized = DataSerializer{storage}.serialize(chunk_list); benchmark::DoNotOptimize(serialized); } { - SerializedData serialized = DataSerializer::serialize(storage, chunk_list); + const SerializedData serialized = DataSerializer{storage}.serialize(chunk_list); state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } } void BenchmarkWalConstantSerializedData(benchmark::State& state) { const auto& samples = get_samples_for_benchmark(); - const double percent = state.range(0) / 100.0; + const double percent = static_cast(state.range(0)) / 100.0; const auto [min, max] = std::ranges::minmax_element(samples, [](auto a, auto b) { return a.timestamp < b.timestamp; }); const auto min_ts = min->timestamp; const auto max_ts = max->timestamp; @@ -179,7 +181,7 @@ void BenchmarkWalConstantSerializedData(benchmark::State& state) { series_data::Encoder encoder{storage}; for (const auto& sample : samples) { - if (sample.timestamp <= min_ts + delta_ts * percent) { + if (sample.timestamp <= min_ts + static_cast(static_cast(delta_ts) * percent)) { encoder.encode(sample.series_id, sample.timestamp, sample.series_id); } } @@ -187,12 +189,12 @@ void BenchmarkWalConstantSerializedData(benchmark::State& state) { const series_data::querier::QueriedChunkList chunk_list = generate_query(storage.open_chunks.size()); for ([[maybe_unused]] auto _ : state) { - SerializedData serialized = DataSerializer::serialize(storage, chunk_list); + SerializedData serialized = DataSerializer{storage}.serialize(chunk_list); benchmark::DoNotOptimize(serialized); } { - SerializedData serialized = DataSerializer::serialize(storage, chunk_list); + const SerializedData serialized = DataSerializer{storage}.serialize(chunk_list); state.counters["Total Size"] = benchmark::Counter(serialized.allocated_memory(), benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } }