From 07e493509288cc017e77abf819e1a2d06d8a885d Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 5 Nov 2024 13:51:03 +0000 Subject: [PATCH 1/8] Downgrade numpy --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7ac7f19..9618bf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ authors = [ urls = { "Home" = "https://github.com/ecmwf/gribjump" } dependencies = [ "cffi~=1.17", - "numpy~=2.1", + "numpy~=1.26", "pytest~=8.3", "setuptools~=75.1", "findlibs~=0.0.5", @@ -45,4 +45,4 @@ include-package-data = true zip-safe = false [tool.setuptools.package-data] -"pygribjump" = ["VERSION", "pygribjump/src/pygribjump/gribjump_c.h"] \ No newline at end of file +"pygribjump" = ["VERSION", "pygribjump/src/pygribjump/gribjump_c.h"] From d69d4995062f31c023e200b5f4cf06fd98683680 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Mon, 11 Nov 2024 14:30:04 +0000 Subject: [PATCH 2/8] Debugging with timers --- src/gribjump/CMakeLists.txt | 3 + src/gribjump/Engine.cc | 14 +- src/gribjump/ExtractionData.cc | 97 ++++++-- src/gribjump/ExtractionData.h | 6 +- src/gribjump/Lister.cc | 38 ++- src/gribjump/Serialiser.cc | 307 +++++++++++++++++++++++ src/gribjump/Serialiser.h | 53 ++++ src/gribjump/remote/RemoteGribJump.h | 2 +- tests/CMakeLists.txt | 9 + tests/test_serialiser.cc | 357 +++++++++++++++++++++++++++ 10 files changed, 854 insertions(+), 32 deletions(-) create mode 100644 src/gribjump/Serialiser.cc create mode 100644 src/gribjump/Serialiser.h create mode 100644 tests/test_serialiser.cc diff --git a/src/gribjump/CMakeLists.txt b/src/gribjump/CMakeLists.txt index dce4cc5..1e8ee15 100644 --- a/src/gribjump/CMakeLists.txt +++ b/src/gribjump/CMakeLists.txt @@ -49,6 +49,9 @@ list( APPEND gribjump_srcs Metrics.h Metrics.cc Types.h + + Serialiser.h + Serialiser.cc ) if( HAVE_GRIBJUMP_LOCAL_EXTRACT ) diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index c33d82d..668e799 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -103,12 +103,12 @@ flattenedKeys_t buildFlatKeys(const ExtractionRequests& requests, bool flatten) return keymap; } -metkit::mars::MarsRequest unionRequest(const MarsRequests& requests) { +metkit::mars::MarsRequest unionRequest(const ExtractionRequests& requests) { /// @todo: we should do some check not to merge on keys like class and stream - metkit::mars::MarsRequest unionRequest = requests.front(); + metkit::mars::MarsRequest unionRequest = requests.front().request(); for(size_t i = 1; i < requests.size(); ++i) { - unionRequest.merge(requests[i]); + unionRequest.merge(requests[i].request()); } eckit::Log::info() << "Gribjump: Union request is " << unionRequest << std::endl; @@ -153,13 +153,9 @@ filemap_t Engine::buildFileMap(const ExtractionRequests& requests, ExItemMap& ke // Map files to ExtractionItem eckit::Timer timer("Gribjump Engine: Building file map"); - std::vector marsrequests; - for (const auto& req : requests) { - marsrequests.push_back(req.request()); - } - - const metkit::mars::MarsRequest req = unionRequest(marsrequests); + const metkit::mars::MarsRequest req = unionRequest(requests); MetricsManager::instance().set("union_request", req.asString()); + MetricsManager::instance().set("debug_elapsed_union_request", timer.elapsed()); timer.reset("Gribjump Engine: Flattened requests and constructed union request"); filemap_t filemap = FDBLister::instance().fileMap(req, keyToExtractionItem); diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index 2dc249e..68681a4 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -35,7 +35,68 @@ std::vector decodeVector(eckit::Stream& s) { return std::vector(data, data + size); } -// todo: encodeVectorVector ? +void encodeVector(eckit::Stream& s, const std::vector& v) { + size_t size = v.size(); + s << size; + eckit::Buffer buffer(v.data(), size * sizeof(unsigned long long)); + s << buffer; +} + +std::vector decodeVectorUll(eckit::Stream& s) { + size_t size; + s >> size; + eckit::Buffer buffer(size * sizeof(unsigned long long)); + s >> buffer; + unsigned long long* data = (unsigned long long*) buffer.data(); + return std::vector(data, data + size); +} + + +void encodeVectorVector(eckit::Stream& s, const std::vector>& v) { + size_t size = v.size(); + s << size; + size_t totalSize = 0; + for (auto& inner : v) { + totalSize += inner.size(); + s << inner.size(); + } + s << totalSize; + eckit::Buffer buffer(totalSize * sizeof(double)); + double* data = (double*) buffer.data(); + for (auto& inner : v) { + for (auto& d : inner) { + *data++ = d; + } + } + s << buffer; +} + +std::vector> decodeVectorVector(eckit::Stream& s) { + size_t size; + s >> size; + std::vector innerSizes; + size_t totalSize = 0; + for (size_t i = 0; i < size; i++) { + size_t innerSize; + s >> innerSize; + innerSizes.push_back(innerSize); + totalSize += innerSize; + } + + eckit::Buffer buffer(totalSize * sizeof(double)); + s >> buffer; + double* data = (double*) buffer.data(); + + std::vector> result; + size_t offset = 0; + for (auto& innerSize : innerSizes) { + std::vector inner(data + offset, data + offset + innerSize); + result.push_back(inner); + offset += innerSize; + } + + return result; +} } // namespace @@ -53,15 +114,14 @@ ExtractionResult::ExtractionResult(eckit::Stream& s) { values_.push_back(decodeVector(s)); } - std::vector> bitsetStrings; - s >> bitsetStrings; - for (auto& v : bitsetStrings) { - std::vector> bitset; - for (auto& b : v) { - bitset.push_back(std::bitset<64>(b)); - } - mask_.push_back(bitset); - } + // s >> numRanges; + // for (size_t i = 0; i < numRanges; i++) { + // std::vector bitsetUll = decodeVectorUll(s); + // for (auto& b : bitsetUll) { + // mask_[i].push_back(std::bitset<64>(b)); + // } + + // } } void ExtractionResult::values_ptr(double*** values, unsigned long* nrange, unsigned long** nvalues) { @@ -81,15 +141,14 @@ void ExtractionResult::encode(eckit::Stream& s) const { encodeVector(s, v); } - std::vector> bitsetStrings; - for (auto& v : mask_) { - std::vector bitsetString; - for (auto& b : v) { - bitsetString.push_back(b.to_string()); - } - bitsetStrings.push_back(bitsetString); - } - s << bitsetStrings; + // s << mask_.size(); // vector of vectors + // for (auto& v : mask_) { + // std::vector bitsetUll; + // for (auto& b : v) { + // bitsetUll.push_back(b.to_ullong()); + // } + // encodeVector(s, bitsetUll); + // } } void ExtractionResult::print(std::ostream& s) const { diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index 9e2ab98..3ef8ba3 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -25,7 +25,7 @@ namespace gribjump { //---------------------------------------------------------------------------------------------------------------------- -/// @todo This class is now redundant thanks to ExtractionItem. +/// @todo This class is now redundant thanks to ExtractionItem. // XXX not true class ExtractionResult { public: // methods @@ -68,6 +68,8 @@ class ExtractionResult { private: // members std::vector> values_; std::vector>> mask_; + + friend class Serialiser; }; //---------------------------------------------------------------------------------------------------------------------- @@ -96,6 +98,8 @@ class ExtractionRequest { std::vector ranges_; metkit::mars::MarsRequest request_; std::string gridHash_; + + friend class Serialiser; }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/gribjump/Lister.cc b/src/gribjump/Lister.cc index 0a5853b..d0644ba 100644 --- a/src/gribjump/Lister.cc +++ b/src/gribjump/Lister.cc @@ -77,27 +77,47 @@ std::string fdbkeyToStr(const fdb5::Key& key) { filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, const ExItemMap& reqToExtractionItem) { eckit::AutoLock lock(this); filemap_t filemap; + eckit::Timer timer; fdb5::FDBToolRequest fdbreq(unionRequest); auto listIter = fdb_.list(fdbreq, true); + MetricsManager::instance().set("debug_elapsed_fdb_list", timer.elapsed()); + timer.reset("FDB list"); + size_t count = 0; fdb5::ListElement elem; + + // chrono, we're going to accumulate some times + + double time_tostr = 0; + double time_uri = 0; + double time_filemap = 0; + + double time_next=0; + + eckit::Timer timer_next; while (listIter.next(elem)) { + time_next += timer_next.elapsed(); + eckit::Timer timer1; std::string key = fdbkeyToStr(elem.combinedKey()); + time_tostr += timer1.elapsed(); // If key not in map, not related to the request + eckit::Timer timer2; if (reqToExtractionItem.find(key) == reqToExtractionItem.end()) continue; + // Set the URI in the ExtractionItem eckit::URI uri = elem.location().fullUri(); - ExtractionItem* extractionItem = reqToExtractionItem.at(key).get(); extractionItem->URI(uri); + time_uri += timer2.elapsed(); // Add to filemap + eckit::Timer timer3; eckit::PathName fname = uri.path(); auto it = filemap.find(fname); if(it == filemap.end()) { @@ -108,12 +128,21 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons else { it->second.push_back(extractionItem); } + time_filemap += timer3.elapsed(); count++; + + timer_next.reset(""); } - LOG_DEBUG_LIB(LibGribJump) << "Found " << count << " fields in " << filemap.size() << " files" << std::endl; + MetricsManager::instance().set("debug_list_time_tostr", time_tostr); + MetricsManager::instance().set("debug_list_time_uri", time_uri); + MetricsManager::instance().set("debug_list_time_filemap", time_filemap); + + eckit::Timer timer_extra; + + LOG_DEBUG_LIB(LibGribJump) << "Found " << count << " fields in " << filemap.size() << " files" << std::endl; if (count != reqToExtractionItem.size()) { eckit::Log::warning() << "Warning: Number of fields found (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; if (!allowMissing_) { @@ -132,6 +161,11 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons } LOG_DEBUG_LIB(LibGribJump) << "]" << std::endl; } + + MetricsManager::instance().set("debug_list_time_extra", timer_extra.elapsed()); + + MetricsManager::instance().set("debug_listiter_to_filemap", timer.elapsed()); + return filemap; } diff --git a/src/gribjump/Serialiser.cc b/src/gribjump/Serialiser.cc new file mode 100644 index 0000000..2b121aa --- /dev/null +++ b/src/gribjump/Serialiser.cc @@ -0,0 +1,307 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @author Christopher Bradley + +#include "eckit/value/Value.h" +#include "eckit/io/Buffer.h" +#include "gribjump/Serialiser.h" + + +namespace gribjump { + +void Serialiser::encode(eckit::Stream& s, const std::vector& v) { + size_t size = v.size(); + s << size; + eckit::Buffer buffer(v.data(), size * sizeof(double)); + s << buffer; +} + +std::vector Serialiser::decodeVector(eckit::Stream& s) { + size_t size; + s >> size; + eckit::Buffer buffer(size * sizeof(double)); + s >> buffer; + double* data = (double*) buffer.data(); + + return std::vector(data, data + size); +} + +// ----------------------------------------------------------------------------------------------- +// vector of pairs + +// conclusion: naive is worse for vector of pairs. +void Serialiser::encode(eckit::Stream& s, const Ranges& v, bool naive) { + if (naive) { + size_t size = v.size(); + s << size; + for (auto& pair : v) { + s << pair.first; + s << pair.second; + } + return; + } + // else + size_t size = v.size(); + s << size; + + // We know they are pairs, don't need size + eckit::Buffer buffer(v.data(), v.size() * sizeof(size_t) * 2); + s << buffer; +} + +Ranges Serialiser::decodeRanges(eckit::Stream& s, bool naive) { + if (naive) { + Ranges result; + size_t size; + s >> size; + for (size_t i = 0; i < size; i++) { + size_t first; + size_t second; + s >> first; + s >> second; + result.push_back(std::make_pair(first, second)); + } + return result; + } + // else + size_t size; + s >> size; + eckit::Buffer buffer(size * sizeof(size_t) * 2); + s >> buffer; + size_t* data = (size_t*) buffer.data(); + + Ranges result; + for (size_t i = 0; i < size; i++) { + result.push_back(std::make_pair(data[i*2], data[i*2 + 1])); + } + return result; +} + + +// ----------------------------------------------------------------------------------------------- + +void Serialiser::encode(eckit::Stream& s, const std::vector& v) { + // Don't want to just do s << str, since this is quite slow. + // Use a buffer for all strings. + size_t size = v.size(); + s << size; + size_t totalSize = 0; + for (auto& str : v) { + totalSize += str.size(); + s << str.size(); + } + eckit::Buffer buffer(totalSize); + char* data = (char*) buffer.data(); + for (auto& str : v) { + for (auto& c : str) { + *data++ = c; + } + } + s << buffer; +} + +std::vector Serialiser::decodeVectorString(eckit::Stream& s) { + size_t size; + s >> size; + std::vector innerSizes; + size_t totalSize = 0; + for (size_t i = 0; i < size; i++) { + size_t innerSize; + s >> innerSize; + innerSizes.push_back(innerSize); + totalSize += innerSize; + } + + eckit::Buffer buffer(totalSize); + s >> buffer; + char* data = (char*) buffer.data(); + + std::vector result; + size_t offset = 0; + for (auto& innerSize : innerSizes) { + std::string inner(data + offset, innerSize); + result.push_back(inner); + offset += innerSize; + } + + return result; +} + +// Naive version is actually faster +void Serialiser::encodeNaive(eckit::Stream& s, const std::vector& v) { + s << v; +} + +std::vector Serialiser::decodeVectorStringNaive(eckit::Stream& s) { + std::vector result; + s >> result; + return result; +} + +// ----------------------------------------------------------------------------------------------- + +void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { + size_t size = v.size(); + s << size; + size_t totalSize = 0; + for (auto& inner : v) { + totalSize += inner.size(); + s << inner.size(); + } + eckit::Buffer buffer(totalSize * sizeof(double)); + double* data = (double*) buffer.data(); + for (auto& inner : v) { + for (auto& d : inner) { + *data++ = d; + } + } + s << buffer; +} + +std::vector> Serialiser::decodeVectorVector(eckit::Stream& s) { + std::vector> result; + + size_t size; + s >> size; + std::vector innerSizes; + size_t totalSize = 0; + for (size_t i = 0; i < size; i++) { + size_t innerSize; + s >> innerSize; + innerSizes.push_back(innerSize); + totalSize += innerSize; + } + + eckit::Buffer buffer(totalSize * sizeof(double)); + s >> buffer; + double* data = (double*) buffer.data(); + + size_t offset = 0; + for (auto& innerSize : innerSizes) { + std::vector inner(data + offset, data + offset + innerSize); + result.push_back(inner); + offset += innerSize; + } + + return result; +} + +// ----------------------------------------------------------------------------------------------- +void Serialiser::encode(eckit::Stream& s, const std::vector& v, bool naive) { + if (naive) { + size_t size = v.size(); + s << size; + for (auto& req : v) { + req.encode(s); + } + return; + } + + std::vector gridHashes; + RangesList ranges; + + // reserve + gridHashes.reserve(v.size()); + ranges.reserve(v.size()); + + for (auto& req : v) { // This copy is grim + gridHashes.push_back(req.gridHash()); + ranges.push_back(req.ranges()); + } + + encodeNaive(s, gridHashes); + s << ranges.size(); + for (auto& r : ranges) { + encode(s, r, false); + } + + // encode the mars requests naively + s << v.size(); + for (auto& req : v) { + s << req.request(); + } +} + +std::vector Serialiser::decodeExtractionRequests(eckit::Stream& s, bool naive) { + if (naive) { + std::vector result; + size_t size; + s >> size; + for (size_t i = 0; i < size; i++) { + result.push_back(ExtractionRequest(s)); + } + return result; + } + + std::vector gridHashes = decodeVectorStringNaive(s); + + size_t numRanges; + s >> numRanges; + RangesList ranges; + ranges.reserve(numRanges); + for (size_t i = 0; i < numRanges; i++) { + ranges.push_back(decodeRanges(s, false)); + } + + std::vector marsrequests; + size_t numMarsRequests; + s >> numMarsRequests; + marsrequests.reserve(numMarsRequests); + for (size_t i = 0; i < numMarsRequests; i++) { + metkit::mars::MarsRequest marsrequest(s); + marsrequests.push_back(marsrequest); + } + + // repack + std::vector result; + for (size_t i = 0; i < marsrequests.size(); i++) { + result.push_back(ExtractionRequest(marsrequests[i], ranges[i], gridHashes[i])); + } + + return result; +} + + +// ----------------------------------------------------------------------------------------------- +void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { + size_t size = v.size(); + s << size; + for (auto& inner : v) { + size_t innerSize = inner.size(); + s << innerSize; + for (auto& res : inner) { + res.encode(s); + } + } + +} + +std::vector> Serialiser::decodeExtractionResults(eckit::Stream& s) { + std::vector> result; + + size_t size; + s >> size; + for (size_t i = 0; i < size; i++) { + size_t innerSize; + s >> innerSize; + std::vector inner; + for (size_t j = 0; j < innerSize; j++) { + inner.push_back(ExtractionResult(s)); + } + result.push_back(std::move(inner)); + } + + return result; +} + + +} // namespace gribjump + diff --git a/src/gribjump/Serialiser.h b/src/gribjump/Serialiser.h new file mode 100644 index 0000000..cd03717 --- /dev/null +++ b/src/gribjump/Serialiser.h @@ -0,0 +1,53 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @author Christopher Bradley + +#pragma once + +#include +#include "eckit/serialisation/Stream.h" +#include "gribjump/ExtractionData.h" +#include "gribjump/Types.h" + +// Class to help with serialisation of containers + +namespace gribjump { + +class Serialiser { +public: + static void encode(eckit::Stream& s, const std::vector& v); + static std::vector decodeVector(eckit::Stream& s); + + static void encode(eckit::Stream& s, const std::vector& v); + static std::vector decodeVectorString(eckit::Stream& s); + + static void encode(eckit::Stream& s, const std::vector>& v); + static std::vector> decodeVectorVector(eckit::Stream& s); + + static void encode(eckit::Stream& s, const std::vector& v, bool naive=false); + static std::vector decodeExtractionRequests(eckit::Stream& s, bool naive=false); + + // We tend to have a vector of vectors of ExtractionResults + static void encode(eckit::Stream& s, const std::vector>& v); + static std::vector> decodeExtractionResults(eckit::Stream& s); + + + static void encode(eckit::Stream& s, const Ranges& v, bool naive=false); + static Ranges decodeRanges(eckit::Stream& s, bool naive=false); + + //------------------------------------------------------------------------------------------------- + // Naive implementations, for timing comparison + static void encodeNaive(eckit::Stream& s, const std::vector& v); + static std::vector decodeVectorStringNaive(eckit::Stream& s); +}; + +} // namespace gribjump + diff --git a/src/gribjump/remote/RemoteGribJump.h b/src/gribjump/remote/RemoteGribJump.h index e32d066..be7dabc 100644 --- a/src/gribjump/remote/RemoteGribJump.h +++ b/src/gribjump/remote/RemoteGribJump.h @@ -24,7 +24,7 @@ enum class RequestType : uint16_t { SCAN, FORWARD_EXTRACT }; -constexpr static uint16_t remoteProtocolVersion = 0; +constexpr static uint16_t remoteProtocolVersion = 1; class RemoteGribJump : public GribJumpBase { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7e1326c..570c02b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -120,6 +120,15 @@ ecbuild_add_test( LIBS gribjump ) +ecbuild_add_test( + TARGET "gribjump_test_serialiser" + SOURCES "test_serialiser.cc" + INCLUDES "${ECKIT_INCLUDE_DIRS}" + ENVIRONMENT "${gribjump_env}" + NO_AS_NEEDED + LIBS gribjump +) + ecbuild_add_test( TARGET "gribjump_test_misc_units" SOURCES "test_misc_units.cc" diff --git a/tests/test_serialiser.cc b/tests/test_serialiser.cc new file mode 100644 index 0000000..098c55c --- /dev/null +++ b/tests/test_serialiser.cc @@ -0,0 +1,357 @@ +/* + * (C) Copyright 1996- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "eckit/testing/Test.h" +#include "eckit/filesystem/PathName.h" +#include "eckit/filesystem/LocalPathName.h" +#include "eckit/serialisation/FileStream.h" +#include "eckit/io/AutoCloser.h" + +#include "metkit/mars/MarsRequest.h" + +#include "gribjump/Serialiser.h" +#include "gribjump/ExtractionData.h" + +#include "eckit/log/Timer.h" + + +using namespace eckit::testing; + +namespace gribjump::test { + +// Useful for timing +constexpr bool REPORT_TIMES = true; +constexpr size_t N_VECTOR = 100; +constexpr size_t N_VECTOR_VECTOR = 10; +constexpr size_t N_EXTRACTIONREQUESTS = 10; +constexpr size_t N_EXTRACTIONRESULTS = 10; + + +void reportTimes(size_t N, double serialiseTime, double deserialiseTime) { + if (!REPORT_TIMES) return; + eckit::Log::info() << " For N=" << N << std::endl; + eckit::Log::info() << " Serialisation time: " << serialiseTime << std::endl; + eckit::Log::info() << " Deserialisation time: " << deserialiseTime << std::endl; + eckit::Log::info() << " Total time: " << serialiseTime + deserialiseTime << std::endl; +} + +//----------------------------------------------------------------------------- + + +// CASE( "Serialisation: Vector" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// std::vector vout(N_VECTOR); +// for (size_t i = 0; i < N_VECTOR; i++) { +// vout[i] = i; +// } + +// eckit::Timer timer_serialise; +// { +// eckit::FileStream sout(filename, "w"); +// auto c = eckit::closer(sout); +// Serialiser::encode(sout, vout); +// } +// timer_serialise.stop(); + +// eckit::Timer timer_deserialise; +// std::vector vin; +// { +// eckit::FileStream sin(filename, "r"); +// auto c = eckit::closer(sin); +// vin = Serialiser::decodeVector(sin); +// } +// timer_deserialise.stop(); + +// EXPECT_EQUAL(vout.size(), vin.size()); +// for (size_t i = 0; i < vout.size(); i++) { +// EXPECT_EQUAL(vout[i], vin[i]); +// } + +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// } + + +// //----------------------------------------------------------------------------- + + +// CASE( "Serialisation Naive: Vector" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// std::vector vout; +// for (size_t i = 0; i < N_VECTOR; i++) { +// vout.push_back("this is a test string look at it go woah " + std::to_string(i)); +// } + +// eckit::Timer timer_serialise; +// { +// eckit::FileStream sout(filename, "w"); +// auto c = eckit::closer(sout); +// Serialiser::encodeNaive(sout, vout); +// } +// timer_serialise.stop(); + +// eckit::Timer timer_deserialise; +// std::vector vin; +// { +// eckit::FileStream sin(filename, "r"); +// auto c = eckit::closer(sin); +// vin = Serialiser::decodeVectorStringNaive(sin); +// } +// timer_deserialise.stop(); + +// EXPECT_EQUAL(vout.size(), vin.size()); +// for (size_t i = 0; i < vout.size(); i++) { +// EXPECT_EQUAL(vout[i], vin[i]); +// } + +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// } + +// //----------------------------------------------------------------------------- + +// CASE( "Serialisation: Vector" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// std::vector vout; +// for (size_t i = 0; i < N_VECTOR; i++) { +// vout.push_back("this is a test string look at it go woah and look its getting even bigger now this is probably big enough " + std::to_string(i)); +// } + +// eckit::Timer timer_serialise; +// { +// eckit::FileStream sout(filename, "w"); +// auto c = eckit::closer(sout); +// Serialiser::encode(sout, vout); +// } +// timer_serialise.stop(); + +// eckit::Timer timer_deserialise; +// std::vector vin; +// { +// eckit::FileStream sin(filename, "r"); +// auto c = eckit::closer(sin); +// vin = Serialiser::decodeVectorString(sin); +// } +// timer_deserialise.stop(); + +// EXPECT_EQUAL(vout.size(), vin.size()); +// for (size_t i = 0; i < vout.size(); i++) { +// EXPECT_EQUAL(vout[i], vin[i]); +// } + +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// } + + + +//----------------------------------------------------------------------------- + +CASE( "Serialisation: Ranges" ) { + + eckit::PathName filename = "test_serialiser.out"; + + bool naive = false; + for (size_t i = 0; i < 2; i++) { + Ranges vout; + for (size_t i = 0; i < N_VECTOR; i++) { + vout.push_back(Range(i, i+10)); + } + + eckit::Timer timer_serialise; + { + eckit::FileStream sout(filename, "w"); + auto c = eckit::closer(sout); + Serialiser::encode(sout, vout, naive); + } + timer_serialise.stop(); + + eckit::Timer timer_deserialise; + Ranges vin; + { + eckit::FileStream sin(filename, "r"); + auto c = eckit::closer(sin); + vin = Serialiser::decodeRanges(sin, naive); + } + timer_deserialise.stop(); + + EXPECT_EQUAL(vout.size(), vin.size()); + for (size_t i = 0; i < vout.size(); i++) { + EXPECT_EQUAL(vout[i].first, vin[i].first); + EXPECT_EQUAL(vout[i].second, vin[i].second); + } + + eckit::Log::info() << "Naive: " << naive << std::endl; + reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); + naive = !naive; + } +} + +//----------------------------------------------------------------------------- + +// CASE( "Serialisation: Vector>" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// std::vector> vout; +// for (size_t i = 0; i < N_VECTOR_VECTOR; i++) { +// std::vector inner = {1.0, 2.0, 3.0}; +// vout.push_back(inner); +// } + +// eckit::Timer timer_serialise; +// { +// eckit::FileStream sout(filename, "w"); +// auto c = eckit::closer(sout); +// Serialiser::encode(sout, vout); +// } +// timer_serialise.stop(); + +// eckit::Timer timer_deserialise; +// std::vector> vin; +// { +// eckit::FileStream sin(filename, "r"); +// auto c = eckit::closer(sin); +// vin = Serialiser::decodeVectorVector(sin); +// } +// timer_deserialise.stop(); + +// EXPECT_EQUAL(vout.size(), vin.size()); +// for (size_t i = 0; i < vout.size(); i++) { +// EXPECT_EQUAL(vout[i].size(), vin[i].size()); +// for (size_t j = 0; j < vout[i].size(); j++) { +// EXPECT_EQUAL(vout[i][j], vin[i][j]); +// } +// } + +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// } + +// //----------------------------------------------------------------------------- + +CASE( "Serialisation: Vector" ) { + + eckit::PathName filename = "test_serialiser.out"; + bool naive = false; + for (size_t i = 0; i < 2; i++) { + std::vector vout; + for (size_t i = 0; i < N_EXTRACTIONREQUESTS; i++) { + std::string s = "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=" + std::to_string(i); + metkit::mars::MarsRequest marsrequest = metkit::mars::MarsRequest::parse(s); + Ranges ranges = {Range(i, i+10), Range(i+11, i+12), Range(i+100, i+200)}; + std::string hash = "testHash"; + + ExtractionRequest req(marsrequest, ranges, hash); + vout.push_back(req); + } + + eckit::Timer timer_serialise; + { + eckit::FileStream sout(filename, "w"); + auto c = eckit::closer(sout); + Serialiser::encode(sout, vout, naive); + } + timer_serialise.stop(); + + eckit::Timer timer_deserialise; + std::vector vin; + { + eckit::FileStream sin(filename, "r"); + auto c = eckit::closer(sin); + vin = Serialiser::decodeExtractionRequests(sin, naive); + } + timer_deserialise.stop(); + + EXPECT_EQUAL(vout.size(), vin.size()); + for (size_t i = 0; i < vout.size(); i++) { + EXPECT_EQUAL(vout[i].request().asString(), vin[i].request().asString()); + EXPECT_EQUAL(vout[i].gridHash(), vin[i].gridHash()); + EXPECT_EQUAL(vout[i].ranges(), vin[i].ranges()); + } + + eckit::Log::info() << "Naive: " << naive << std::endl; + reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); + naive = !naive; + } +} + +// //----------------------------------------------------------------------------- + +// CASE( "Serialisation: Vector>" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// std::vector> vout; +// for (size_t i = 0; i < N_EXTRACTIONRESULTS; i++) { +// std::vector inner; +// for (size_t j = 0; j < 1; j++) { +// std::vector> values = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; +// std::vector>> mask = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; +// inner.push_back(ExtractionResult(values, mask)); +// } +// vout.push_back(std::move(inner)); +// } + +// eckit::Timer timer_serialise; +// { +// eckit::FileStream sout(filename, "w"); +// auto c = eckit::closer(sout); +// Serialiser::encode(sout, vout); +// } +// timer_serialise.stop(); + +// eckit::Timer timer_deserialise; +// std::vector> vin; +// { +// eckit::FileStream sin(filename, "r"); +// auto c = eckit::closer(sin); +// vin = Serialiser::decodeExtractionResults(sin); +// } +// timer_deserialise.stop(); + +// EXPECT_EQUAL(vout.size(), vin.size()); +// for (size_t i = 0; i < vout.size(); i++) { +// EXPECT_EQUAL(vout[i].size(), vin[i].size()); +// for (size_t j = 0; j < vout[i].size(); j++) { +// auto vout_values = vout[i][j].values(); +// auto vin_values = vin[i][j].values(); +// EXPECT_EQUAL(vout_values.size(), vin_values.size()); +// for (size_t k = 0; k < vout_values.size(); k++) { +// EXPECT_EQUAL(vout_values[k], vin_values[k]); +// } + +// auto vout_mask = vout[i][j].mask(); +// auto vin_mask = vin[i][j].mask(); +// EXPECT_EQUAL(vout_mask.size(), vin_mask.size()); +// for (size_t k = 0; k < vout_mask.size(); k++) { +// EXPECT_EQUAL(vout_mask[k].size(), vin_mask[k].size()); +// for (size_t l = 0; l < vout_mask[k].size(); l++) { +// EXPECT_EQUAL(vout_mask[k][l], vin_mask[k][l]); +// } +// } +// } +// } + +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// } + +} // namespace gribjump + + +int main(int argc, char **argv) +{ + return run_tests ( argc, argv ); +} From 8adf51b98acdebd05f44b6b9d14bd7c798c649fa Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 12 Nov 2024 19:18:38 +0000 Subject: [PATCH 3/8] String instead of MarsRequest, wip --- pygribjump/src/pygribjump/pygribjump.py | 7 +- src/gribjump/Engine.cc | 232 ++++++++++++++++++++--- src/gribjump/Engine.h | 3 +- src/gribjump/ExtractionData.cc | 50 ++++- src/gribjump/ExtractionData.h | 6 +- src/gribjump/ExtractionItem.h | 6 +- src/gribjump/GribJumpBase.h | 3 +- src/gribjump/Lister.cc | 17 +- src/gribjump/LocalGribJump.cc | 2 +- src/gribjump/gribjump_c.cc | 14 +- src/gribjump/remote/Request.cc | 2 +- tests/test_engine.cc | 240 ++++++++++++------------ tests/test_gribinfo.cc | 47 ++--- 13 files changed, 436 insertions(+), 193 deletions(-) diff --git a/pygribjump/src/pygribjump/pygribjump.py b/pygribjump/src/pygribjump/pygribjump.py index e52e7a0..ac0108c 100644 --- a/pygribjump/src/pygribjump/pygribjump.py +++ b/pygribjump/src/pygribjump/pygribjump.py @@ -258,7 +258,8 @@ class ExtractionRequest: The ranges to extract. """ def __init__(self, req, ranges, gridHash=None): - reqstr = "retrieve,"+dic_to_request(req) + # reqstr = "retrieve,"+dic_to_request(req) + reqstr = dic_to_request(req) rangestr = list_to_rangestr(ranges) request = ffi.new('gribjump_extraction_request_t**') c_reqstr = ffi.new("char[]", reqstr.encode()) @@ -358,7 +359,9 @@ def list_to_rangestr(ranges): def dic_to_request(dic): # e.g. {"class":"od", "expver":"0001", "levtype":"pl"} -> "class=od,expver=0001,levtype=pl" - return ','.join(['='.join([k, v]) for k, v in dic.items()]) + # return ','.join(['='.join([k, v]) for k, v in dic.items()]) + # We order the keys in alphabetical order + return ','.join(['='.join([k, dic[k]]) for k in sorted(dic.keys())]) def version(): return __version__ diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index 668e799..e6ad2f9 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -11,8 +11,10 @@ /// @author Christopher Bradley #include "eckit/log/Plural.h" +#include "eckit/utils/StringTools.h" #include "metkit/mars/MarsExpension.h" +#include "metkit/mars/MarsParser.h" #include "gribjump/Engine.h" #include "gribjump/ExtractionItem.h" @@ -80,6 +82,18 @@ flattenedKeys_t buildFlatKeys(const ExtractionRequests& requests, bool flatten) flattenedKeys_t keymap; + // ASSERT(!flatten); // polytope already gives flat requests + + // for (const auto& req : requests) { + // const std::string& baseRequest = req.request_string(); + // ASSERT(!baseRequest.empty()); + // keymap[baseRequest] = std::vector(); + // keymap[baseRequest].push_back(baseRequest); + // } + // return keymap; + + // // debug/ + ASSERT(false); for (const auto& req : requests) { const metkit::mars::MarsRequest& baseRequest = req.request(); keymap[baseRequest] = std::vector(); @@ -103,18 +117,71 @@ flattenedKeys_t buildFlatKeys(const ExtractionRequests& requests, bool flatten) return keymap; } -metkit::mars::MarsRequest unionRequest(const ExtractionRequests& requests) { +// ---------------------------------------------------------------------------------------------------------------------- +std::string unionise(const std::vector& requests) { + // Take many marsrequest-like strings and combine them into one string. + // Note, makes some assumptions: + // 1. Each string is unique + // 2. For each key there is a single value. E.g., "step=1/2/3" should be pre-split into three strings. + // 3. Does not check if the string is sensible. + // takes a vector of strings, each like "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0" + // The result is a string, that should be parsable my mars. + + // split the string into key value pairs by comma + std::map> keyValues; + for (auto& r : requests) { + const std::string& s = r.request_string(); + std::vector kvs = eckit::StringTools::split(",", s); // might be faster to use tokenizer directly. + for (auto& kv : kvs) { + std::vector kv_s = eckit::StringTools::split("=", kv); + if (kv_s.size() != 2) continue; // ignore verb + keyValues[kv_s[0]].insert(kv_s[1]); + } - /// @todo: we should do some check not to merge on keys like class and stream - metkit::mars::MarsRequest unionRequest = requests.front().request(); - for(size_t i = 1; i < requests.size(); ++i) { - unionRequest.merge(requests[i].request()); + // Important! Canonicalise string so that we can use it to match with fdb. We do this by sorting the keys. + // e.g. expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0 + // becomes class=od,date=20241110,domain=g,expver=xxxx,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0 + std::sort(kvs.begin(), kvs.end()); + std::string canonicalised = ""; + for (auto& kv : kvs) { + // skip if it is "retrieve" + if (kv.find("retrieve") != std::string::npos) continue; + canonicalised += kv; + if (kv != kvs.back()) { + canonicalised += ","; + } + } + // r.request_string(canonicalised); + NOTIMP; } - - eckit::Log::info() << "Gribjump: Union request is " << unionRequest << std::endl; - - return unionRequest; + + // now construct a string with all the values: + // expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0/1/2/3/4 + std::string result = "retrieve,"; + size_t i = 0; + for (auto& [key, values] : keyValues) { + result += key + "="; + if (values.size() == 1) { + result += *values.begin(); + } else { + size_t j = 0; + for (auto& value : values) { + result += value; + if (j != values.size() - 1) { + result += "/"; + } + j++; + } + } + if (i != keyValues.size() - 1) { + result += ","; + } + i++; + } + + return result; } +// ---------------------------------------------------------------------------------------------------------------------- bool isRemote(eckit::URI uri) { return uri.scheme() == "fdb"; @@ -127,38 +194,139 @@ Engine::Engine() {} Engine::~Engine() {} -ExItemMap Engine::buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten){ - ExItemMap keyToExtractionItem; - flattenedKeys_t flatKeys = buildFlatKeys(requests, flatten); // Map from base request to {flattened keys} +// metkit::mars::MarsRequest unionRequest(const ExtractionRequests& requests) { + +// /// @todo: we should do some check not to merge on keys like class and stream +// // metkit::mars::MarsRequest unionRequest = requests.front().request(); +// // for(size_t i = 1; i < requests.size(); ++i) { +// // unionRequest.merge(requests[i].request()); +// // } +// std::string unionRequestStr = unionise(requests); +// std::istringstream istream(unionRequestStr); +// metkit::mars::MarsParser parser(istream); +// std::vector unionRequests = parser.parse(); +// ASSERT(unionRequests.size() == 1); +// metkit::mars::MarsRequest unionRequest = unionRequests[0]; +// eckit::Log::info() << "Gribjump: Union request is " << unionRequest << std::endl; + +// MetricsManager::instance().set("union_request", unionRequestStr); + +// return unionRequest; +// } + +metkit::mars::MarsRequest combinedFoo(const ExtractionRequests& requests, bool flatten, ExItemMap& keyToExtractionItem ){ + + ASSERT(!flatten); + + // Split strings into one unified map + std::map> keyValues; + for (auto& r : requests) { + const std::string& s = r.request_string(); + std::vector kvs = eckit::StringTools::split(",", s); // might be faster to use tokenizer directly. + for (auto& kv : kvs) { + std::vector kv_s = eckit::StringTools::split("=", kv); + if (kv_s.size() != 2) continue; // ignore verb + keyValues[kv_s[0]].insert(kv_s[1]); + } - LOG_DEBUG_LIB(LibGribJump) << "Built flat keys" << std::endl; + // Important! Canonicalise string by sorting keys + std::sort(kvs.begin(), kvs.end()); + std::string canonicalised = ""; + for (auto& kv : kvs) { + // skip if it is "retrieve" + if (kv.find("retrieve") != std::string::npos) continue; + canonicalised += kv; + if (kv != kvs.back()) { + canonicalised += ","; + } + } + ASSERT(keyToExtractionItem.find(canonicalised) == keyToExtractionItem.end()); /// no repeats + auto extractionItem = std::make_unique(canonicalised, r.ranges()); // TODO: XXX we're giving it the mars request instead of the request string + extractionItem->gridHash(r.gridHash()); + keyToExtractionItem.emplace(canonicalised, std::move(extractionItem)); // 1-to-1-map + } - // Create the 1-to-1 map - for (size_t i = 0; i < requests.size(); i++) { - const metkit::mars::MarsRequest& basereq = requests[i].request(); - const std::vector keys = flatKeys[basereq]; - for (const auto& key : keys) { - ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// @todo support duplicated requests? - auto extractionItem = std::make_unique(basereq, requests[i].ranges()); - extractionItem->gridHash(requests[i].gridHash()); - keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map + // --- construct the union request + + // now construct a string with all the values: + // expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0/1/2/3/4 + std::string result = "retrieve,"; + size_t i = 0; + for (auto& [key, values] : keyValues) { + result += key + "="; + if (values.size() == 1) { + result += *values.begin(); + } else { + size_t j = 0; + for (auto& value : values) { + result += value; + if (j != values.size() - 1) { + result += "/"; + } + j++; + } + } + if (i != keyValues.size() - 1) { + result += ","; } + i++; + } + + std::istringstream istream(result); + metkit::mars::MarsParser parser(istream); + std::vector unionRequests = parser.parse(); + ASSERT(unionRequests.size() == 1); + + return unionRequests[0]; +} + +ExItemMap Engine::buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten){ + + ExItemMap keyToExtractionItem; + ASSERT(!flatten); + // It is already flat, and we already have strings. + for (size_t i = 0; i < requests.size(); i++) { + const std::string& key = requests[i].request_string(); + ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// no repeats + auto extractionItem = std::make_unique(requests[i].request_string(), requests[i].ranges()); // TODO: XXX we're giving it the mars request instead of the request string + extractionItem->gridHash(requests[i].gridHash()); + keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map } return keyToExtractionItem; + + + // Code that assumes we might need to flatten, which requires you to be using mars requests not strings: + ASSERT(false); + // flattenedKeys_t flatKeys = buildFlatKeys(requests, flatten); // Map from base request to {flattened keys} + + // LOG_DEBUG_LIB(LibGribJump) << "Built flat keys" << std::endl; + + // // Create the 1-to-1 map + // for (size_t i = 0; i < requests.size(); i++) { + // const metkit::mars::MarsRequest& basereq = requests[i].request(); + // const std::vector keys = flatKeys[basereq]; + // for (const auto& key : keys) { + // ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// @todo support duplicated requests? + // auto extractionItem = std::make_unique(basereq, requests[i].ranges()); + // extractionItem->gridHash(requests[i].gridHash()); + // keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map + // } + // } + + // return keyToExtractionItem; } -filemap_t Engine::buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem) { +filemap_t Engine::buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem) { // Map files to ExtractionItem eckit::Timer timer("Gribjump Engine: Building file map"); - const metkit::mars::MarsRequest req = unionRequest(requests); - MetricsManager::instance().set("union_request", req.asString()); + // const metkit::mars::MarsRequest req = unionRequest(requests); MetricsManager::instance().set("debug_elapsed_union_request", timer.elapsed()); timer.reset("Gribjump Engine: Flattened requests and constructed union request"); - filemap_t filemap = FDBLister::instance().fileMap(req, keyToExtractionItem); + filemap_t filemap = FDBLister::instance().fileMap(unionrequest, keyToExtractionItem); return filemap; } @@ -246,8 +414,15 @@ void Engine::scheduleTasks(filemap_t& filemap){ ResultsMap Engine::extract(const ExtractionRequests& requests, bool flatten) { eckit::Timer timer("Engine::extract"); - ExItemMap keyToExtractionItem = buildKeyToExtractionItem(requests, flatten); // Owns the ExtractionItems - filemap_t filemap = buildFileMap(requests, keyToExtractionItem); + + // // Combine these? + // ExItemMap keyToExtractionItem = buildKeyToExtractionItem(requests, flatten); // Owns the ExtractionItems + // const metkit::mars::MarsRequest unionreq = unionRequest(requests); + + ExItemMap keyToExtractionItem; + metkit::mars::MarsRequest unionreq = combinedFoo(requests, flatten, keyToExtractionItem); + + filemap_t filemap = buildFileMap(unionreq, keyToExtractionItem); MetricsManager::instance().set("elapsed_build_filemap", timer.elapsed()); timer.reset("Gribjump Engine: Built file map"); @@ -268,6 +443,7 @@ ResultsMap Engine::collectResults(ExItemMap& keyToExtractionItem) { // Create map of base request to vector of extraction items. Takes ownership of the ExtractionItems ResultsMap results; + // NOTIMP; for (auto& [key, ex] : keyToExtractionItem) { results[ex->request()].push_back(std::move(ex)); } diff --git a/src/gribjump/Engine.h b/src/gribjump/Engine.h index 57f5760..b93466d 100644 --- a/src/gribjump/Engine.h +++ b/src/gribjump/Engine.h @@ -43,7 +43,8 @@ class Engine { private: - filemap_t buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem); + // filemap_t buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem); + filemap_t buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem); ExItemMap buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten); ResultsMap collectResults(ExItemMap& keyToExtractionItem); void forwardRemoteExtraction(filemap_t& filemap); diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index 68681a4..2d018a3 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -14,6 +14,8 @@ #include "eckit/value/Value.h" #include "eckit/io/Buffer.h" +#include "metkit/mars/MarsParser.h" + namespace gribjump { namespace { @@ -114,7 +116,7 @@ ExtractionResult::ExtractionResult(eckit::Stream& s) { values_.push_back(decodeVector(s)); } - // s >> numRanges; + // s >> numRanges; // maybe wrong // for (size_t i = 0; i < numRanges; i++) { // std::vector bitsetUll = decodeVectorUll(s); // for (auto& b : bitsetUll) { @@ -122,6 +124,17 @@ ExtractionResult::ExtractionResult(eckit::Stream& s) { // } // } + + + std::vector> bitsetStrings; + s >> bitsetStrings; + for (auto& v : bitsetStrings) { + std::vector> bitset; + for (auto& b : v) { + bitset.push_back(std::bitset<64>(b)); + } + mask_.push_back(bitset); + } } void ExtractionResult::values_ptr(double*** values, unsigned long* nrange, unsigned long** nvalues) { @@ -149,6 +162,17 @@ void ExtractionResult::encode(eckit::Stream& s) const { // } // encodeVector(s, bitsetUll); // } + + + std::vector> bitsetStrings; + for (auto& v : mask_) { + std::vector bitsetString; + for (auto& b : v) { + bitsetString.push_back(b.to_string()); + } + bitsetStrings.push_back(bitsetString); + } + s << bitsetStrings; } void ExtractionResult::print(std::ostream& s) const { @@ -183,11 +207,30 @@ ExtractionRequest::ExtractionRequest(const metkit::mars::MarsRequest& request, c ranges_(ranges), request_(request), gridHash_(gridHash) + { + + NOTIMP; // debug + } + +ExtractionRequest::ExtractionRequest(const std::string& request, const std::vector& ranges, std::string gridHash): + ranges_(ranges), + request_string_(request), + gridHash_(gridHash) {} + ExtractionRequest::ExtractionRequest() {} ExtractionRequest::ExtractionRequest(eckit::Stream& s) { - request_ = metkit::mars::MarsRequest(s); + // request_ = metkit::mars::MarsRequest(s); // original + + // Now convert it to a mars request + s >> request_string_; + // request_ = metkit::mars::MarsRequest::parse(request_string_); // very very slow + // takes an istream + // std::istringstream istream(request_string_); + // metkit::mars::MarsParser parser(istream); + // request_ = parser.parse()[0]; // hard asserting that this expands to one request + s >> gridHash_; size_t numRanges; s >> numRanges; @@ -228,7 +271,8 @@ eckit::Stream& operator<<(eckit::Stream& s, const ExtractionRequest& o) { } void ExtractionRequest::encode(eckit::Stream& s) const { - s << request_; + // s << request_; + s << request_string_; s << gridHash_; s << ranges_.size(); for (auto& [start, end] : ranges_) { diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index 3ef8ba3..f88499a 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -80,12 +80,15 @@ class ExtractionRequest { ExtractionRequest(); ExtractionRequest(const metkit::mars::MarsRequest&, const std::vector&, std::string gridHash=""); + ExtractionRequest(const std::string&, const std::vector&, std::string gridHash=""); explicit ExtractionRequest(eckit::Stream& s); std::vector split(const std::vector& keys) const; std::vector split(const std::string& key) const; const std::vector& ranges() const {return ranges_;} - const metkit::mars::MarsRequest& request() const {return request_;} + const metkit::mars::MarsRequest& request() const {NOTIMP; return request_;} + const std::string& request_string() const {return request_string_;} + void request_string(const std::string& s) {request_string_ = s;} const std::string& gridHash() const {return gridHash_;} private: // methods @@ -97,6 +100,7 @@ class ExtractionRequest { private: // members std::vector ranges_; metkit::mars::MarsRequest request_; + std::string request_string_; // debug: is this better? std::string gridHash_; friend class Serialiser; diff --git a/src/gribjump/ExtractionItem.h b/src/gribjump/ExtractionItem.h index 607b2fd..1fcc0d7 100644 --- a/src/gribjump/ExtractionItem.h +++ b/src/gribjump/ExtractionItem.h @@ -26,7 +26,7 @@ class ExtractionItem : public eckit::NonCopyable { public: - ExtractionItem(const metkit::mars::MarsRequest& baseRequest, const Ranges& ranges): + ExtractionItem(const std::string& baseRequest, const Ranges& ranges): request_(baseRequest), ranges_(ranges) { /// @note We could reserve the values and mask here based on the ranges /// @note We're not always going to have mars requests (e.g. file name, tree, ...) More generic object? @@ -41,7 +41,7 @@ class ExtractionItem : public eckit::NonCopyable { ExValues& values() { return values_; } const ExMask& mask() const { return mask_; } const Ranges& intervals() const { return ranges_; } - const metkit::mars::MarsRequest& request() const { return request_; } + const std::string& request() const { return request_; } /// @note alternatively we could store the offset directly instead of the uri. eckit::Offset offset() const { @@ -99,7 +99,7 @@ class ExtractionItem : public eckit::NonCopyable { private: - const metkit::mars::MarsRequest request_; + const std::string request_; const Ranges ranges_; // Set on Listing diff --git a/src/gribjump/GribJumpBase.h b/src/gribjump/GribJumpBase.h index 190355f..704ab97 100644 --- a/src/gribjump/GribJumpBase.h +++ b/src/gribjump/GribJumpBase.h @@ -33,7 +33,8 @@ namespace fdb5 { namespace gribjump { -using ResultsMap = std::map>>; +// using ResultsMap = std::map>>; +using ResultsMap = std::map>>; class GribJumpBase : public eckit::NonCopyable { public: diff --git a/src/gribjump/Lister.cc b/src/gribjump/Lister.cc index d0644ba..fff0700 100644 --- a/src/gribjump/Lister.cc +++ b/src/gribjump/Lister.cc @@ -78,8 +78,9 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons eckit::AutoLock lock(this); filemap_t filemap; eckit::Timer timer; - + std::cout << "DEBUG: unionRequest: " << unionRequest << std::endl; fdb5::FDBToolRequest fdbreq(unionRequest); + auto listIter = fdb_.list(fdbreq, true); MetricsManager::instance().set("debug_elapsed_fdb_list", timer.elapsed()); @@ -98,7 +99,9 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons double time_next=0; eckit::Timer timer_next; + size_t fdb_count=0; while (listIter.next(elem)) { + fdb_count++; time_next += timer_next.elapsed(); eckit::Timer timer1; @@ -132,7 +135,8 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons count++; - timer_next.reset(""); + // timer_next.reset(""); + } MetricsManager::instance().set("debug_list_time_tostr", time_tostr); @@ -143,11 +147,13 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons eckit::Timer timer_extra; LOG_DEBUG_LIB(LibGribJump) << "Found " << count << " fields in " << filemap.size() << " files" << std::endl; + LOG_DEBUG_LIB(LibGribJump) << "FDB count: " << fdb_count << std::endl; if (count != reqToExtractionItem.size()) { eckit::Log::warning() << "Warning: Number of fields found (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; if (!allowMissing_) { std::stringstream ss; ss << "Found " << count << " fields but " << reqToExtractionItem.size() << " were requested." << std::endl; + ss << "Union request: " << unionRequest << std::endl; throw DataNotFoundException(ss.str()); } } @@ -166,6 +172,13 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons MetricsManager::instance().set("debug_listiter_to_filemap", timer.elapsed()); + // // XXX DEBUG + // std::stringstream ss; + // ss << "DEBUG EXIT: FDBLister::fileMap() took " << timer.elapsed() << "s" << std::endl; + // ss << "Union request" << unionRequest << std::endl; + // throw eckit::SeriousBug(ss.str()); + // // XXX DEBUG + return filemap; } diff --git a/src/gribjump/LocalGribJump.cc b/src/gribjump/LocalGribJump.cc index 1386ce9..84c54d5 100644 --- a/src/gribjump/LocalGribJump.cc +++ b/src/gribjump/LocalGribJump.cc @@ -93,7 +93,7 @@ std::vector>> LocalGribJump::extra std::vector>> extractionResults; for (auto& req : requests) { - auto it = results.find(req.request()); + auto it = results.find(req.request_string()); ASSERT(it != results.end()); std::vector> res; for (auto& item : it->second) { diff --git a/src/gribjump/gribjump_c.cc b/src/gribjump/gribjump_c.cc index 0b323a5..9d183b1 100644 --- a/src/gribjump/gribjump_c.cc +++ b/src/gribjump/gribjump_c.cc @@ -137,23 +137,23 @@ int gribjump_new_request(gribjump_extraction_request_t** request, const char* re // rangesstr is a comma-separated list of ranges, e.g. "0-10,20-30" // NB: Treat the requests as raw requests. - std::istringstream iss(reqstr); - metkit::mars::MarsParser parser(iss); - std::vector requests = parser.parse(); - ASSERT(requests.size() == 1); - metkit::mars::MarsRequest mreq(requests[0]); + // std::istringstream iss(reqstr); + // metkit::mars::MarsParser parser(iss); + // std::vector requests = parser.parse(); + // ASSERT(requests.size() == 1); + // metkit::mars::MarsRequest mreq(requests[0]); // Parse the ranges string std::vector ranges = eckit::StringTools::split(",", rangesstr); std::vector rangevec; for (const auto& range : ranges) { - std::vector kv = eckit::StringTools::split("-", range); + std::vector kv = eckit::StringTools::split("-", range); // this is awful ASSERT(kv.size() == 2); rangevec.push_back(std::make_pair(std::stoi(kv[0]), std::stoi(kv[1]))); } std::string gridhash_str = gridhash ? std::string(gridhash) : ""; - *request = new gribjump_extraction_request_t(mreq, rangevec, gridhash_str); + *request = new gribjump_extraction_request_t(reqstr, rangevec, gridhash_str); }); } diff --git a/src/gribjump/remote/Request.cc b/src/gribjump/remote/Request.cc index 57d3cab..2793c6c 100644 --- a/src/gribjump/remote/Request.cc +++ b/src/gribjump/remote/Request.cc @@ -120,7 +120,7 @@ void ExtractRequest::replyToClient() { for (size_t i = 0; i < nRequests; i++) { LOG_DEBUG_LIB(LibGribJump) << "Sending result " << i << " to client" << std::endl; - auto it = results_.find(requests_[i].request()); + auto it = results_.find(requests_[i].request_string()); ASSERT(it != results_.end()); std::vector>& items = it->second; // ExtractionItems items = it->second; diff --git a/tests/test_engine.cc b/tests/test_engine.cc index 18bbc32..d8ee6ca 100644 --- a/tests/test_engine.cc +++ b/tests/test_engine.cc @@ -96,127 +96,127 @@ CASE ("Engine: pre-test setup") { CASE ("Engine: Basic extraction") { - - // --- Setup - eckit::testing::SetEnv fdbconfig("FDB5_CONFIG", fdbConfig(tmpdir).c_str()); - eckit::testing::SetEnv allowmissing("GRIBJUMP_ALLOW_MISSING", "0"); // We have deliberately missing data in the request. - - // --- Extract (test 1) - std::vector requests = { - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc")[0].request() // Deliberately missing data - }; - - std::vector> allIntervals = { - {std::make_pair(0, 5), std::make_pair(20, 30)}, - {std::make_pair(0, 5), std::make_pair(20, 30)}, - {std::make_pair(0, 5), std::make_pair(20, 30)}, - {std::make_pair(0, 5), std::make_pair(20, 30)} - }; - - Engine engine; - ExtractionRequests exRequests; - for (size_t i = 0; i < requests.size(); i++) { - exRequests.push_back(ExtractionRequest(requests[i], allIntervals[i], gridHash)); - } - // We expect a throw due to missing data - EXPECT_THROWS_AS(engine.extract(exRequests, false), DataNotFoundException); - - // drop the final request - exRequests.pop_back(); - - ResultsMap results = engine.extract(exRequests, false); - EXPECT_NO_THROW(engine.raiseErrors()); - - // print contents of map - for (auto& [req, exs] : results) { - LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; - for (auto& ex : exs) { - ex->debug_print(); - } - } - - // Check correct values - size_t count = 0; - for (size_t i = 0; i < 3; i++) { - metkit::mars::MarsRequest req = requests[i]; - std::vector intervals = allIntervals[i]; - auto& exs = results[req]; - auto comparisonValues = eccodesExtract(req, intervals); - for (size_t j = 0; j < exs.size(); j++) { - for (size_t k = 0; k < comparisonValues[j].size(); k++) { - for (size_t l = 0; l < comparisonValues[j][k].size(); l++) { - count++; - double v = exs[j]->values()[k][l]; - if (std::isnan(v)) { - EXPECT(comparisonValues[j][k][l] == 9999); - continue; - } - - EXPECT(comparisonValues[j][k][l] == v); - } - } - } - } - // only count the 3 intervals with data - EXPECT(count == 45); - - // --- Extract (test 2) - // Same request, all in one (test flattening) - /// @todo, currently, the user cannot know order of the results after flattening, making this feature not very useful. - /// We impose an order internally (currently, alphabetical). + NOTIMP; + // // --- Setup + // eckit::testing::SetEnv fdbconfig("FDB5_CONFIG", fdbConfig(tmpdir).c_str()); + // eckit::testing::SetEnv allowmissing("GRIBJUMP_ALLOW_MISSING", "0"); // We have deliberately missing data in the request. + + // // --- Extract (test 1) + // std::vector requests = { + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc")[0].request(), + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc")[0].request() // Deliberately missing data + // }; + + // std::vector> allIntervals = { + // {std::make_pair(0, 5), std::make_pair(20, 30)}, + // {std::make_pair(0, 5), std::make_pair(20, 30)}, + // {std::make_pair(0, 5), std::make_pair(20, 30)}, + // {std::make_pair(0, 5), std::make_pair(20, 30)} + // }; + + // Engine engine; + // ExtractionRequests exRequests; + // for (size_t i = 0; i < requests.size(); i++) { + // exRequests.push_back(ExtractionRequest(requests[i], allIntervals[i], gridHash)); + // } + // // We expect a throw due to missing data + // EXPECT_THROWS_AS(engine.extract(exRequests, false), DataNotFoundException); + + // // drop the final request + // exRequests.pop_back(); + + // ResultsMap results = engine.extract(exRequests, false); + // EXPECT_NO_THROW(engine.raiseErrors()); + + // // print contents of map + // for (auto& [req, exs] : results) { + // LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; + // for (auto& ex : exs) { + // ex->debug_print(); + // } + // } + + // // Check correct values + // size_t count = 0; + // for (size_t i = 0; i < 3; i++) { + // metkit::mars::MarsRequest req = requests[i]; + // std::vector intervals = allIntervals[i]; + // auto& exs = results[req]; + // auto comparisonValues = eccodesExtract(req, intervals); + // for (size_t j = 0; j < exs.size(); j++) { + // for (size_t k = 0; k < comparisonValues[j].size(); k++) { + // for (size_t l = 0; l < comparisonValues[j][k].size(); l++) { + // count++; + // double v = exs[j]->values()[k][l]; + // if (std::isnan(v)) { + // EXPECT(comparisonValues[j][k][l] == 9999); + // continue; + // } + + // EXPECT(comparisonValues[j][k][l] == v); + // } + // } + // } + // } + // // only count the 3 intervals with data + // EXPECT(count == 45); + + // // --- Extract (test 2) + // // Same request, all in one (test flattening) + // /// @todo, currently, the user cannot know order of the results after flattening, making this feature not very useful. + // /// We impose an order internally (currently, alphabetical). - allIntervals = { - {std::make_pair(0, 5), std::make_pair(20, 30)}, - }; - - requests = { - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1/2/3,stream=oper,time=1200,type=fc")[0].request() - }; - - ASSERT(requests.size() == 1); - - exRequests.clear(); - exRequests.push_back(ExtractionRequest(requests[0], allIntervals[0], gridHash)); - - results = engine.extract(exRequests, true); - EXPECT_NO_THROW(engine.raiseErrors()); - - // print contents of map - for (auto& [req, exs] : results) { - LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; - for (auto& ex : exs) { - ex->debug_print(); - } - } - - // compare results - - metkit::mars::MarsRequest req = requests[0]; - auto& exs = results[req]; - auto comparisonValues = eccodesExtract(req, allIntervals[0])[0]; // [0] Because each archived field has identical values. - count = 0; - for (size_t j = 0; j < exs.size(); j++) { - auto values = exs[j]->values(); - for (size_t k = 0; k < values.size(); k++) { - for (size_t l = 0; l < values[k].size(); l++) { - count++; - double v = values[k][l]; - if (std::isnan(v)) { - EXPECT(comparisonValues[k][l] == 9999); - continue; - } - - EXPECT(comparisonValues[k][l] == v); - } - } - } - EXPECT(count == 45); - - /// @todo: request touching multiple files? - /// @todo: request involving unsupported packingType? + // allIntervals = { + // {std::make_pair(0, 5), std::make_pair(20, 30)}, + // }; + + // requests = { + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1/2/3,stream=oper,time=1200,type=fc")[0].request() + // }; + + // ASSERT(requests.size() == 1); + + // exRequests.clear(); + // exRequests.push_back(ExtractionRequest(requests[0], allIntervals[0], gridHash)); + + // results = engine.extract(exRequests, true); + // EXPECT_NO_THROW(engine.raiseErrors()); + + // // print contents of map + // for (auto& [req, exs] : results) { + // LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; + // for (auto& ex : exs) { + // ex->debug_print(); + // } + // } + + // // compare results + + // metkit::mars::MarsRequest req = requests[0]; + // auto& exs = results[req]; + // auto comparisonValues = eccodesExtract(req, allIntervals[0])[0]; // [0] Because each archived field has identical values. + // count = 0; + // for (size_t j = 0; j < exs.size(); j++) { + // auto values = exs[j]->values(); + // for (size_t k = 0; k < values.size(); k++) { + // for (size_t l = 0; l < values[k].size(); l++) { + // count++; + // double v = values[k][l]; + // if (std::isnan(v)) { + // EXPECT(comparisonValues[k][l] == 9999); + // continue; + // } + + // EXPECT(comparisonValues[k][l] == v); + // } + // } + // } + // EXPECT(count == 45); + + // /// @todo: request touching multiple files? + // /// @todo: request involving unsupported packingType? } diff --git a/tests/test_gribinfo.cc b/tests/test_gribinfo.cc index ff997d8..2c116e9 100644 --- a/tests/test_gribinfo.cc +++ b/tests/test_gribinfo.cc @@ -233,38 +233,39 @@ CASE ("test_wrong_jumper") { // Testing the extract functionality using ExtractionItem // ~ i.e. internals of FileExtractionTask CASE ("test_ExtractionItem_extract") { - metkit::mars::MarsRequest request("none"); - auto intervals = std::vector{{0, 10}, {3000000, 3000010}, {6599670, 6599680}}; - ExtractionItem exItem(request, intervals ); + NOTIMP; + // metkit::mars::MarsRequest request("none"); + // auto intervals = std::vector{{0, 10}, {3000000, 3000010}, {6599670, 6599680}}; + // ExtractionItem exItem(request, intervals ); - eckit::PathName path = "2t_O1280.grib"; + // eckit::PathName path = "2t_O1280.grib"; - exItem.URI(eckit::URI(path)); + // exItem.URI(eckit::URI(path)); - eckit::FileHandle fh(path); - fh.openForRead(); + // eckit::FileHandle fh(path); + // fh.openForRead(); - eckit::Offset offset = 0; + // eckit::Offset offset = 0; - std::unique_ptr info(InfoFactory::instance().build(fh, offset)); - EXPECT(info); + // std::unique_ptr info(InfoFactory::instance().build(fh, offset)); + // EXPECT(info); - std::unique_ptr jumper(JumperFactory::instance().build(*info)); + // std::unique_ptr jumper(JumperFactory::instance().build(*info)); - jumper->extract(fh, offset, *info, exItem); + // jumper->extract(fh, offset, *info, exItem); - exItem.debug_print(); + // exItem.debug_print(); - // Check correct values - std::vector> comparisonValues = eccodesExtract(path, {offset}, intervals)[0]; - EXPECT(comparisonValues.size() == 3); - - for (size_t i = 0; i < comparisonValues.size(); i++) { - EXPECT(comparisonValues[i].size() == 10); - for (size_t j = 0; j < comparisonValues[i].size(); j++) { - EXPECT(comparisonValues[i][j] == exItem.values()[i][j]); - } - } + // // Check correct values + // std::vector> comparisonValues = eccodesExtract(path, {offset}, intervals)[0]; + // EXPECT(comparisonValues.size() == 3); + + // for (size_t i = 0; i < comparisonValues.size(); i++) { + // EXPECT(comparisonValues[i].size() == 10); + // for (size_t j = 0; j < comparisonValues[i].size(); j++) { + // EXPECT(comparisonValues[i][j] == exItem.values()[i][j]); + // } + // } } //----------------------------------------------------------------------------- From 2eeab22c5ab77e678e18e7a3a73a133c34131662 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Wed, 13 Nov 2024 00:14:32 +0000 Subject: [PATCH 4/8] Wider use of string over mars request. No longer support flatten --- pygribjump/src/pygribjump/pygribjump.py | 4 +- src/gribjump/Engine.cc | 236 +-------- src/gribjump/Engine.h | 6 +- src/gribjump/ExtractionData.cc | 44 +- src/gribjump/ExtractionData.h | 11 +- src/gribjump/ExtractionItem.h | 2 +- src/gribjump/GribJump.cc | 2 +- src/gribjump/GribJump.h | 2 +- src/gribjump/GribJumpBase.h | 4 +- src/gribjump/Lister.cc | 8 - src/gribjump/LocalGribJump.cc | 15 +- src/gribjump/LocalGribJump.h | 6 +- src/gribjump/Serialiser.cc | 608 ++++++++++++------------ src/gribjump/Serialiser.h | 76 +-- src/gribjump/gribjump_c.cc | 3 +- src/gribjump/remote/RemoteGribJump.cc | 11 +- src/gribjump/remote/RemoteGribJump.h | 4 +- src/gribjump/remote/Request.cc | 6 +- src/gribjump/remote/Request.h | 1 - src/gribjump/tools/ToolUtils.cc | 32 +- src/gribjump/tools/ToolUtils.h | 2 + src/tools/gribjump-extract.cc | 15 +- src/tools/gribjump-validate.cc | 2 +- tests/remote/test_remote.cc | 12 +- tests/test_api.cc | 55 ++- tests/test_engine.cc | 239 +++++----- tests/test_gribinfo.cc | 46 +- tests/test_serialiser.cc | 450 +++++++++--------- 28 files changed, 857 insertions(+), 1045 deletions(-) diff --git a/pygribjump/src/pygribjump/pygribjump.py b/pygribjump/src/pygribjump/pygribjump.py index ac0108c..ddae332 100644 --- a/pygribjump/src/pygribjump/pygribjump.py +++ b/pygribjump/src/pygribjump/pygribjump.py @@ -359,9 +359,7 @@ def list_to_rangestr(ranges): def dic_to_request(dic): # e.g. {"class":"od", "expver":"0001", "levtype":"pl"} -> "class=od,expver=0001,levtype=pl" - # return ','.join(['='.join([k, v]) for k, v in dic.items()]) - # We order the keys in alphabetical order - return ','.join(['='.join([k, dic[k]]) for k in sorted(dic.keys())]) + return ','.join(['='.join([k, v]) for k, v in dic.items()]) def version(): return __version__ diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index e6ad2f9..87fe980 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -28,159 +28,7 @@ namespace gribjump { //---------------------------------------------------------------------------------------------------------------------- // Stringify requests and keys alphabetically -namespace -{ -std::string requestToStr(const metkit::mars::MarsRequest& request) { - std::stringstream ss; - std::string separator = ""; - std::vector keys = request.params(); - std::sort(keys.begin(), keys.end()); - for(const auto& key : keys) { - ss << separator << key << "=" << request[key]; - separator = ","; - } - return ss.str(); -} - -//---------------------------------------------------------------------------------------------------------------------- - - -class CollectFlattenedRequests : public metkit::mars::FlattenCallback { -public: - CollectFlattenedRequests(std::vector& flattenedRequests) : flattenedRequests_(flattenedRequests) {} - - virtual void operator()(const metkit::mars::MarsRequest& req) { - flattenedRequests_.push_back(req); - } - - std::vector& flattenedRequests_; -}; - -std::vector flattenRequest(const metkit::mars::MarsRequest& request) { - - metkit::mars::MarsExpension expansion(false); - metkit::mars::DummyContext ctx; - std::vector flattenedRequests; - - CollectFlattenedRequests cb(flattenedRequests); - expansion.flatten(ctx, request, cb); - - LOG_DEBUG_LIB(LibGribJump) << "Base request: " << request << std::endl; - - for (const auto& req : flattenedRequests) { - LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; - } - - return flattenedRequests; -} - -// Stringify requests, and flatten if necessary - -typedef std::map> flattenedKeys_t; - -flattenedKeys_t buildFlatKeys(const ExtractionRequests& requests, bool flatten) { - - flattenedKeys_t keymap; - - // ASSERT(!flatten); // polytope already gives flat requests - - // for (const auto& req : requests) { - // const std::string& baseRequest = req.request_string(); - // ASSERT(!baseRequest.empty()); - // keymap[baseRequest] = std::vector(); - // keymap[baseRequest].push_back(baseRequest); - // } - // return keymap; - - // // debug/ - ASSERT(false); - for (const auto& req : requests) { - const metkit::mars::MarsRequest& baseRequest = req.request(); - keymap[baseRequest] = std::vector(); - - // Assume baseRequest has cardinality >= 1 and may need to be flattened - if (flatten) { - std::vector flat = flattenRequest(baseRequest); - for (const auto& r : flat) { - keymap[baseRequest].push_back(requestToStr(r)); - } - } - - // Assume baseRequest has cardinality 1 - else { - keymap[baseRequest].push_back(requestToStr(baseRequest)); - } - - eckit::Log::debug() << "Flattened keys for request " << baseRequest << ": " << keymap[baseRequest] << std::endl; - } - - return keymap; -} - -// ---------------------------------------------------------------------------------------------------------------------- -std::string unionise(const std::vector& requests) { - // Take many marsrequest-like strings and combine them into one string. - // Note, makes some assumptions: - // 1. Each string is unique - // 2. For each key there is a single value. E.g., "step=1/2/3" should be pre-split into three strings. - // 3. Does not check if the string is sensible. - // takes a vector of strings, each like "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0" - // The result is a string, that should be parsable my mars. - - // split the string into key value pairs by comma - std::map> keyValues; - for (auto& r : requests) { - const std::string& s = r.request_string(); - std::vector kvs = eckit::StringTools::split(",", s); // might be faster to use tokenizer directly. - for (auto& kv : kvs) { - std::vector kv_s = eckit::StringTools::split("=", kv); - if (kv_s.size() != 2) continue; // ignore verb - keyValues[kv_s[0]].insert(kv_s[1]); - } - - // Important! Canonicalise string so that we can use it to match with fdb. We do this by sorting the keys. - // e.g. expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0 - // becomes class=od,date=20241110,domain=g,expver=xxxx,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0 - std::sort(kvs.begin(), kvs.end()); - std::string canonicalised = ""; - for (auto& kv : kvs) { - // skip if it is "retrieve" - if (kv.find("retrieve") != std::string::npos) continue; - canonicalised += kv; - if (kv != kvs.back()) { - canonicalised += ","; - } - } - // r.request_string(canonicalised); - NOTIMP; - } - - // now construct a string with all the values: - // expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0/1/2/3/4 - std::string result = "retrieve,"; - size_t i = 0; - for (auto& [key, values] : keyValues) { - result += key + "="; - if (values.size() == 1) { - result += *values.begin(); - } else { - size_t j = 0; - for (auto& value : values) { - result += value; - if (j != values.size() - 1) { - result += "/"; - } - j++; - } - } - if (i != keyValues.size() - 1) { - result += ","; - } - i++; - } - - return result; -} +namespace { // ---------------------------------------------------------------------------------------------------------------------- bool isRemote(eckit::URI uri) { @@ -194,35 +42,12 @@ Engine::Engine() {} Engine::~Engine() {} - -// metkit::mars::MarsRequest unionRequest(const ExtractionRequests& requests) { - -// /// @todo: we should do some check not to merge on keys like class and stream -// // metkit::mars::MarsRequest unionRequest = requests.front().request(); -// // for(size_t i = 1; i < requests.size(); ++i) { -// // unionRequest.merge(requests[i].request()); -// // } -// std::string unionRequestStr = unionise(requests); -// std::istringstream istream(unionRequestStr); -// metkit::mars::MarsParser parser(istream); -// std::vector unionRequests = parser.parse(); -// ASSERT(unionRequests.size() == 1); -// metkit::mars::MarsRequest unionRequest = unionRequests[0]; -// eckit::Log::info() << "Gribjump: Union request is " << unionRequest << std::endl; - -// MetricsManager::instance().set("union_request", unionRequestStr); - -// return unionRequest; -// } - -metkit::mars::MarsRequest combinedFoo(const ExtractionRequests& requests, bool flatten, ExItemMap& keyToExtractionItem ){ - - ASSERT(!flatten); - +metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ){ // Split strings into one unified map + // We also canonicalise the requests std::map> keyValues; for (auto& r : requests) { - const std::string& s = r.request_string(); + const std::string& s = r.requestString(); std::vector kvs = eckit::StringTools::split(",", s); // might be faster to use tokenizer directly. for (auto& kv : kvs) { std::vector kv_s = eckit::StringTools::split("=", kv); @@ -234,23 +59,20 @@ metkit::mars::MarsRequest combinedFoo(const ExtractionRequests& requests, bool f std::sort(kvs.begin(), kvs.end()); std::string canonicalised = ""; for (auto& kv : kvs) { - // skip if it is "retrieve" - if (kv.find("retrieve") != std::string::npos) continue; canonicalised += kv; if (kv != kvs.back()) { canonicalised += ","; } } ASSERT(keyToExtractionItem.find(canonicalised) == keyToExtractionItem.end()); /// no repeats - auto extractionItem = std::make_unique(canonicalised, r.ranges()); // TODO: XXX we're giving it the mars request instead of the request string + r.requestString(canonicalised); + auto extractionItem = std::make_unique(canonicalised, r.ranges()); extractionItem->gridHash(r.gridHash()); keyToExtractionItem.emplace(canonicalised, std::move(extractionItem)); // 1-to-1-map } // --- construct the union request - // now construct a string with all the values: - // expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=0/1/2/3/4 std::string result = "retrieve,"; size_t i = 0; for (auto& [key, values] : keyValues) { @@ -281,48 +103,10 @@ metkit::mars::MarsRequest combinedFoo(const ExtractionRequests& requests, bool f return unionRequests[0]; } -ExItemMap Engine::buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten){ - - ExItemMap keyToExtractionItem; - ASSERT(!flatten); - // It is already flat, and we already have strings. - for (size_t i = 0; i < requests.size(); i++) { - const std::string& key = requests[i].request_string(); - ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// no repeats - auto extractionItem = std::make_unique(requests[i].request_string(), requests[i].ranges()); // TODO: XXX we're giving it the mars request instead of the request string - extractionItem->gridHash(requests[i].gridHash()); - keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map - } - - return keyToExtractionItem; - - - // Code that assumes we might need to flatten, which requires you to be using mars requests not strings: - ASSERT(false); - // flattenedKeys_t flatKeys = buildFlatKeys(requests, flatten); // Map from base request to {flattened keys} - - // LOG_DEBUG_LIB(LibGribJump) << "Built flat keys" << std::endl; - - // // Create the 1-to-1 map - // for (size_t i = 0; i < requests.size(); i++) { - // const metkit::mars::MarsRequest& basereq = requests[i].request(); - // const std::vector keys = flatKeys[basereq]; - // for (const auto& key : keys) { - // ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// @todo support duplicated requests? - // auto extractionItem = std::make_unique(basereq, requests[i].ranges()); - // extractionItem->gridHash(requests[i].gridHash()); - // keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map - // } - // } - - // return keyToExtractionItem; -} - filemap_t Engine::buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem) { // Map files to ExtractionItem eckit::Timer timer("Gribjump Engine: Building file map"); - // const metkit::mars::MarsRequest req = unionRequest(requests); MetricsManager::instance().set("debug_elapsed_union_request", timer.elapsed()); timer.reset("Gribjump Engine: Flattened requests and constructed union request"); @@ -411,16 +195,12 @@ void Engine::scheduleTasks(filemap_t& filemap){ taskGroup_.waitForTasks(); } -ResultsMap Engine::extract(const ExtractionRequests& requests, bool flatten) { +ResultsMap Engine::extract(ExtractionRequests& requests) { eckit::Timer timer("Engine::extract"); - // // Combine these? - // ExItemMap keyToExtractionItem = buildKeyToExtractionItem(requests, flatten); // Owns the ExtractionItems - // const metkit::mars::MarsRequest unionreq = unionRequest(requests); - ExItemMap keyToExtractionItem; - metkit::mars::MarsRequest unionreq = combinedFoo(requests, flatten, keyToExtractionItem); + metkit::mars::MarsRequest unionreq = buildRequestMap(requests, keyToExtractionItem); filemap_t filemap = buildFileMap(unionreq, keyToExtractionItem); MetricsManager::instance().set("elapsed_build_filemap", timer.elapsed()); diff --git a/src/gribjump/Engine.h b/src/gribjump/Engine.h index b93466d..d0182dc 100644 --- a/src/gribjump/Engine.h +++ b/src/gribjump/Engine.h @@ -28,7 +28,7 @@ class Engine { Engine(); ~Engine(); - ResultsMap extract(const ExtractionRequests& requests, bool flattenRequests = false); + ResultsMap extract(ExtractionRequests& requests); // byfiles: scan entire file, not just fields matching request size_t scan(const MarsRequests& requests, bool byfiles = false); @@ -43,11 +43,11 @@ class Engine { private: - // filemap_t buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem); filemap_t buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem); - ExItemMap buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten); ResultsMap collectResults(ExItemMap& keyToExtractionItem); void forwardRemoteExtraction(filemap_t& filemap); + metkit::mars::MarsRequest buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ); + private: diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index 2d018a3..bdf305f 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -203,15 +203,6 @@ eckit::Stream& operator<<(eckit::Stream& s, const ExtractionResult& o) { //--------------------------------------------------------------------------------------------------------------------- -ExtractionRequest::ExtractionRequest(const metkit::mars::MarsRequest& request, const std::vector& ranges, std::string gridHash): - ranges_(ranges), - request_(request), - gridHash_(gridHash) - { - - NOTIMP; // debug - } - ExtractionRequest::ExtractionRequest(const std::string& request, const std::vector& ranges, std::string gridHash): ranges_(ranges), request_string_(request), @@ -242,27 +233,30 @@ ExtractionRequest::ExtractionRequest(eckit::Stream& s) { } std::vector ExtractionRequest::split(const std::string& key) const { + NOTIMP; + // todo: implement for string request - std::vector reqs = request_.split(key); + // std::vector reqs = request_.split(key); - std::vector requests; - requests.reserve(reqs.size()); - for (auto& r : reqs) { - requests.push_back(ExtractionRequest(r, ranges_)); - } - return requests; + // std::vector requests; + // requests.reserve(reqs.size()); + // for (auto& r : reqs) { + // requests.push_back(ExtractionRequest(r, ranges_)); + // } + // return requests; } std::vector ExtractionRequest::split(const std::vector& keys) const { - - std::vector reqs = request_.split(keys); - - std::vector requests; - requests.reserve(reqs.size()); - for (auto& r : reqs) { - requests.push_back(ExtractionRequest(r, ranges_)); - } - return requests; + NOTIMP; + // todo: implement for string request + // std::vector reqs = request_.split(keys); + + // std::vector requests; + // requests.reserve(reqs.size()); + // for (auto& r : reqs) { + // requests.push_back(ExtractionRequest(r, ranges_)); + // } + // return requests; } eckit::Stream& operator<<(eckit::Stream& s, const ExtractionRequest& o) { diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index f88499a..2ffa890 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -24,9 +24,6 @@ namespace gribjump { //---------------------------------------------------------------------------------------------------------------------- - -/// @todo This class is now redundant thanks to ExtractionItem. // XXX not true - class ExtractionResult { public: // methods @@ -79,16 +76,16 @@ class ExtractionRequest { public: // methods ExtractionRequest(); - ExtractionRequest(const metkit::mars::MarsRequest&, const std::vector&, std::string gridHash=""); + // ExtractionRequest(const metkit::mars::MarsRequest&, const std::vector&, std::string gridHash=""); ExtractionRequest(const std::string&, const std::vector&, std::string gridHash=""); explicit ExtractionRequest(eckit::Stream& s); std::vector split(const std::vector& keys) const; std::vector split(const std::string& key) const; const std::vector& ranges() const {return ranges_;} - const metkit::mars::MarsRequest& request() const {NOTIMP; return request_;} - const std::string& request_string() const {return request_string_;} - void request_string(const std::string& s) {request_string_ = s;} + // const metkit::mars::MarsRequest& request() const {NOTIMP; return request_;} + const std::string& requestString() const {return request_string_;} + void requestString(const std::string& s) {request_string_ = s;} const std::string& gridHash() const {return gridHash_;} private: // methods diff --git a/src/gribjump/ExtractionItem.h b/src/gribjump/ExtractionItem.h index 1fcc0d7..f90b892 100644 --- a/src/gribjump/ExtractionItem.h +++ b/src/gribjump/ExtractionItem.h @@ -68,7 +68,7 @@ class ExtractionItem : public eckit::NonCopyable { void debug_print() const { std::cout << "ExtractionItem: {" << std::endl; - std::cout << " MarsRequest: " << request_ << std::endl; + std::cout << " RequestString: " << request_ << std::endl; std::cout << " Ranges: " << std::endl; for (auto& r : ranges_) { std::cout << " {" << r.first << ", " << r.second << "}" << std::endl; diff --git a/src/gribjump/GribJump.cc b/src/gribjump/GribJump.cc index d40c8c2..0a3f1c5 100644 --- a/src/gribjump/GribJump.cc +++ b/src/gribjump/GribJump.cc @@ -50,7 +50,7 @@ size_t GribJump::scan(const std::vector requests, boo } -std::vector>> GribJump::extract(const std::vector& requests, const LogContext& ctx) { +std::vector>> GribJump::extract(std::vector& requests, const LogContext& ctx) { ContextManager::instance().set(ctx); if (requests.empty()) { diff --git a/src/gribjump/GribJump.h b/src/gribjump/GribJump.h index ae29e7e..c9a3e43 100644 --- a/src/gribjump/GribJump.h +++ b/src/gribjump/GribJump.h @@ -46,7 +46,7 @@ class GribJump { size_t scan(const std::vector& paths, const LogContext& ctx=LogContext("none")); size_t scan(std::vector requests, bool byfiles = false, const LogContext& ctx=LogContext("none")); - std::vector>> extract(const std::vector& requests, const LogContext& ctx=LogContext("none")); + std::vector>> extract(std::vector& requests, const LogContext& ctx=LogContext("none")); std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges, const LogContext& ctx=LogContext("none")); std::map> axes(const std::string& request, int level=3, const LogContext& ctx=LogContext("none")); diff --git a/src/gribjump/GribJumpBase.h b/src/gribjump/GribJumpBase.h index 704ab97..046f3b0 100644 --- a/src/gribjump/GribJumpBase.h +++ b/src/gribjump/GribJumpBase.h @@ -46,9 +46,9 @@ class GribJumpBase : public eckit::NonCopyable { size_t virtual scan(const std::vector& paths) = 0; - virtual size_t scan(const std::vector requests, bool byfiles) = 0; + virtual size_t scan(const std::vector& requests, bool byfiles) = 0; - virtual std::vector>> extract(std::vector) = 0; + virtual std::vector>> extract(std::vector&) = 0; virtual std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) = 0; virtual std::map> axes(const std::string& request, int level) = 0; diff --git a/src/gribjump/Lister.cc b/src/gribjump/Lister.cc index fff0700..5d5be10 100644 --- a/src/gribjump/Lister.cc +++ b/src/gribjump/Lister.cc @@ -172,14 +172,6 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons MetricsManager::instance().set("debug_listiter_to_filemap", timer.elapsed()); - // // XXX DEBUG - // std::stringstream ss; - // ss << "DEBUG EXIT: FDBLister::fileMap() took " << timer.elapsed() << "s" << std::endl; - // ss << "Union request" << unionRequest << std::endl; - // throw eckit::SeriousBug(ss.str()); - // // XXX DEBUG - - return filemap; } diff --git a/src/gribjump/LocalGribJump.cc b/src/gribjump/LocalGribJump.cc index 84c54d5..5365078 100644 --- a/src/gribjump/LocalGribJump.cc +++ b/src/gribjump/LocalGribJump.cc @@ -53,7 +53,7 @@ size_t LocalGribJump::scan(const std::vector& paths) { return engine.scan(paths); } -size_t LocalGribJump::scan(const std::vector requests, bool byfiles) { +size_t LocalGribJump::scan(const std::vector& requests, bool byfiles) { Engine engine; return engine.scan(requests, byfiles); } @@ -84,20 +84,19 @@ std::vector> LocalGribJump::extract(const eckit: } /// @todo, change API, remove extraction request -std::vector>> LocalGribJump::extract(ExtractionRequests requests) { +std::vector>> LocalGribJump::extract(ExtractionRequests& requests) { - bool flatten = true; Engine engine; - ResultsMap results = engine.extract(requests, flatten); + ResultsMap results = engine.extract(requests); engine.raiseErrors(); std::vector>> extractionResults; for (auto& req : requests) { - auto it = results.find(req.request_string()); + auto it = results.find(req.requestString()); + ASSERT(it != results.end()); std::vector> res; for (auto& item : it->second) { - // std::unique_ptr r(new ExtractionResult(item->values(), item->mask())); res.push_back(std::make_unique(item->values(), item->mask())); } @@ -107,7 +106,7 @@ std::vector>> LocalGribJump::extra return extractionResults; } -ResultsMap LocalGribJump::extract(const std::vector& requests, const std::vector>& ranges, bool flatten) { +ResultsMap LocalGribJump::extract(const std::vector& requests, const std::vector>& ranges) { Engine engine; ExtractionRequests extractionRequests; @@ -115,7 +114,7 @@ ResultsMap LocalGribJump::extract(const std::vector& requests, cons extractionRequests.push_back(ExtractionRequest(requests[i], ranges[i])); } - ResultsMap results = engine.extract(extractionRequests, flatten); + ResultsMap results = engine.extract(extractionRequests); engine.raiseErrors(); return results; } diff --git a/src/gribjump/LocalGribJump.h b/src/gribjump/LocalGribJump.h index dbaa238..f0fdf17 100644 --- a/src/gribjump/LocalGribJump.h +++ b/src/gribjump/LocalGribJump.h @@ -32,14 +32,14 @@ class LocalGribJump : public GribJumpBase { /// @param path full path to grib file size_t scan(const std::vector& paths) override; - size_t scan(const std::vector requests, bool byfiles) override; + size_t scan(const std::vector& requests, bool byfiles) override; // new API! - ResultsMap extract(const std::vector& requests, const std::vector>& ranges, bool flatten); + ResultsMap extract(const std::vector& requests, const std::vector>& ranges); // old API std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) override; - std::vector>> extract(std::vector) override; + std::vector>> extract(std::vector&) override; std::map> axes(const std::string& request, int level) override; diff --git a/src/gribjump/Serialiser.cc b/src/gribjump/Serialiser.cc index 2b121aa..a86fb70 100644 --- a/src/gribjump/Serialiser.cc +++ b/src/gribjump/Serialiser.cc @@ -1,307 +1,307 @@ -/* - * (C) Copyright 2024- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ - -/// @author Christopher Bradley - -#include "eckit/value/Value.h" -#include "eckit/io/Buffer.h" -#include "gribjump/Serialiser.h" - - -namespace gribjump { - -void Serialiser::encode(eckit::Stream& s, const std::vector& v) { - size_t size = v.size(); - s << size; - eckit::Buffer buffer(v.data(), size * sizeof(double)); - s << buffer; -} - -std::vector Serialiser::decodeVector(eckit::Stream& s) { - size_t size; - s >> size; - eckit::Buffer buffer(size * sizeof(double)); - s >> buffer; - double* data = (double*) buffer.data(); - - return std::vector(data, data + size); -} - -// ----------------------------------------------------------------------------------------------- -// vector of pairs - -// conclusion: naive is worse for vector of pairs. -void Serialiser::encode(eckit::Stream& s, const Ranges& v, bool naive) { - if (naive) { - size_t size = v.size(); - s << size; - for (auto& pair : v) { - s << pair.first; - s << pair.second; - } - return; - } - // else - size_t size = v.size(); - s << size; - - // We know they are pairs, don't need size - eckit::Buffer buffer(v.data(), v.size() * sizeof(size_t) * 2); - s << buffer; -} - -Ranges Serialiser::decodeRanges(eckit::Stream& s, bool naive) { - if (naive) { - Ranges result; - size_t size; - s >> size; - for (size_t i = 0; i < size; i++) { - size_t first; - size_t second; - s >> first; - s >> second; - result.push_back(std::make_pair(first, second)); - } - return result; - } - // else - size_t size; - s >> size; - eckit::Buffer buffer(size * sizeof(size_t) * 2); - s >> buffer; - size_t* data = (size_t*) buffer.data(); - - Ranges result; - for (size_t i = 0; i < size; i++) { - result.push_back(std::make_pair(data[i*2], data[i*2 + 1])); - } - return result; -} - - -// ----------------------------------------------------------------------------------------------- - -void Serialiser::encode(eckit::Stream& s, const std::vector& v) { - // Don't want to just do s << str, since this is quite slow. - // Use a buffer for all strings. - size_t size = v.size(); - s << size; - size_t totalSize = 0; - for (auto& str : v) { - totalSize += str.size(); - s << str.size(); - } - eckit::Buffer buffer(totalSize); - char* data = (char*) buffer.data(); - for (auto& str : v) { - for (auto& c : str) { - *data++ = c; - } - } - s << buffer; -} - -std::vector Serialiser::decodeVectorString(eckit::Stream& s) { - size_t size; - s >> size; - std::vector innerSizes; - size_t totalSize = 0; - for (size_t i = 0; i < size; i++) { - size_t innerSize; - s >> innerSize; - innerSizes.push_back(innerSize); - totalSize += innerSize; - } - - eckit::Buffer buffer(totalSize); - s >> buffer; - char* data = (char*) buffer.data(); - - std::vector result; - size_t offset = 0; - for (auto& innerSize : innerSizes) { - std::string inner(data + offset, innerSize); - result.push_back(inner); - offset += innerSize; - } - - return result; -} - -// Naive version is actually faster -void Serialiser::encodeNaive(eckit::Stream& s, const std::vector& v) { - s << v; -} - -std::vector Serialiser::decodeVectorStringNaive(eckit::Stream& s) { - std::vector result; - s >> result; - return result; -} - -// ----------------------------------------------------------------------------------------------- - -void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { - size_t size = v.size(); - s << size; - size_t totalSize = 0; - for (auto& inner : v) { - totalSize += inner.size(); - s << inner.size(); - } - eckit::Buffer buffer(totalSize * sizeof(double)); - double* data = (double*) buffer.data(); - for (auto& inner : v) { - for (auto& d : inner) { - *data++ = d; - } - } - s << buffer; -} - -std::vector> Serialiser::decodeVectorVector(eckit::Stream& s) { - std::vector> result; - - size_t size; - s >> size; - std::vector innerSizes; - size_t totalSize = 0; - for (size_t i = 0; i < size; i++) { - size_t innerSize; - s >> innerSize; - innerSizes.push_back(innerSize); - totalSize += innerSize; - } - - eckit::Buffer buffer(totalSize * sizeof(double)); - s >> buffer; - double* data = (double*) buffer.data(); - - size_t offset = 0; - for (auto& innerSize : innerSizes) { - std::vector inner(data + offset, data + offset + innerSize); - result.push_back(inner); - offset += innerSize; - } +// /* +// * (C) Copyright 2024- ECMWF. +// * +// * This software is licensed under the terms of the Apache Licence Version 2.0 +// * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +// * In applying this licence, ECMWF does not waive the privileges and immunities +// * granted to it by virtue of its status as an intergovernmental organisation nor +// * does it submit to any jurisdiction. +// */ + +// /// @author Christopher Bradley + +// #include "eckit/value/Value.h" +// #include "eckit/io/Buffer.h" +// #include "gribjump/Serialiser.h" + + +// namespace gribjump { + +// void Serialiser::encode(eckit::Stream& s, const std::vector& v) { +// size_t size = v.size(); +// s << size; +// eckit::Buffer buffer(v.data(), size * sizeof(double)); +// s << buffer; +// } + +// std::vector Serialiser::decodeVector(eckit::Stream& s) { +// size_t size; +// s >> size; +// eckit::Buffer buffer(size * sizeof(double)); +// s >> buffer; +// double* data = (double*) buffer.data(); + +// return std::vector(data, data + size); +// } + +// // ----------------------------------------------------------------------------------------------- +// // vector of pairs + +// // conclusion: naive is worse for vector of pairs. +// void Serialiser::encode(eckit::Stream& s, const Ranges& v, bool naive) { +// if (naive) { +// size_t size = v.size(); +// s << size; +// for (auto& pair : v) { +// s << pair.first; +// s << pair.second; +// } +// return; +// } +// // else +// size_t size = v.size(); +// s << size; + +// // We know they are pairs, don't need size +// eckit::Buffer buffer(v.data(), v.size() * sizeof(size_t) * 2); +// s << buffer; +// } + +// Ranges Serialiser::decodeRanges(eckit::Stream& s, bool naive) { +// if (naive) { +// Ranges result; +// size_t size; +// s >> size; +// for (size_t i = 0; i < size; i++) { +// size_t first; +// size_t second; +// s >> first; +// s >> second; +// result.push_back(std::make_pair(first, second)); +// } +// return result; +// } +// // else +// size_t size; +// s >> size; +// eckit::Buffer buffer(size * sizeof(size_t) * 2); +// s >> buffer; +// size_t* data = (size_t*) buffer.data(); + +// Ranges result; +// for (size_t i = 0; i < size; i++) { +// result.push_back(std::make_pair(data[i*2], data[i*2 + 1])); +// } +// return result; +// } + + +// // ----------------------------------------------------------------------------------------------- + +// void Serialiser::encode(eckit::Stream& s, const std::vector& v) { +// // Don't want to just do s << str, since this is quite slow. +// // Use a buffer for all strings. +// size_t size = v.size(); +// s << size; +// size_t totalSize = 0; +// for (auto& str : v) { +// totalSize += str.size(); +// s << str.size(); +// } +// eckit::Buffer buffer(totalSize); +// char* data = (char*) buffer.data(); +// for (auto& str : v) { +// for (auto& c : str) { +// *data++ = c; +// } +// } +// s << buffer; +// } + +// std::vector Serialiser::decodeVectorString(eckit::Stream& s) { +// size_t size; +// s >> size; +// std::vector innerSizes; +// size_t totalSize = 0; +// for (size_t i = 0; i < size; i++) { +// size_t innerSize; +// s >> innerSize; +// innerSizes.push_back(innerSize); +// totalSize += innerSize; +// } + +// eckit::Buffer buffer(totalSize); +// s >> buffer; +// char* data = (char*) buffer.data(); + +// std::vector result; +// size_t offset = 0; +// for (auto& innerSize : innerSizes) { +// std::string inner(data + offset, innerSize); +// result.push_back(inner); +// offset += innerSize; +// } + +// return result; +// } + +// // Naive version is actually faster +// void Serialiser::encodeNaive(eckit::Stream& s, const std::vector& v) { +// s << v; +// } + +// std::vector Serialiser::decodeVectorStringNaive(eckit::Stream& s) { +// std::vector result; +// s >> result; +// return result; +// } + +// // ----------------------------------------------------------------------------------------------- + +// void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { +// size_t size = v.size(); +// s << size; +// size_t totalSize = 0; +// for (auto& inner : v) { +// totalSize += inner.size(); +// s << inner.size(); +// } +// eckit::Buffer buffer(totalSize * sizeof(double)); +// double* data = (double*) buffer.data(); +// for (auto& inner : v) { +// for (auto& d : inner) { +// *data++ = d; +// } +// } +// s << buffer; +// } + +// std::vector> Serialiser::decodeVectorVector(eckit::Stream& s) { +// std::vector> result; + +// size_t size; +// s >> size; +// std::vector innerSizes; +// size_t totalSize = 0; +// for (size_t i = 0; i < size; i++) { +// size_t innerSize; +// s >> innerSize; +// innerSizes.push_back(innerSize); +// totalSize += innerSize; +// } + +// eckit::Buffer buffer(totalSize * sizeof(double)); +// s >> buffer; +// double* data = (double*) buffer.data(); + +// size_t offset = 0; +// for (auto& innerSize : innerSizes) { +// std::vector inner(data + offset, data + offset + innerSize); +// result.push_back(inner); +// offset += innerSize; +// } - return result; -} - -// ----------------------------------------------------------------------------------------------- -void Serialiser::encode(eckit::Stream& s, const std::vector& v, bool naive) { - if (naive) { - size_t size = v.size(); - s << size; - for (auto& req : v) { - req.encode(s); - } - return; - } - - std::vector gridHashes; - RangesList ranges; - - // reserve - gridHashes.reserve(v.size()); - ranges.reserve(v.size()); - - for (auto& req : v) { // This copy is grim - gridHashes.push_back(req.gridHash()); - ranges.push_back(req.ranges()); - } - - encodeNaive(s, gridHashes); - s << ranges.size(); - for (auto& r : ranges) { - encode(s, r, false); - } - - // encode the mars requests naively - s << v.size(); - for (auto& req : v) { - s << req.request(); - } -} - -std::vector Serialiser::decodeExtractionRequests(eckit::Stream& s, bool naive) { - if (naive) { - std::vector result; - size_t size; - s >> size; - for (size_t i = 0; i < size; i++) { - result.push_back(ExtractionRequest(s)); - } - return result; - } - - std::vector gridHashes = decodeVectorStringNaive(s); +// return result; +// } + +// // ----------------------------------------------------------------------------------------------- +// void Serialiser::encode(eckit::Stream& s, const std::vector& v, bool naive) { +// if (naive) { +// size_t size = v.size(); +// s << size; +// for (auto& req : v) { +// req.encode(s); +// } +// return; +// } + +// std::vector gridHashes; +// RangesList ranges; + +// // reserve +// gridHashes.reserve(v.size()); +// ranges.reserve(v.size()); + +// for (auto& req : v) { // This copy is grim +// gridHashes.push_back(req.gridHash()); +// ranges.push_back(req.ranges()); +// } + +// encodeNaive(s, gridHashes); +// s << ranges.size(); +// for (auto& r : ranges) { +// encode(s, r, false); +// } + +// // encode the mars requests naively +// s << v.size(); +// for (auto& req : v) { +// s << req.request(); +// } +// } + +// std::vector Serialiser::decodeExtractionRequests(eckit::Stream& s, bool naive) { +// if (naive) { +// std::vector result; +// size_t size; +// s >> size; +// for (size_t i = 0; i < size; i++) { +// result.push_back(ExtractionRequest(s)); +// } +// return result; +// } + +// std::vector gridHashes = decodeVectorStringNaive(s); - size_t numRanges; - s >> numRanges; - RangesList ranges; - ranges.reserve(numRanges); - for (size_t i = 0; i < numRanges; i++) { - ranges.push_back(decodeRanges(s, false)); - } - - std::vector marsrequests; - size_t numMarsRequests; - s >> numMarsRequests; - marsrequests.reserve(numMarsRequests); - for (size_t i = 0; i < numMarsRequests; i++) { - metkit::mars::MarsRequest marsrequest(s); - marsrequests.push_back(marsrequest); - } - - // repack - std::vector result; - for (size_t i = 0; i < marsrequests.size(); i++) { - result.push_back(ExtractionRequest(marsrequests[i], ranges[i], gridHashes[i])); - } - - return result; -} - - -// ----------------------------------------------------------------------------------------------- -void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { - size_t size = v.size(); - s << size; - for (auto& inner : v) { - size_t innerSize = inner.size(); - s << innerSize; - for (auto& res : inner) { - res.encode(s); - } - } - -} - -std::vector> Serialiser::decodeExtractionResults(eckit::Stream& s) { - std::vector> result; - - size_t size; - s >> size; - for (size_t i = 0; i < size; i++) { - size_t innerSize; - s >> innerSize; - std::vector inner; - for (size_t j = 0; j < innerSize; j++) { - inner.push_back(ExtractionResult(s)); - } - result.push_back(std::move(inner)); - } - - return result; -} - - -} // namespace gribjump +// size_t numRanges; +// s >> numRanges; +// RangesList ranges; +// ranges.reserve(numRanges); +// for (size_t i = 0; i < numRanges; i++) { +// ranges.push_back(decodeRanges(s, false)); +// } + +// std::vector marsrequests; +// size_t numMarsRequests; +// s >> numMarsRequests; +// marsrequests.reserve(numMarsRequests); +// for (size_t i = 0; i < numMarsRequests; i++) { +// metkit::mars::MarsRequest marsrequest(s); +// marsrequests.push_back(marsrequest); +// } + +// // repack +// std::vector result; +// for (size_t i = 0; i < marsrequests.size(); i++) { +// result.push_back(ExtractionRequest(marsrequests[i], ranges[i], gridHashes[i])); +// } + +// return result; +// } + + +// // ----------------------------------------------------------------------------------------------- +// void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { +// size_t size = v.size(); +// s << size; +// for (auto& inner : v) { +// size_t innerSize = inner.size(); +// s << innerSize; +// for (auto& res : inner) { +// res.encode(s); +// } +// } + +// } + +// std::vector> Serialiser::decodeExtractionResults(eckit::Stream& s) { +// std::vector> result; + +// size_t size; +// s >> size; +// for (size_t i = 0; i < size; i++) { +// size_t innerSize; +// s >> innerSize; +// std::vector inner; +// for (size_t j = 0; j < innerSize; j++) { +// inner.push_back(ExtractionResult(s)); +// } +// result.push_back(std::move(inner)); +// } + +// return result; +// } + + +// } // namespace gribjump diff --git a/src/gribjump/Serialiser.h b/src/gribjump/Serialiser.h index cd03717..ae7f2cc 100644 --- a/src/gribjump/Serialiser.h +++ b/src/gribjump/Serialiser.h @@ -1,53 +1,53 @@ -/* - * (C) Copyright 2024- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation nor - * does it submit to any jurisdiction. - */ +// /* +// * (C) Copyright 2024- ECMWF. +// * +// * This software is licensed under the terms of the Apache Licence Version 2.0 +// * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +// * In applying this licence, ECMWF does not waive the privileges and immunities +// * granted to it by virtue of its status as an intergovernmental organisation nor +// * does it submit to any jurisdiction. +// */ -/// @author Christopher Bradley +// /// @author Christopher Bradley -#pragma once +// #pragma once -#include -#include "eckit/serialisation/Stream.h" -#include "gribjump/ExtractionData.h" -#include "gribjump/Types.h" +// #include +// #include "eckit/serialisation/Stream.h" +// #include "gribjump/ExtractionData.h" +// #include "gribjump/Types.h" -// Class to help with serialisation of containers +// // Class to help with serialisation of containers -namespace gribjump { +// namespace gribjump { -class Serialiser { -public: - static void encode(eckit::Stream& s, const std::vector& v); - static std::vector decodeVector(eckit::Stream& s); +// class Serialiser { +// public: +// static void encode(eckit::Stream& s, const std::vector& v); +// static std::vector decodeVector(eckit::Stream& s); - static void encode(eckit::Stream& s, const std::vector& v); - static std::vector decodeVectorString(eckit::Stream& s); +// static void encode(eckit::Stream& s, const std::vector& v); +// static std::vector decodeVectorString(eckit::Stream& s); - static void encode(eckit::Stream& s, const std::vector>& v); - static std::vector> decodeVectorVector(eckit::Stream& s); +// static void encode(eckit::Stream& s, const std::vector>& v); +// static std::vector> decodeVectorVector(eckit::Stream& s); - static void encode(eckit::Stream& s, const std::vector& v, bool naive=false); - static std::vector decodeExtractionRequests(eckit::Stream& s, bool naive=false); +// static void encode(eckit::Stream& s, const std::vector& v, bool naive=false); +// static std::vector decodeExtractionRequests(eckit::Stream& s, bool naive=false); - // We tend to have a vector of vectors of ExtractionResults - static void encode(eckit::Stream& s, const std::vector>& v); - static std::vector> decodeExtractionResults(eckit::Stream& s); +// // We tend to have a vector of vectors of ExtractionResults +// static void encode(eckit::Stream& s, const std::vector>& v); +// static std::vector> decodeExtractionResults(eckit::Stream& s); - static void encode(eckit::Stream& s, const Ranges& v, bool naive=false); - static Ranges decodeRanges(eckit::Stream& s, bool naive=false); +// static void encode(eckit::Stream& s, const Ranges& v, bool naive=false); +// static Ranges decodeRanges(eckit::Stream& s, bool naive=false); - //------------------------------------------------------------------------------------------------- - // Naive implementations, for timing comparison - static void encodeNaive(eckit::Stream& s, const std::vector& v); - static std::vector decodeVectorStringNaive(eckit::Stream& s); -}; +// //------------------------------------------------------------------------------------------------- +// // Naive implementations, for timing comparison +// static void encodeNaive(eckit::Stream& s, const std::vector& v); +// static std::vector decodeVectorStringNaive(eckit::Stream& s); +// }; -} // namespace gribjump +// } // namespace gribjump diff --git a/src/gribjump/gribjump_c.cc b/src/gribjump/gribjump_c.cc index 9d183b1..e95811e 100644 --- a/src/gribjump/gribjump_c.cc +++ b/src/gribjump/gribjump_c.cc @@ -227,7 +227,8 @@ int gribjump_delete_result(gribjump_extraction_result_t* result) { int extract_single(gribjump_handle_t* handle, gribjump_extraction_request_t* request, gribjump_extraction_result_t*** results_array, unsigned long* nfields) { return wrapApiFunction([=] { ExtractionRequest req = *request; - std::vector>> resultsv = handle->extract(std::vector{req}); + std::vector vec = {req}; + std::vector>> resultsv = handle->extract(vec); ASSERT(resultsv.size() == 1); std::vector> results = std::move(resultsv[0]); diff --git a/src/gribjump/remote/RemoteGribJump.cc b/src/gribjump/remote/RemoteGribJump.cc index 1872e68..15c036b 100644 --- a/src/gribjump/remote/RemoteGribJump.cc +++ b/src/gribjump/remote/RemoteGribJump.cc @@ -41,7 +41,7 @@ void RemoteGribJump::sendHeader(eckit::net::InstantTCPStream& stream, RequestTyp stream << static_cast(type); } -size_t RemoteGribJump::scan(const std::vector requests, bool byfiles) { +size_t RemoteGribJump::scan(const std::vector& requests, bool byfiles) { eckit::Timer timer("RemoteGribJump::scan()"); // connect to server @@ -78,7 +78,7 @@ size_t RemoteGribJump::scan(const std::vector request return count; } -std::vector>> RemoteGribJump::extract(std::vector requests) { +std::vector>> RemoteGribJump::extract(std::vector& requests) { eckit::Timer timer("RemoteGribJump::extract()"); std::vector>> result; @@ -120,7 +120,9 @@ std::vector> RemoteGribJump::extract(const eckit NOTIMP; } +// Forward extraction request to another server void RemoteGribJump::extract(filemap_t& filemap){ + eckit::Timer timer("RemoteGribJump::extract()"); ///@todo we could probably do the connection logic in the ctor @@ -143,9 +145,8 @@ void RemoteGribJump::extract(filemap_t& filemap){ size_t nItems = extractionItems.size(); stream << nItems; for (auto& item : extractionItems) { - // ExtractionRequest req(item->request(), item->intervals()); - metkit::mars::MarsRequest r(""); // no need to send mars request when we have uri - ExtractionRequest req(r, item->intervals(), item->gridHash()); + // We have URI, no need to send a request string + ExtractionRequest req("", item->intervals(), item->gridHash()); stream << req; stream << item->URI(); } diff --git a/src/gribjump/remote/RemoteGribJump.h b/src/gribjump/remote/RemoteGribJump.h index be7dabc..eb6e54c 100644 --- a/src/gribjump/remote/RemoteGribJump.h +++ b/src/gribjump/remote/RemoteGribJump.h @@ -36,9 +36,9 @@ class RemoteGribJump : public GribJumpBase { size_t scan(const std::vector& path) override { NOTIMP; } - size_t scan(const std::vector requests, bool byfiles) override; + size_t scan(const std::vector& requests, bool byfiles) override; - std::vector>> extract(std::vector polyRequest) override; + std::vector>> extract(std::vector& polyRequest) override; std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) override; void extract(filemap_t& filemap); diff --git a/src/gribjump/remote/Request.cc b/src/gribjump/remote/Request.cc index 2793c6c..93f6f0d 100644 --- a/src/gribjump/remote/Request.cc +++ b/src/gribjump/remote/Request.cc @@ -88,8 +88,6 @@ ExtractRequest::ExtractRequest(eckit::Stream& stream) : Request(stream) { requests_.push_back(req); } - flatten_ = false; // xxx hard coded for now - MetricsManager::instance().set("count_requests", nRequests); } @@ -98,7 +96,7 @@ ExtractRequest::~ExtractRequest() { void ExtractRequest::execute() { - results_ = engine_.extract(requests_, flatten_); + results_ = engine_.extract(requests_); if (LibGribJump::instance().debug()) { for (auto& pair : results_) { @@ -120,7 +118,7 @@ void ExtractRequest::replyToClient() { for (size_t i = 0; i < nRequests; i++) { LOG_DEBUG_LIB(LibGribJump) << "Sending result " << i << " to client" << std::endl; - auto it = results_.find(requests_[i].request_string()); + auto it = results_.find(requests_[i].requestString()); ASSERT(it != results_.end()); std::vector>& items = it->second; // ExtractionItems items = it->second; diff --git a/src/gribjump/remote/Request.h b/src/gribjump/remote/Request.h index 7fe014a..463b972 100644 --- a/src/gribjump/remote/Request.h +++ b/src/gribjump/remote/Request.h @@ -87,7 +87,6 @@ class ExtractRequest : public Request { private: std::vector requests_; - bool flatten_; ResultsMap results_; diff --git a/src/gribjump/tools/ToolUtils.cc b/src/gribjump/tools/ToolUtils.cc index 3a1e008..ab42258 100644 --- a/src/gribjump/tools/ToolUtils.cc +++ b/src/gribjump/tools/ToolUtils.cc @@ -13,7 +13,8 @@ #include "eckit/utils/StringTools.h" #include "eckit/filesystem/PathName.h" - +#include "metkit/mars/MarsExpension.h" +#include "gribjump/LibGribJump.h" #include "gribjump/tools/ToolUtils.h" namespace gribjump { @@ -49,4 +50,33 @@ std::vector> parseRangesFile(eckit::PathName fname) { return allRanges; } +class CollectFlattenedRequests : public metkit::mars::FlattenCallback { +public: + CollectFlattenedRequests(std::vector& flattenedRequests) : flattenedRequests_(flattenedRequests) {} + + virtual void operator()(const metkit::mars::MarsRequest& req) { + flattenedRequests_.push_back(req); + } + + std::vector& flattenedRequests_; +}; + +std::vector flattenRequest(const metkit::mars::MarsRequest& request) { + + metkit::mars::MarsExpension expansion(false); + metkit::mars::DummyContext ctx; + std::vector flattenedRequests; + + CollectFlattenedRequests cb(flattenedRequests); + expansion.flatten(ctx, request, cb); + + LOG_DEBUG_LIB(LibGribJump) << "Base request: " << request << std::endl; + + for (const auto& req : flattenedRequests) { + LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; + } + + return flattenedRequests; +} + } // namespace gribjump diff --git a/src/gribjump/tools/ToolUtils.h b/src/gribjump/tools/ToolUtils.h index c700ec9..c74f641 100644 --- a/src/gribjump/tools/ToolUtils.h +++ b/src/gribjump/tools/ToolUtils.h @@ -12,12 +12,14 @@ #pragma once #include "eckit/filesystem/PathName.h" +#include "metkit/mars/MarsRequest.h" #include "gribjump/ExtractionData.h" namespace gribjump { std::vector> parseRangesFile(eckit::PathName fname); +std::vector flattenRequest(const metkit::mars::MarsRequest& request); } // namespace gribjump diff --git a/src/tools/gribjump-extract.cc b/src/tools/gribjump-extract.cc index 752ca3c..91e3762 100644 --- a/src/tools/gribjump-extract.cc +++ b/src/tools/gribjump-extract.cc @@ -49,7 +49,7 @@ void GribJumpExtract::usage(const std::string &tool) const { void GribJumpExtract::execute(const eckit::option::CmdArgs &args) { // Testing tool for extract / directJump functionality - + using MarsRequests = metkit::mars::MarsRequest; const bool raw = args.getBool("raw", false); const bool printout = args.getBool("print", true); @@ -84,8 +84,17 @@ void GribJumpExtract::execute(const eckit::option::CmdArgs &args) { std::vector polyRequest; for (size_t i = 0; i < requests.size(); i++) { - ExtractionRequest exrequest(requests[i], allRanges[i]); - polyRequest.push_back(exrequest); + // Flatten and remove verb + std::vector flattenedRequests = flattenRequest(requests[i]); + for (auto& req : flattenedRequests) { + std::string s = req.asString(); + // remove "retrieve," from the beginning, if it exists + if (s.find("retrieve,") == 0) { + s = s.substr(9); + } + ExtractionRequest exrequest(s, allRanges[i]); + polyRequest.push_back(exrequest); + } } // Grid hash diff --git a/src/tools/gribjump-validate.cc b/src/tools/gribjump-validate.cc index 00397fe..90bea56 100644 --- a/src/tools/gribjump-validate.cc +++ b/src/tools/gribjump-validate.cc @@ -85,7 +85,7 @@ void CompareEccodes::execute(const eckit::option::CmdArgs &args) { std::vector polyRequest; for (size_t i = 0; i < requests.size(); i++) { - ExtractionRequest exrequest(requests[i], allRanges[i]); + ExtractionRequest exrequest(requests[i].asString(), allRanges[i]); polyRequest.push_back(exrequest); } diff --git a/tests/remote/test_remote.cc b/tests/remote/test_remote.cc index de868b8..84915cf 100644 --- a/tests/remote/test_remote.cc +++ b/tests/remote/test_remote.cc @@ -48,10 +48,16 @@ static eckit::PathName metricsFile = "test_metrics"; CASE( "Remote protocol: extract" ) { // --- Extract - std::vector requests = { - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), + // std::vector requests = { + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), + // }; + + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", }; + std::vector> allIntervals = { {std::make_pair(0, 5), std::make_pair(20, 30)}, diff --git a/tests/test_api.cc b/tests/test_api.cc index 8bd2346..d6f6bbf 100644 --- a/tests/test_api.cc +++ b/tests/test_api.cc @@ -37,7 +37,7 @@ namespace test { constexpr double MISSING = std::numeric_limits::quiet_NaN(); void compareValues(const std::vector>>>& expectedValues, const std::vector>>& output) { - EXPECT(expectedValues.size() == output.size()); + EXPECT_EQUAL(expectedValues.size(), output.size()); for (size_t i = 0; i < expectedValues.size(); i++) { // each mars request EXPECT_EQUAL(expectedValues[i].size(), output[i].size()); for (size_t j = 0; j < expectedValues[i].size(); j++) { // each field @@ -98,19 +98,11 @@ CASE( "test_gribjump_api_extract" ) { // Test 1: Extract 3 fields. Each field has a different set of ranges - std::vector requests; - { - std::istringstream s( - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc\n" - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc\n" - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc\n" - ); - metkit::mars::MarsParser parser(s); - auto parsedRequests = parser.parse(); - metkit::mars::MarsExpension expand(/* inherit */ false); - requests = expand.expand(parsedRequests); - } - + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc" + }; std::vector> allIntervals = { { @@ -141,14 +133,16 @@ CASE( "test_gribjump_api_extract" ) { // Eccodes expected values std::vector>>> expectedValues; for (auto req : polyRequest1) { - expectedValues.push_back(eccodesExtract(req.request(), req.ranges())); + metkit::mars::MarsRequest marsreq = fdb5::FDBToolRequest::requestsFromString(req.requestString())[0].request(); + expectedValues.push_back(eccodesExtract(marsreq, req.ranges())); } compareValues(expectedValues, output1); // -------------------------------------------------------------------------------------------- +#if 0 // NO LONGER SUPPORTED // Test 2: Extract same fields as Test 1, but in a single step=2/1/3 request. One set of ranges for all fields. - + std::vector marsrequests; { std::istringstream s( "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc\n" @@ -156,8 +150,12 @@ CASE( "test_gribjump_api_extract" ) { metkit::mars::MarsParser parser(s); auto parsedRequests = parser.parse(); metkit::mars::MarsExpension expand(/* inherit */ false); - requests = expand.expand(parsedRequests); + marsrequests = expand.expand(parsedRequests); } + + requests = { + "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc" + }; std::vector ranges = allIntervals[0]; PolyRequest polyRequest2; @@ -168,25 +166,29 @@ CASE( "test_gribjump_api_extract" ) { EXPECT(output2[0].size() == 3); expectedValues.clear(); - expectedValues.push_back(eccodesExtract(requests[0], ranges)); + expectedValues.push_back(eccodesExtract(marsrequests[0], ranges)); compareValues(expectedValues, output2); - +#endif // -------------------------------------------------------------------------------------------- + std::vector ranges = allIntervals[0]; - // Test 2.b: Extract but with an md5 hash - EXPECT_THROWS_AS(gj.extract({ExtractionRequest(requests[0], ranges)}), eckit::SeriousBug); // missing hash - EXPECT_THROWS_AS(gj.extract({ExtractionRequest(requests[0], ranges, "wronghash")}), eckit::SeriousBug); // incorrect hash + // Test 1.b: Extract but with an md5 hash + std::vector vec = {ExtractionRequest(requests[0], ranges)}; + EXPECT_THROWS_AS(gj.extract(vec), eckit::SeriousBug); // missing hash + vec = {ExtractionRequest(requests[0], ranges, "wronghash")}; + EXPECT_THROWS_AS(gj.extract(vec), eckit::SeriousBug); // incorrect hash // correct hash - std::vector>> output2c = gj.extract({ExtractionRequest(requests[0], ranges, gridHash)}); + vec = {ExtractionRequest(requests[0], ranges, gridHash)}; + std::vector>> output2c = gj.extract(vec); EXPECT_EQUAL(output2c[0][0]->total_values(), 15); - // -------------------------------------------------------------------------------------------- + // // -------------------------------------------------------------------------------------------- // Test 3: Extract function using path and offsets, which skips engine and related tasks/checks. std::vector uris; - fdb5::FDBToolRequest fdbreq(requests[0]); + fdb5::FDBToolRequest fdbreq = fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc")[0]; auto listIter = fdb.list(fdbreq, false); fdb5::ListElement elem; while (listIter.next(elem)) { @@ -223,6 +225,9 @@ CASE( "test_gribjump_api_extract" ) { } // Expect output to be the same as output2[0] + expectedValues.clear(); + expectedValues.push_back(eccodesExtract(fdbreq.request(), ranges)); + std::vector>> output3v; output3v.push_back(std::move(output3)); // i.e. == {output3} compareValues(expectedValues, output3v); diff --git a/tests/test_engine.cc b/tests/test_engine.cc index d8ee6ca..bdcf7a1 100644 --- a/tests/test_engine.cc +++ b/tests/test_engine.cc @@ -96,127 +96,128 @@ CASE ("Engine: pre-test setup") { CASE ("Engine: Basic extraction") { - NOTIMP; // // --- Setup - // eckit::testing::SetEnv fdbconfig("FDB5_CONFIG", fdbConfig(tmpdir).c_str()); - // eckit::testing::SetEnv allowmissing("GRIBJUMP_ALLOW_MISSING", "0"); // We have deliberately missing data in the request. - - // // --- Extract (test 1) - // std::vector requests = { - // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), - // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), - // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc")[0].request(), - // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc")[0].request() // Deliberately missing data - // }; - - // std::vector> allIntervals = { - // {std::make_pair(0, 5), std::make_pair(20, 30)}, - // {std::make_pair(0, 5), std::make_pair(20, 30)}, - // {std::make_pair(0, 5), std::make_pair(20, 30)}, - // {std::make_pair(0, 5), std::make_pair(20, 30)} - // }; - - // Engine engine; - // ExtractionRequests exRequests; - // for (size_t i = 0; i < requests.size(); i++) { - // exRequests.push_back(ExtractionRequest(requests[i], allIntervals[i], gridHash)); - // } - // // We expect a throw due to missing data - // EXPECT_THROWS_AS(engine.extract(exRequests, false), DataNotFoundException); - - // // drop the final request - // exRequests.pop_back(); - - // ResultsMap results = engine.extract(exRequests, false); - // EXPECT_NO_THROW(engine.raiseErrors()); - - // // print contents of map - // for (auto& [req, exs] : results) { - // LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; - // for (auto& ex : exs) { - // ex->debug_print(); - // } - // } - - // // Check correct values - // size_t count = 0; - // for (size_t i = 0; i < 3; i++) { - // metkit::mars::MarsRequest req = requests[i]; - // std::vector intervals = allIntervals[i]; - // auto& exs = results[req]; - // auto comparisonValues = eccodesExtract(req, intervals); - // for (size_t j = 0; j < exs.size(); j++) { - // for (size_t k = 0; k < comparisonValues[j].size(); k++) { - // for (size_t l = 0; l < comparisonValues[j][k].size(); l++) { - // count++; - // double v = exs[j]->values()[k][l]; - // if (std::isnan(v)) { - // EXPECT(comparisonValues[j][k][l] == 9999); - // continue; - // } - - // EXPECT(comparisonValues[j][k][l] == v); - // } - // } - // } - // } - // // only count the 3 intervals with data - // EXPECT(count == 45); - - // // --- Extract (test 2) - // // Same request, all in one (test flattening) - // /// @todo, currently, the user cannot know order of the results after flattening, making this feature not very useful. - // /// We impose an order internally (currently, alphabetical). + eckit::testing::SetEnv fdbconfig("FDB5_CONFIG", fdbConfig(tmpdir).c_str()); + eckit::testing::SetEnv allowmissing("GRIBJUMP_ALLOW_MISSING", "0"); // We have deliberately missing data in the request. + + // --- Extract (test 1) + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc" // Deliberately missing data + }; + + std::vector> allIntervals = { + {std::make_pair(0, 5), std::make_pair(20, 30)}, + {std::make_pair(0, 5), std::make_pair(20, 30)}, + {std::make_pair(0, 5), std::make_pair(20, 30)}, + {std::make_pair(0, 5), std::make_pair(20, 30)} + }; + + Engine engine; + ExtractionRequests exRequests; + for (size_t i = 0; i < requests.size(); i++) { + exRequests.push_back(ExtractionRequest(requests[i], allIntervals[i], gridHash)); + } + // We expect a throw due to missing data + EXPECT_THROWS_AS(engine.extract(exRequests), DataNotFoundException); + + // drop the final request + exRequests.pop_back(); + + ResultsMap results = engine.extract(exRequests); + EXPECT_NO_THROW(engine.raiseErrors()); + + // print contents of map + for (auto& [req, exs] : results) { + LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; + for (auto& ex : exs) { + ex->debug_print(); + } + } + + // Check correct values + size_t count = 0; + for (size_t i = 0; i < 3; i++) { + metkit::mars::MarsRequest req = fdb5::FDBToolRequest::requestsFromString(requests[i])[0].request(); + std::vector intervals = allIntervals[i]; + auto& exs = results[requests[i]]; + auto comparisonValues = eccodesExtract(req, intervals); + for (size_t j = 0; j < exs.size(); j++) { + for (size_t k = 0; k < comparisonValues[j].size(); k++) { + for (size_t l = 0; l < comparisonValues[j][k].size(); l++) { + count++; + double v = exs[j]->values()[k][l]; + if (std::isnan(v)) { + EXPECT(comparisonValues[j][k][l] == 9999); + continue; + } + + EXPECT(comparisonValues[j][k][l] == v); + } + } + } + } + // only count the 3 intervals with data + EXPECT(count == 45); + +#if 0 + // --- Extract (test 2) + // Same request, all in one (test flattening) + /// @todo, currently, the user cannot know order of the results after flattening, making this feature not very useful. + /// We impose an order internally (currently, alphabetical). - // allIntervals = { - // {std::make_pair(0, 5), std::make_pair(20, 30)}, - // }; - - // requests = { - // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1/2/3,stream=oper,time=1200,type=fc")[0].request() - // }; - - // ASSERT(requests.size() == 1); - - // exRequests.clear(); - // exRequests.push_back(ExtractionRequest(requests[0], allIntervals[0], gridHash)); - - // results = engine.extract(exRequests, true); - // EXPECT_NO_THROW(engine.raiseErrors()); - - // // print contents of map - // for (auto& [req, exs] : results) { - // LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; - // for (auto& ex : exs) { - // ex->debug_print(); - // } - // } - - // // compare results - - // metkit::mars::MarsRequest req = requests[0]; - // auto& exs = results[req]; - // auto comparisonValues = eccodesExtract(req, allIntervals[0])[0]; // [0] Because each archived field has identical values. - // count = 0; - // for (size_t j = 0; j < exs.size(); j++) { - // auto values = exs[j]->values(); - // for (size_t k = 0; k < values.size(); k++) { - // for (size_t l = 0; l < values[k].size(); l++) { - // count++; - // double v = values[k][l]; - // if (std::isnan(v)) { - // EXPECT(comparisonValues[k][l] == 9999); - // continue; - // } - - // EXPECT(comparisonValues[k][l] == v); - // } - // } - // } - // EXPECT(count == 45); - - // /// @todo: request touching multiple files? - // /// @todo: request involving unsupported packingType? + allIntervals = { + {std::make_pair(0, 5), std::make_pair(20, 30)}, + }; + + requests = { + fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1/2/3,stream=oper,time=1200,type=fc")[0].request() + }; + + ASSERT(requests.size() == 1); + + exRequests.clear(); + exRequests.push_back(ExtractionRequest(requests[0], allIntervals[0], gridHash)); + + results = engine.extract(exRequests, true); + EXPECT_NO_THROW(engine.raiseErrors()); + + // print contents of map + for (auto& [req, exs] : results) { + LOG_DEBUG_LIB(LibGribJump) << "Request: " << req << std::endl; + for (auto& ex : exs) { + ex->debug_print(); + } + } + + // compare results + + metkit::mars::MarsRequest req = requests[0]; + auto& exs = results[req]; + auto comparisonValues = eccodesExtract(req, allIntervals[0])[0]; // [0] Because each archived field has identical values. + count = 0; + for (size_t j = 0; j < exs.size(); j++) { + auto values = exs[j]->values(); + for (size_t k = 0; k < values.size(); k++) { + for (size_t l = 0; l < values[k].size(); l++) { + count++; + double v = values[k][l]; + if (std::isnan(v)) { + EXPECT(comparisonValues[k][l] == 9999); + continue; + } + + EXPECT(comparisonValues[k][l] == v); + } + } + } + EXPECT(count == 45); +#endif + + /// @todo: request touching multiple files? + /// @todo: request involving unsupported packingType? } diff --git a/tests/test_gribinfo.cc b/tests/test_gribinfo.cc index 2c116e9..f0479f2 100644 --- a/tests/test_gribinfo.cc +++ b/tests/test_gribinfo.cc @@ -233,39 +233,37 @@ CASE ("test_wrong_jumper") { // Testing the extract functionality using ExtractionItem // ~ i.e. internals of FileExtractionTask CASE ("test_ExtractionItem_extract") { - NOTIMP; - // metkit::mars::MarsRequest request("none"); - // auto intervals = std::vector{{0, 10}, {3000000, 3000010}, {6599670, 6599680}}; - // ExtractionItem exItem(request, intervals ); + auto intervals = std::vector{{0, 10}, {3000000, 3000010}, {6599670, 6599680}}; + ExtractionItem exItem("", intervals ); - // eckit::PathName path = "2t_O1280.grib"; + eckit::PathName path = "2t_O1280.grib"; - // exItem.URI(eckit::URI(path)); + exItem.URI(eckit::URI(path)); - // eckit::FileHandle fh(path); - // fh.openForRead(); + eckit::FileHandle fh(path); + fh.openForRead(); - // eckit::Offset offset = 0; + eckit::Offset offset = 0; - // std::unique_ptr info(InfoFactory::instance().build(fh, offset)); - // EXPECT(info); + std::unique_ptr info(InfoFactory::instance().build(fh, offset)); + EXPECT(info); - // std::unique_ptr jumper(JumperFactory::instance().build(*info)); + std::unique_ptr jumper(JumperFactory::instance().build(*info)); - // jumper->extract(fh, offset, *info, exItem); + jumper->extract(fh, offset, *info, exItem); - // exItem.debug_print(); + exItem.debug_print(); - // // Check correct values - // std::vector> comparisonValues = eccodesExtract(path, {offset}, intervals)[0]; - // EXPECT(comparisonValues.size() == 3); - - // for (size_t i = 0; i < comparisonValues.size(); i++) { - // EXPECT(comparisonValues[i].size() == 10); - // for (size_t j = 0; j < comparisonValues[i].size(); j++) { - // EXPECT(comparisonValues[i][j] == exItem.values()[i][j]); - // } - // } + // Check correct values + std::vector> comparisonValues = eccodesExtract(path, {offset}, intervals)[0]; + EXPECT(comparisonValues.size() == 3); + + for (size_t i = 0; i < comparisonValues.size(); i++) { + EXPECT(comparisonValues[i].size() == 10); + for (size_t j = 0; j < comparisonValues[i].size(); j++) { + EXPECT(comparisonValues[i][j] == exItem.values()[i][j]); + } + } } //----------------------------------------------------------------------------- diff --git a/tests/test_serialiser.cc b/tests/test_serialiser.cc index 098c55c..92eaf43 100644 --- a/tests/test_serialiser.cc +++ b/tests/test_serialiser.cc @@ -37,16 +37,18 @@ constexpr size_t N_EXTRACTIONREQUESTS = 10; constexpr size_t N_EXTRACTIONRESULTS = 10; -void reportTimes(size_t N, double serialiseTime, double deserialiseTime) { - if (!REPORT_TIMES) return; - eckit::Log::info() << " For N=" << N << std::endl; - eckit::Log::info() << " Serialisation time: " << serialiseTime << std::endl; - eckit::Log::info() << " Deserialisation time: " << deserialiseTime << std::endl; - eckit::Log::info() << " Total time: " << serialiseTime + deserialiseTime << std::endl; -} +// void reportTimes(size_t N, double serialiseTime, double deserialiseTime) { +// if (!REPORT_TIMES) return; +// eckit::Log::info() << " For N=" << N << std::endl; +// eckit::Log::info() << " Serialisation time: " << serialiseTime << std::endl; +// eckit::Log::info() << " Deserialisation time: " << deserialiseTime << std::endl; +// eckit::Log::info() << " Total time: " << serialiseTime + deserialiseTime << std::endl; +// } -//----------------------------------------------------------------------------- +// //----------------------------------------------------------------------------- +CASE( "nothing" ) { +} // CASE( "Serialisation: Vector" ) { @@ -83,270 +85,270 @@ void reportTimes(size_t N, double serialiseTime, double deserialiseTime) { // } -// //----------------------------------------------------------------------------- +// // //----------------------------------------------------------------------------- -// CASE( "Serialisation Naive: Vector" ) { +// // CASE( "Serialisation Naive: Vector" ) { + +// // eckit::PathName filename = "test_serialiser.out"; + +// // std::vector vout; +// // for (size_t i = 0; i < N_VECTOR; i++) { +// // vout.push_back("this is a test string look at it go woah " + std::to_string(i)); +// // } + +// // eckit::Timer timer_serialise; +// // { +// // eckit::FileStream sout(filename, "w"); +// // auto c = eckit::closer(sout); +// // Serialiser::encodeNaive(sout, vout); +// // } +// // timer_serialise.stop(); + +// // eckit::Timer timer_deserialise; +// // std::vector vin; +// // { +// // eckit::FileStream sin(filename, "r"); +// // auto c = eckit::closer(sin); +// // vin = Serialiser::decodeVectorStringNaive(sin); +// // } +// // timer_deserialise.stop(); + +// // EXPECT_EQUAL(vout.size(), vin.size()); +// // for (size_t i = 0; i < vout.size(); i++) { +// // EXPECT_EQUAL(vout[i], vin[i]); +// // } + +// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// // } + +// // //----------------------------------------------------------------------------- + +// // CASE( "Serialisation: Vector" ) { + +// // eckit::PathName filename = "test_serialiser.out"; + +// // std::vector vout; +// // for (size_t i = 0; i < N_VECTOR; i++) { +// // vout.push_back("this is a test string look at it go woah and look its getting even bigger now this is probably big enough " + std::to_string(i)); +// // } -// eckit::PathName filename = "test_serialiser.out"; +// // eckit::Timer timer_serialise; +// // { +// // eckit::FileStream sout(filename, "w"); +// // auto c = eckit::closer(sout); +// // Serialiser::encode(sout, vout); +// // } +// // timer_serialise.stop(); -// std::vector vout; +// // eckit::Timer timer_deserialise; +// // std::vector vin; +// // { +// // eckit::FileStream sin(filename, "r"); +// // auto c = eckit::closer(sin); +// // vin = Serialiser::decodeVectorString(sin); +// // } +// // timer_deserialise.stop(); + +// // EXPECT_EQUAL(vout.size(), vin.size()); +// // for (size_t i = 0; i < vout.size(); i++) { +// // EXPECT_EQUAL(vout[i], vin[i]); +// // } + +// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// // } + + + +// //----------------------------------------------------------------------------- + +// CASE( "Serialisation: Ranges" ) { + +// eckit::PathName filename = "test_serialiser.out"; + +// bool naive = false; +// for (size_t i = 0; i < 2; i++) { +// Ranges vout; // for (size_t i = 0; i < N_VECTOR; i++) { -// vout.push_back("this is a test string look at it go woah " + std::to_string(i)); +// vout.push_back(Range(i, i+10)); // } - + // eckit::Timer timer_serialise; // { // eckit::FileStream sout(filename, "w"); // auto c = eckit::closer(sout); -// Serialiser::encodeNaive(sout, vout); +// Serialiser::encode(sout, vout, naive); // } // timer_serialise.stop(); - + // eckit::Timer timer_deserialise; -// std::vector vin; +// Ranges vin; // { // eckit::FileStream sin(filename, "r"); // auto c = eckit::closer(sin); -// vin = Serialiser::decodeVectorStringNaive(sin); +// vin = Serialiser::decodeRanges(sin, naive); // } // timer_deserialise.stop(); - + // EXPECT_EQUAL(vout.size(), vin.size()); // for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i], vin[i]); +// EXPECT_EQUAL(vout[i].first, vin[i].first); +// EXPECT_EQUAL(vout[i].second, vin[i].second); // } - + +// eckit::Log::info() << "Naive: " << naive << std::endl; // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// naive = !naive; +// } // } // //----------------------------------------------------------------------------- -// CASE( "Serialisation: Vector" ) { +// // CASE( "Serialisation: Vector>" ) { -// eckit::PathName filename = "test_serialiser.out"; +// // eckit::PathName filename = "test_serialiser.out"; + +// // std::vector> vout; +// // for (size_t i = 0; i < N_VECTOR_VECTOR; i++) { +// // std::vector inner = {1.0, 2.0, 3.0}; +// // vout.push_back(inner); +// // } + +// // eckit::Timer timer_serialise; +// // { +// // eckit::FileStream sout(filename, "w"); +// // auto c = eckit::closer(sout); +// // Serialiser::encode(sout, vout); +// // } +// // timer_serialise.stop(); + +// // eckit::Timer timer_deserialise; +// // std::vector> vin; +// // { +// // eckit::FileStream sin(filename, "r"); +// // auto c = eckit::closer(sin); +// // vin = Serialiser::decodeVectorVector(sin); +// // } +// // timer_deserialise.stop(); + +// // EXPECT_EQUAL(vout.size(), vin.size()); +// // for (size_t i = 0; i < vout.size(); i++) { +// // EXPECT_EQUAL(vout[i].size(), vin[i].size()); +// // for (size_t j = 0; j < vout[i].size(); j++) { +// // EXPECT_EQUAL(vout[i][j], vin[i][j]); +// // } +// // } + +// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// // } + +// // //----------------------------------------------------------------------------- + +// CASE( "Serialisation: Vector" ) { -// std::vector vout; -// for (size_t i = 0; i < N_VECTOR; i++) { -// vout.push_back("this is a test string look at it go woah and look its getting even bigger now this is probably big enough " + std::to_string(i)); +// eckit::PathName filename = "test_serialiser.out"; +// bool naive = false; +// for (size_t i = 0; i < 2; i++) { +// std::vector vout; +// for (size_t i = 0; i < N_EXTRACTIONREQUESTS; i++) { +// std::string s = "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=" + std::to_string(i); +// metkit::mars::MarsRequest marsrequest = metkit::mars::MarsRequest::parse(s); +// Ranges ranges = {Range(i, i+10), Range(i+11, i+12), Range(i+100, i+200)}; +// std::string hash = "testHash"; + +// ExtractionRequest req(marsrequest, ranges, hash); +// vout.push_back(req); // } - + // eckit::Timer timer_serialise; // { // eckit::FileStream sout(filename, "w"); // auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout); +// Serialiser::encode(sout, vout, naive); // } // timer_serialise.stop(); - + // eckit::Timer timer_deserialise; -// std::vector vin; +// std::vector vin; // { // eckit::FileStream sin(filename, "r"); // auto c = eckit::closer(sin); -// vin = Serialiser::decodeVectorString(sin); +// vin = Serialiser::decodeExtractionRequests(sin, naive); // } // timer_deserialise.stop(); - + // EXPECT_EQUAL(vout.size(), vin.size()); // for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i], vin[i]); +// EXPECT_EQUAL(vout[i].request().asString(), vin[i].request().asString()); +// EXPECT_EQUAL(vout[i].gridHash(), vin[i].gridHash()); +// EXPECT_EQUAL(vout[i].ranges(), vin[i].ranges()); // } - -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// } - - - -//----------------------------------------------------------------------------- - -CASE( "Serialisation: Ranges" ) { - - eckit::PathName filename = "test_serialiser.out"; - - bool naive = false; - for (size_t i = 0; i < 2; i++) { - Ranges vout; - for (size_t i = 0; i < N_VECTOR; i++) { - vout.push_back(Range(i, i+10)); - } - - eckit::Timer timer_serialise; - { - eckit::FileStream sout(filename, "w"); - auto c = eckit::closer(sout); - Serialiser::encode(sout, vout, naive); - } - timer_serialise.stop(); - - eckit::Timer timer_deserialise; - Ranges vin; - { - eckit::FileStream sin(filename, "r"); - auto c = eckit::closer(sin); - vin = Serialiser::decodeRanges(sin, naive); - } - timer_deserialise.stop(); - - EXPECT_EQUAL(vout.size(), vin.size()); - for (size_t i = 0; i < vout.size(); i++) { - EXPECT_EQUAL(vout[i].first, vin[i].first); - EXPECT_EQUAL(vout[i].second, vin[i].second); - } - - eckit::Log::info() << "Naive: " << naive << std::endl; - reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); - naive = !naive; - } -} - -//----------------------------------------------------------------------------- - -// CASE( "Serialisation: Vector>" ) { - -// eckit::PathName filename = "test_serialiser.out"; - -// std::vector> vout; -// for (size_t i = 0; i < N_VECTOR_VECTOR; i++) { -// std::vector inner = {1.0, 2.0, 3.0}; -// vout.push_back(inner); -// } -// eckit::Timer timer_serialise; -// { -// eckit::FileStream sout(filename, "w"); -// auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout); -// } -// timer_serialise.stop(); - -// eckit::Timer timer_deserialise; -// std::vector> vin; -// { -// eckit::FileStream sin(filename, "r"); -// auto c = eckit::closer(sin); -// vin = Serialiser::decodeVectorVector(sin); -// } -// timer_deserialise.stop(); - -// EXPECT_EQUAL(vout.size(), vin.size()); -// for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i].size(), vin[i].size()); -// for (size_t j = 0; j < vout[i].size(); j++) { -// EXPECT_EQUAL(vout[i][j], vin[i][j]); -// } +// eckit::Log::info() << "Naive: " << naive << std::endl; +// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// naive = !naive; // } - -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); // } -// //----------------------------------------------------------------------------- - -CASE( "Serialisation: Vector" ) { - - eckit::PathName filename = "test_serialiser.out"; - bool naive = false; - for (size_t i = 0; i < 2; i++) { - std::vector vout; - for (size_t i = 0; i < N_EXTRACTIONREQUESTS; i++) { - std::string s = "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=" + std::to_string(i); - metkit::mars::MarsRequest marsrequest = metkit::mars::MarsRequest::parse(s); - Ranges ranges = {Range(i, i+10), Range(i+11, i+12), Range(i+100, i+200)}; - std::string hash = "testHash"; - - ExtractionRequest req(marsrequest, ranges, hash); - vout.push_back(req); - } - - eckit::Timer timer_serialise; - { - eckit::FileStream sout(filename, "w"); - auto c = eckit::closer(sout); - Serialiser::encode(sout, vout, naive); - } - timer_serialise.stop(); - - eckit::Timer timer_deserialise; - std::vector vin; - { - eckit::FileStream sin(filename, "r"); - auto c = eckit::closer(sin); - vin = Serialiser::decodeExtractionRequests(sin, naive); - } - timer_deserialise.stop(); - - EXPECT_EQUAL(vout.size(), vin.size()); - for (size_t i = 0; i < vout.size(); i++) { - EXPECT_EQUAL(vout[i].request().asString(), vin[i].request().asString()); - EXPECT_EQUAL(vout[i].gridHash(), vin[i].gridHash()); - EXPECT_EQUAL(vout[i].ranges(), vin[i].ranges()); - } - - eckit::Log::info() << "Naive: " << naive << std::endl; - reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); - naive = !naive; - } -} - -// //----------------------------------------------------------------------------- - -// CASE( "Serialisation: Vector>" ) { - -// eckit::PathName filename = "test_serialiser.out"; - -// std::vector> vout; -// for (size_t i = 0; i < N_EXTRACTIONRESULTS; i++) { -// std::vector inner; -// for (size_t j = 0; j < 1; j++) { -// std::vector> values = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; -// std::vector>> mask = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; -// inner.push_back(ExtractionResult(values, mask)); -// } -// vout.push_back(std::move(inner)); -// } - -// eckit::Timer timer_serialise; -// { -// eckit::FileStream sout(filename, "w"); -// auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout); -// } -// timer_serialise.stop(); - -// eckit::Timer timer_deserialise; -// std::vector> vin; -// { -// eckit::FileStream sin(filename, "r"); -// auto c = eckit::closer(sin); -// vin = Serialiser::decodeExtractionResults(sin); -// } -// timer_deserialise.stop(); - -// EXPECT_EQUAL(vout.size(), vin.size()); -// for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i].size(), vin[i].size()); -// for (size_t j = 0; j < vout[i].size(); j++) { -// auto vout_values = vout[i][j].values(); -// auto vin_values = vin[i][j].values(); -// EXPECT_EQUAL(vout_values.size(), vin_values.size()); -// for (size_t k = 0; k < vout_values.size(); k++) { -// EXPECT_EQUAL(vout_values[k], vin_values[k]); -// } - -// auto vout_mask = vout[i][j].mask(); -// auto vin_mask = vin[i][j].mask(); -// EXPECT_EQUAL(vout_mask.size(), vin_mask.size()); -// for (size_t k = 0; k < vout_mask.size(); k++) { -// EXPECT_EQUAL(vout_mask[k].size(), vin_mask[k].size()); -// for (size_t l = 0; l < vout_mask[k].size(); l++) { -// EXPECT_EQUAL(vout_mask[k][l], vin_mask[k][l]); -// } -// } -// } -// } - -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// } +// // //----------------------------------------------------------------------------- + +// // CASE( "Serialisation: Vector>" ) { + +// // eckit::PathName filename = "test_serialiser.out"; + +// // std::vector> vout; +// // for (size_t i = 0; i < N_EXTRACTIONRESULTS; i++) { +// // std::vector inner; +// // for (size_t j = 0; j < 1; j++) { +// // std::vector> values = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; +// // std::vector>> mask = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; +// // inner.push_back(ExtractionResult(values, mask)); +// // } +// // vout.push_back(std::move(inner)); +// // } + +// // eckit::Timer timer_serialise; +// // { +// // eckit::FileStream sout(filename, "w"); +// // auto c = eckit::closer(sout); +// // Serialiser::encode(sout, vout); +// // } +// // timer_serialise.stop(); + +// // eckit::Timer timer_deserialise; +// // std::vector> vin; +// // { +// // eckit::FileStream sin(filename, "r"); +// // auto c = eckit::closer(sin); +// // vin = Serialiser::decodeExtractionResults(sin); +// // } +// // timer_deserialise.stop(); + +// // EXPECT_EQUAL(vout.size(), vin.size()); +// // for (size_t i = 0; i < vout.size(); i++) { +// // EXPECT_EQUAL(vout[i].size(), vin[i].size()); +// // for (size_t j = 0; j < vout[i].size(); j++) { +// // auto vout_values = vout[i][j].values(); +// // auto vin_values = vin[i][j].values(); +// // EXPECT_EQUAL(vout_values.size(), vin_values.size()); +// // for (size_t k = 0; k < vout_values.size(); k++) { +// // EXPECT_EQUAL(vout_values[k], vin_values[k]); +// // } + +// // auto vout_mask = vout[i][j].mask(); +// // auto vin_mask = vin[i][j].mask(); +// // EXPECT_EQUAL(vout_mask.size(), vin_mask.size()); +// // for (size_t k = 0; k < vout_mask.size(); k++) { +// // EXPECT_EQUAL(vout_mask[k].size(), vin_mask[k].size()); +// // for (size_t l = 0; l < vout_mask[k].size(); l++) { +// // EXPECT_EQUAL(vout_mask[k][l], vin_mask[k][l]); +// // } +// // } +// // } +// // } + +// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); +// // } } // namespace gribjump From f120e8f19fff7939f861ceb5dc76b195c3703899 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Wed, 13 Nov 2024 16:16:33 +0000 Subject: [PATCH 5/8] Cleanup experimental code --- pygribjump/src/pygribjump/pygribjump.py | 1 - src/gribjump/CMakeLists.txt | 3 - src/gribjump/Engine.cc | 25 +- src/gribjump/Engine.h | 1 - src/gribjump/ExtractionData.cc | 126 +-------- src/gribjump/ExtractionData.h | 11 +- src/gribjump/GribJumpBase.h | 1 - src/gribjump/Lister.cc | 64 +---- src/gribjump/Serialiser.cc | 307 -------------------- src/gribjump/Serialiser.h | 53 ---- src/gribjump/Task.cc | 2 +- src/gribjump/gribjump_c.cc | 13 +- src/gribjump/remote/GribJumpUser.cc | 2 +- src/gribjump/tools/ToolUtils.cc | 6 +- tests/CMakeLists.txt | 9 - tests/test_serialiser.cc | 359 ------------------------ 16 files changed, 37 insertions(+), 946 deletions(-) delete mode 100644 src/gribjump/Serialiser.cc delete mode 100644 src/gribjump/Serialiser.h delete mode 100644 tests/test_serialiser.cc diff --git a/pygribjump/src/pygribjump/pygribjump.py b/pygribjump/src/pygribjump/pygribjump.py index ddae332..27ee0ec 100644 --- a/pygribjump/src/pygribjump/pygribjump.py +++ b/pygribjump/src/pygribjump/pygribjump.py @@ -258,7 +258,6 @@ class ExtractionRequest: The ranges to extract. """ def __init__(self, req, ranges, gridHash=None): - # reqstr = "retrieve,"+dic_to_request(req) reqstr = dic_to_request(req) rangestr = list_to_rangestr(ranges) request = ffi.new('gribjump_extraction_request_t**') diff --git a/src/gribjump/CMakeLists.txt b/src/gribjump/CMakeLists.txt index 1e8ee15..dce4cc5 100644 --- a/src/gribjump/CMakeLists.txt +++ b/src/gribjump/CMakeLists.txt @@ -49,9 +49,6 @@ list( APPEND gribjump_srcs Metrics.h Metrics.cc Types.h - - Serialiser.h - Serialiser.cc ) if( HAVE_GRIBJUMP_LOCAL_EXTRACT ) diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index 87fe980..f69ede9 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -44,18 +44,19 @@ Engine::~Engine() {} metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ){ // Split strings into one unified map - // We also canonicalise the requests + // We also canonicalise the requests such that their keys are in alphabetical order + std::map> keyValues; for (auto& r : requests) { const std::string& s = r.requestString(); - std::vector kvs = eckit::StringTools::split(",", s); // might be faster to use tokenizer directly. + std::vector kvs = eckit::StringTools::split(",", s); /// @todo might be faster to use tokenizer directly. for (auto& kv : kvs) { std::vector kv_s = eckit::StringTools::split("=", kv); if (kv_s.size() != 2) continue; // ignore verb keyValues[kv_s[0]].insert(kv_s[1]); } - // Important! Canonicalise string by sorting keys + // Canonicalise string by sorting keys std::sort(kvs.begin(), kvs.end()); std::string canonicalised = ""; for (auto& kv : kvs) { @@ -64,14 +65,14 @@ metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, canonicalised += ","; } } - ASSERT(keyToExtractionItem.find(canonicalised) == keyToExtractionItem.end()); /// no repeats + ASSERT(keyToExtractionItem.find(canonicalised) == keyToExtractionItem.end()); // no repeats r.requestString(canonicalised); auto extractionItem = std::make_unique(canonicalised, r.ranges()); extractionItem->gridHash(r.gridHash()); keyToExtractionItem.emplace(canonicalised, std::move(extractionItem)); // 1-to-1-map } - // --- construct the union request + // Construct the union request std::string result = "retrieve,"; size_t i = 0; @@ -105,13 +106,7 @@ metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, filemap_t Engine::buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem) { // Map files to ExtractionItem - eckit::Timer timer("Gribjump Engine: Building file map"); - - MetricsManager::instance().set("debug_elapsed_union_request", timer.elapsed()); - timer.reset("Gribjump Engine: Flattened requests and constructed union request"); - filemap_t filemap = FDBLister::instance().fileMap(unionrequest, keyToExtractionItem); - return filemap; } @@ -122,10 +117,11 @@ void Engine::forwardRemoteExtraction(filemap_t& filemap) { const std::map& servermap_str = LibGribJump::instance().config().serverMap(); ASSERT(!servermap_str.empty()); - for (auto& [fdb, gj] : servermap_str) { - LOG_DEBUG_LIB(LibGribJump) << "Servermap: " << fdb << " -> " << gj << std::endl; + if (LibGribJump::instance().debug()) { + for (auto& [fdb, gj] : servermap_str) { + LOG_DEBUG_LIB(LibGribJump) << "Servermap: " << fdb << " -> " << gj << std::endl; + } } - std::unordered_map servermap; for (auto& [fdb, gj] : servermap_str) { eckit::net::Endpoint fdbEndpoint(fdb); @@ -223,7 +219,6 @@ ResultsMap Engine::collectResults(ExItemMap& keyToExtractionItem) { // Create map of base request to vector of extraction items. Takes ownership of the ExtractionItems ResultsMap results; - // NOTIMP; for (auto& [key, ex] : keyToExtractionItem) { results[ex->request()].push_back(std::move(ex)); } diff --git a/src/gribjump/Engine.h b/src/gribjump/Engine.h index d0182dc..5302e96 100644 --- a/src/gribjump/Engine.h +++ b/src/gribjump/Engine.h @@ -48,7 +48,6 @@ class Engine { void forwardRemoteExtraction(filemap_t& filemap); metkit::mars::MarsRequest buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ); - private: TaskGroup taskGroup_; /// @todo Maybe we should be returning the taskGroup, rather than storing it here. diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index bdf305f..f7a44ca 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -37,69 +37,6 @@ std::vector decodeVector(eckit::Stream& s) { return std::vector(data, data + size); } -void encodeVector(eckit::Stream& s, const std::vector& v) { - size_t size = v.size(); - s << size; - eckit::Buffer buffer(v.data(), size * sizeof(unsigned long long)); - s << buffer; -} - -std::vector decodeVectorUll(eckit::Stream& s) { - size_t size; - s >> size; - eckit::Buffer buffer(size * sizeof(unsigned long long)); - s >> buffer; - unsigned long long* data = (unsigned long long*) buffer.data(); - return std::vector(data, data + size); -} - - -void encodeVectorVector(eckit::Stream& s, const std::vector>& v) { - size_t size = v.size(); - s << size; - size_t totalSize = 0; - for (auto& inner : v) { - totalSize += inner.size(); - s << inner.size(); - } - s << totalSize; - eckit::Buffer buffer(totalSize * sizeof(double)); - double* data = (double*) buffer.data(); - for (auto& inner : v) { - for (auto& d : inner) { - *data++ = d; - } - } - s << buffer; -} - -std::vector> decodeVectorVector(eckit::Stream& s) { - size_t size; - s >> size; - std::vector innerSizes; - size_t totalSize = 0; - for (size_t i = 0; i < size; i++) { - size_t innerSize; - s >> innerSize; - innerSizes.push_back(innerSize); - totalSize += innerSize; - } - - eckit::Buffer buffer(totalSize * sizeof(double)); - s >> buffer; - double* data = (double*) buffer.data(); - - std::vector> result; - size_t offset = 0; - for (auto& innerSize : innerSizes) { - std::vector inner(data + offset, data + offset + innerSize); - result.push_back(inner); - offset += innerSize; - } - - return result; -} - } // namespace ExtractionResult::ExtractionResult() {} @@ -116,16 +53,6 @@ ExtractionResult::ExtractionResult(eckit::Stream& s) { values_.push_back(decodeVector(s)); } - // s >> numRanges; // maybe wrong - // for (size_t i = 0; i < numRanges; i++) { - // std::vector bitsetUll = decodeVectorUll(s); - // for (auto& b : bitsetUll) { - // mask_[i].push_back(std::bitset<64>(b)); - // } - - // } - - std::vector> bitsetStrings; s >> bitsetStrings; for (auto& v : bitsetStrings) { @@ -154,16 +81,6 @@ void ExtractionResult::encode(eckit::Stream& s) const { encodeVector(s, v); } - // s << mask_.size(); // vector of vectors - // for (auto& v : mask_) { - // std::vector bitsetUll; - // for (auto& b : v) { - // bitsetUll.push_back(b.to_ullong()); - // } - // encodeVector(s, bitsetUll); - // } - - std::vector> bitsetStrings; for (auto& v : mask_) { std::vector bitsetString; @@ -205,23 +122,14 @@ eckit::Stream& operator<<(eckit::Stream& s, const ExtractionResult& o) { ExtractionRequest::ExtractionRequest(const std::string& request, const std::vector& ranges, std::string gridHash): ranges_(ranges), - request_string_(request), + request_(request), gridHash_(gridHash) {} ExtractionRequest::ExtractionRequest() {} ExtractionRequest::ExtractionRequest(eckit::Stream& s) { - // request_ = metkit::mars::MarsRequest(s); // original - - // Now convert it to a mars request - s >> request_string_; - // request_ = metkit::mars::MarsRequest::parse(request_string_); // very very slow - // takes an istream - // std::istringstream istream(request_string_); - // metkit::mars::MarsParser parser(istream); - // request_ = parser.parse()[0]; // hard asserting that this expands to one request - + s >> request_; s >> gridHash_; size_t numRanges; s >> numRanges; @@ -232,41 +140,13 @@ ExtractionRequest::ExtractionRequest(eckit::Stream& s) { } } -std::vector ExtractionRequest::split(const std::string& key) const { - NOTIMP; - // todo: implement for string request - - // std::vector reqs = request_.split(key); - - // std::vector requests; - // requests.reserve(reqs.size()); - // for (auto& r : reqs) { - // requests.push_back(ExtractionRequest(r, ranges_)); - // } - // return requests; -} - -std::vector ExtractionRequest::split(const std::vector& keys) const { - NOTIMP; - // todo: implement for string request - // std::vector reqs = request_.split(keys); - - // std::vector requests; - // requests.reserve(reqs.size()); - // for (auto& r : reqs) { - // requests.push_back(ExtractionRequest(r, ranges_)); - // } - // return requests; -} - eckit::Stream& operator<<(eckit::Stream& s, const ExtractionRequest& o) { o.encode(s); return s; } void ExtractionRequest::encode(eckit::Stream& s) const { - // s << request_; - s << request_string_; + s << request_; s << gridHash_; s << ranges_.size(); for (auto& [start, end] : ranges_) { diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index 2ffa890..add8112 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -76,16 +76,12 @@ class ExtractionRequest { public: // methods ExtractionRequest(); - // ExtractionRequest(const metkit::mars::MarsRequest&, const std::vector&, std::string gridHash=""); ExtractionRequest(const std::string&, const std::vector&, std::string gridHash=""); explicit ExtractionRequest(eckit::Stream& s); - std::vector split(const std::vector& keys) const; - std::vector split(const std::string& key) const; const std::vector& ranges() const {return ranges_;} - // const metkit::mars::MarsRequest& request() const {NOTIMP; return request_;} - const std::string& requestString() const {return request_string_;} - void requestString(const std::string& s) {request_string_ = s;} + const std::string& requestString() const {return request_;} + void requestString(const std::string& s) {request_ = s;} const std::string& gridHash() const {return gridHash_;} private: // methods @@ -96,8 +92,7 @@ class ExtractionRequest { private: // members std::vector ranges_; - metkit::mars::MarsRequest request_; - std::string request_string_; // debug: is this better? + std::string request_; std::string gridHash_; friend class Serialiser; diff --git a/src/gribjump/GribJumpBase.h b/src/gribjump/GribJumpBase.h index 046f3b0..6961376 100644 --- a/src/gribjump/GribJumpBase.h +++ b/src/gribjump/GribJumpBase.h @@ -33,7 +33,6 @@ namespace fdb5 { namespace gribjump { -// using ResultsMap = std::map>>; using ResultsMap = std::map>>; class GribJumpBase : public eckit::NonCopyable { diff --git a/src/gribjump/Lister.cc b/src/gribjump/Lister.cc index 5d5be10..8b644eb 100644 --- a/src/gribjump/Lister.cc +++ b/src/gribjump/Lister.cc @@ -77,50 +77,27 @@ std::string fdbkeyToStr(const fdb5::Key& key) { filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, const ExItemMap& reqToExtractionItem) { eckit::AutoLock lock(this); filemap_t filemap; - eckit::Timer timer; - std::cout << "DEBUG: unionRequest: " << unionRequest << std::endl; - fdb5::FDBToolRequest fdbreq(unionRequest); + fdb5::FDBToolRequest fdbreq(unionRequest); auto listIter = fdb_.list(fdbreq, true); - MetricsManager::instance().set("debug_elapsed_fdb_list", timer.elapsed()); - timer.reset("FDB list"); - + size_t fdb_count = 0; size_t count = 0; - fdb5::ListElement elem; - - // chrono, we're going to accumulate some times - - double time_tostr = 0; - double time_uri = 0; - double time_filemap = 0; - - double time_next=0; - - eckit::Timer timer_next; - size_t fdb_count=0; while (listIter.next(elem)) { fdb_count++; - time_next += timer_next.elapsed(); - eckit::Timer timer1; std::string key = fdbkeyToStr(elem.combinedKey()); - time_tostr += timer1.elapsed(); // If key not in map, not related to the request - eckit::Timer timer2; if (reqToExtractionItem.find(key) == reqToExtractionItem.end()) continue; - // Set the URI in the ExtractionItem eckit::URI uri = elem.location().fullUri(); ExtractionItem* extractionItem = reqToExtractionItem.at(key).get(); extractionItem->URI(uri); - time_uri += timer2.elapsed(); // Add to filemap - eckit::Timer timer3; eckit::PathName fname = uri.path(); auto it = filemap.find(fname); if(it == filemap.end()) { @@ -131,47 +108,32 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons else { it->second.push_back(extractionItem); } - time_filemap += timer3.elapsed(); count++; - - // timer_next.reset(""); - } - MetricsManager::instance().set("debug_list_time_tostr", time_tostr); - MetricsManager::instance().set("debug_list_time_uri", time_uri); - MetricsManager::instance().set("debug_list_time_filemap", time_filemap); - - - eckit::Timer timer_extra; - - LOG_DEBUG_LIB(LibGribJump) << "Found " << count << " fields in " << filemap.size() << " files" << std::endl; - LOG_DEBUG_LIB(LibGribJump) << "FDB count: " << fdb_count << std::endl; + LOG_DEBUG_LIB(LibGribJump) << "FDB found " << fdb_count << " fields. Matched " << count << " fields in " << filemap.size() << " files" << std::endl; if (count != reqToExtractionItem.size()) { - eckit::Log::warning() << "Warning: Number of fields found (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; + eckit::Log::warning() << "Warning: Number of fields matched (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; if (!allowMissing_) { std::stringstream ss; - ss << "Found " << count << " fields but " << reqToExtractionItem.size() << " were requested." << std::endl; + ss << "Matched " << count << " fields but " << reqToExtractionItem.size() << " were requested." << std::endl; ss << "Union request: " << unionRequest << std::endl; throw DataNotFoundException(ss.str()); } } - // print the file map - LOG_DEBUG_LIB(LibGribJump) << "File map: " << std::endl; - for (const auto& file : filemap) { - LOG_DEBUG_LIB(LibGribJump) << " file=" << file.first << ", Offsets=["; - for (const auto& extractionItem : file.second) { - LOG_DEBUG_LIB(LibGribJump) << extractionItem->offset() << ", "; + if (LibGribJump::instance().debug()) { + LOG_DEBUG_LIB(LibGribJump) << "File map: " << std::endl; + for (const auto& file : filemap) { + LOG_DEBUG_LIB(LibGribJump) << " file=" << file.first << ", Offsets=["; + for (const auto& extractionItem : file.second) { + LOG_DEBUG_LIB(LibGribJump) << extractionItem->offset() << ", "; + } + LOG_DEBUG_LIB(LibGribJump) << "]" << std::endl; } - LOG_DEBUG_LIB(LibGribJump) << "]" << std::endl; } - MetricsManager::instance().set("debug_list_time_extra", timer_extra.elapsed()); - - MetricsManager::instance().set("debug_listiter_to_filemap", timer.elapsed()); - return filemap; } diff --git a/src/gribjump/Serialiser.cc b/src/gribjump/Serialiser.cc deleted file mode 100644 index a86fb70..0000000 --- a/src/gribjump/Serialiser.cc +++ /dev/null @@ -1,307 +0,0 @@ -// /* -// * (C) Copyright 2024- ECMWF. -// * -// * This software is licensed under the terms of the Apache Licence Version 2.0 -// * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -// * In applying this licence, ECMWF does not waive the privileges and immunities -// * granted to it by virtue of its status as an intergovernmental organisation nor -// * does it submit to any jurisdiction. -// */ - -// /// @author Christopher Bradley - -// #include "eckit/value/Value.h" -// #include "eckit/io/Buffer.h" -// #include "gribjump/Serialiser.h" - - -// namespace gribjump { - -// void Serialiser::encode(eckit::Stream& s, const std::vector& v) { -// size_t size = v.size(); -// s << size; -// eckit::Buffer buffer(v.data(), size * sizeof(double)); -// s << buffer; -// } - -// std::vector Serialiser::decodeVector(eckit::Stream& s) { -// size_t size; -// s >> size; -// eckit::Buffer buffer(size * sizeof(double)); -// s >> buffer; -// double* data = (double*) buffer.data(); - -// return std::vector(data, data + size); -// } - -// // ----------------------------------------------------------------------------------------------- -// // vector of pairs - -// // conclusion: naive is worse for vector of pairs. -// void Serialiser::encode(eckit::Stream& s, const Ranges& v, bool naive) { -// if (naive) { -// size_t size = v.size(); -// s << size; -// for (auto& pair : v) { -// s << pair.first; -// s << pair.second; -// } -// return; -// } -// // else -// size_t size = v.size(); -// s << size; - -// // We know they are pairs, don't need size -// eckit::Buffer buffer(v.data(), v.size() * sizeof(size_t) * 2); -// s << buffer; -// } - -// Ranges Serialiser::decodeRanges(eckit::Stream& s, bool naive) { -// if (naive) { -// Ranges result; -// size_t size; -// s >> size; -// for (size_t i = 0; i < size; i++) { -// size_t first; -// size_t second; -// s >> first; -// s >> second; -// result.push_back(std::make_pair(first, second)); -// } -// return result; -// } -// // else -// size_t size; -// s >> size; -// eckit::Buffer buffer(size * sizeof(size_t) * 2); -// s >> buffer; -// size_t* data = (size_t*) buffer.data(); - -// Ranges result; -// for (size_t i = 0; i < size; i++) { -// result.push_back(std::make_pair(data[i*2], data[i*2 + 1])); -// } -// return result; -// } - - -// // ----------------------------------------------------------------------------------------------- - -// void Serialiser::encode(eckit::Stream& s, const std::vector& v) { -// // Don't want to just do s << str, since this is quite slow. -// // Use a buffer for all strings. -// size_t size = v.size(); -// s << size; -// size_t totalSize = 0; -// for (auto& str : v) { -// totalSize += str.size(); -// s << str.size(); -// } -// eckit::Buffer buffer(totalSize); -// char* data = (char*) buffer.data(); -// for (auto& str : v) { -// for (auto& c : str) { -// *data++ = c; -// } -// } -// s << buffer; -// } - -// std::vector Serialiser::decodeVectorString(eckit::Stream& s) { -// size_t size; -// s >> size; -// std::vector innerSizes; -// size_t totalSize = 0; -// for (size_t i = 0; i < size; i++) { -// size_t innerSize; -// s >> innerSize; -// innerSizes.push_back(innerSize); -// totalSize += innerSize; -// } - -// eckit::Buffer buffer(totalSize); -// s >> buffer; -// char* data = (char*) buffer.data(); - -// std::vector result; -// size_t offset = 0; -// for (auto& innerSize : innerSizes) { -// std::string inner(data + offset, innerSize); -// result.push_back(inner); -// offset += innerSize; -// } - -// return result; -// } - -// // Naive version is actually faster -// void Serialiser::encodeNaive(eckit::Stream& s, const std::vector& v) { -// s << v; -// } - -// std::vector Serialiser::decodeVectorStringNaive(eckit::Stream& s) { -// std::vector result; -// s >> result; -// return result; -// } - -// // ----------------------------------------------------------------------------------------------- - -// void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { -// size_t size = v.size(); -// s << size; -// size_t totalSize = 0; -// for (auto& inner : v) { -// totalSize += inner.size(); -// s << inner.size(); -// } -// eckit::Buffer buffer(totalSize * sizeof(double)); -// double* data = (double*) buffer.data(); -// for (auto& inner : v) { -// for (auto& d : inner) { -// *data++ = d; -// } -// } -// s << buffer; -// } - -// std::vector> Serialiser::decodeVectorVector(eckit::Stream& s) { -// std::vector> result; - -// size_t size; -// s >> size; -// std::vector innerSizes; -// size_t totalSize = 0; -// for (size_t i = 0; i < size; i++) { -// size_t innerSize; -// s >> innerSize; -// innerSizes.push_back(innerSize); -// totalSize += innerSize; -// } - -// eckit::Buffer buffer(totalSize * sizeof(double)); -// s >> buffer; -// double* data = (double*) buffer.data(); - -// size_t offset = 0; -// for (auto& innerSize : innerSizes) { -// std::vector inner(data + offset, data + offset + innerSize); -// result.push_back(inner); -// offset += innerSize; -// } - -// return result; -// } - -// // ----------------------------------------------------------------------------------------------- -// void Serialiser::encode(eckit::Stream& s, const std::vector& v, bool naive) { -// if (naive) { -// size_t size = v.size(); -// s << size; -// for (auto& req : v) { -// req.encode(s); -// } -// return; -// } - -// std::vector gridHashes; -// RangesList ranges; - -// // reserve -// gridHashes.reserve(v.size()); -// ranges.reserve(v.size()); - -// for (auto& req : v) { // This copy is grim -// gridHashes.push_back(req.gridHash()); -// ranges.push_back(req.ranges()); -// } - -// encodeNaive(s, gridHashes); -// s << ranges.size(); -// for (auto& r : ranges) { -// encode(s, r, false); -// } - -// // encode the mars requests naively -// s << v.size(); -// for (auto& req : v) { -// s << req.request(); -// } -// } - -// std::vector Serialiser::decodeExtractionRequests(eckit::Stream& s, bool naive) { -// if (naive) { -// std::vector result; -// size_t size; -// s >> size; -// for (size_t i = 0; i < size; i++) { -// result.push_back(ExtractionRequest(s)); -// } -// return result; -// } - -// std::vector gridHashes = decodeVectorStringNaive(s); - -// size_t numRanges; -// s >> numRanges; -// RangesList ranges; -// ranges.reserve(numRanges); -// for (size_t i = 0; i < numRanges; i++) { -// ranges.push_back(decodeRanges(s, false)); -// } - -// std::vector marsrequests; -// size_t numMarsRequests; -// s >> numMarsRequests; -// marsrequests.reserve(numMarsRequests); -// for (size_t i = 0; i < numMarsRequests; i++) { -// metkit::mars::MarsRequest marsrequest(s); -// marsrequests.push_back(marsrequest); -// } - -// // repack -// std::vector result; -// for (size_t i = 0; i < marsrequests.size(); i++) { -// result.push_back(ExtractionRequest(marsrequests[i], ranges[i], gridHashes[i])); -// } - -// return result; -// } - - -// // ----------------------------------------------------------------------------------------------- -// void Serialiser::encode(eckit::Stream& s, const std::vector>& v) { -// size_t size = v.size(); -// s << size; -// for (auto& inner : v) { -// size_t innerSize = inner.size(); -// s << innerSize; -// for (auto& res : inner) { -// res.encode(s); -// } -// } - -// } - -// std::vector> Serialiser::decodeExtractionResults(eckit::Stream& s) { -// std::vector> result; - -// size_t size; -// s >> size; -// for (size_t i = 0; i < size; i++) { -// size_t innerSize; -// s >> innerSize; -// std::vector inner; -// for (size_t j = 0; j < innerSize; j++) { -// inner.push_back(ExtractionResult(s)); -// } -// result.push_back(std::move(inner)); -// } - -// return result; -// } - - -// } // namespace gribjump - diff --git a/src/gribjump/Serialiser.h b/src/gribjump/Serialiser.h deleted file mode 100644 index ae7f2cc..0000000 --- a/src/gribjump/Serialiser.h +++ /dev/null @@ -1,53 +0,0 @@ -// /* -// * (C) Copyright 2024- ECMWF. -// * -// * This software is licensed under the terms of the Apache Licence Version 2.0 -// * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -// * In applying this licence, ECMWF does not waive the privileges and immunities -// * granted to it by virtue of its status as an intergovernmental organisation nor -// * does it submit to any jurisdiction. -// */ - -// /// @author Christopher Bradley - -// #pragma once - -// #include -// #include "eckit/serialisation/Stream.h" -// #include "gribjump/ExtractionData.h" -// #include "gribjump/Types.h" - -// // Class to help with serialisation of containers - -// namespace gribjump { - -// class Serialiser { -// public: -// static void encode(eckit::Stream& s, const std::vector& v); -// static std::vector decodeVector(eckit::Stream& s); - -// static void encode(eckit::Stream& s, const std::vector& v); -// static std::vector decodeVectorString(eckit::Stream& s); - -// static void encode(eckit::Stream& s, const std::vector>& v); -// static std::vector> decodeVectorVector(eckit::Stream& s); - -// static void encode(eckit::Stream& s, const std::vector& v, bool naive=false); -// static std::vector decodeExtractionRequests(eckit::Stream& s, bool naive=false); - -// // We tend to have a vector of vectors of ExtractionResults -// static void encode(eckit::Stream& s, const std::vector>& v); -// static std::vector> decodeExtractionResults(eckit::Stream& s); - - -// static void encode(eckit::Stream& s, const Ranges& v, bool naive=false); -// static Ranges decodeRanges(eckit::Stream& s, bool naive=false); - -// //------------------------------------------------------------------------------------------------- -// // Naive implementations, for timing comparison -// static void encodeNaive(eckit::Stream& s, const std::vector& v); -// static std::vector decodeVectorStringNaive(eckit::Stream& s); -// }; - -// } // namespace gribjump - diff --git a/src/gribjump/Task.cc b/src/gribjump/Task.cc index cb22c48..2ca85d1 100644 --- a/src/gribjump/Task.cc +++ b/src/gribjump/Task.cc @@ -186,7 +186,7 @@ void FileExtractionTask::extract() { throw eckit::BadValue("Grid hash was not specified in request but is required. (Extraction item " + std::to_string(i) + " in file " + fname_ + ")"); } if (!ignoreGrid_ && (expectedHash != info.md5GridSection())) { - throw eckit::BadValue("Grid hash mismatch for extraction item " + std::to_string(i) + " in file " + fname_ + ". Expected: " + expectedHash + ", got: " + info.md5GridSection()); + throw eckit::BadValue("Grid hash mismatch for extraction item " + std::to_string(i) + " in file " + fname_ + ". Request specified: " + expectedHash + ", JumpInfo contains: " + info.md5GridSection()); } std::unique_ptr jumper(JumperFactory::instance().build(info)); // todo, dont build a new jumper for each info. diff --git a/src/gribjump/gribjump_c.cc b/src/gribjump/gribjump_c.cc index e95811e..9aa99bf 100644 --- a/src/gribjump/gribjump_c.cc +++ b/src/gribjump/gribjump_c.cc @@ -132,29 +132,20 @@ int gribjump_delete_handle(gribjump_handle_t* handle) { int gribjump_new_request(gribjump_extraction_request_t** request, const char* reqstr, const char* rangesstr, const char* gridhash) { return wrapApiFunction([=] { - - // reqstr is a string representation of a metkit::mars::MarsRequest + // reqstr is a request string, we *ASSUME* that it resembles a valid mars request for a SINGLE field. // rangesstr is a comma-separated list of ranges, e.g. "0-10,20-30" - // NB: Treat the requests as raw requests. - // std::istringstream iss(reqstr); - // metkit::mars::MarsParser parser(iss); - // std::vector requests = parser.parse(); - // ASSERT(requests.size() == 1); - // metkit::mars::MarsRequest mreq(requests[0]); - // Parse the ranges string std::vector ranges = eckit::StringTools::split(",", rangesstr); std::vector rangevec; for (const auto& range : ranges) { - std::vector kv = eckit::StringTools::split("-", range); // this is awful + std::vector kv = eckit::StringTools::split("-", range); // this is silly, we should just pass the values as integers ASSERT(kv.size() == 2); rangevec.push_back(std::make_pair(std::stoi(kv[0]), std::stoi(kv[1]))); } std::string gridhash_str = gridhash ? std::string(gridhash) : ""; *request = new gribjump_extraction_request_t(reqstr, rangevec, gridhash_str); - }); } diff --git a/src/gribjump/remote/GribJumpUser.cc b/src/gribjump/remote/GribJumpUser.cc index 0d3ba02..308293e 100644 --- a/src/gribjump/remote/GribJumpUser.cc +++ b/src/gribjump/remote/GribJumpUser.cc @@ -61,7 +61,7 @@ void GribJumpUser::handle_client(eckit::Stream& s, eckit::Timer& timer) { s >> version; if (version != remoteProtocolVersion) { - throw eckit::SeriousBug("Gribjump remote-protocol mismatch: expected version " + std::to_string(protocolVersion_) + " but got " + std::to_string(version)); + throw eckit::SeriousBug("Gribjump remote-protocol mismatch: Serverside version: " + std::to_string(protocolVersion_) + ", Clientside version: " + std::to_string(version)); } LogContext ctx(s); diff --git a/src/gribjump/tools/ToolUtils.cc b/src/gribjump/tools/ToolUtils.cc index ab42258..d18d975 100644 --- a/src/gribjump/tools/ToolUtils.cc +++ b/src/gribjump/tools/ToolUtils.cc @@ -72,8 +72,10 @@ std::vector flattenRequest(const metkit::mars::MarsRe LOG_DEBUG_LIB(LibGribJump) << "Base request: " << request << std::endl; - for (const auto& req : flattenedRequests) { - LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; + if (LibGribJump::instance().debug()) { + for (const auto& req : flattenedRequests) { + LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; + } } return flattenedRequests; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 570c02b..7e1326c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -120,15 +120,6 @@ ecbuild_add_test( LIBS gribjump ) -ecbuild_add_test( - TARGET "gribjump_test_serialiser" - SOURCES "test_serialiser.cc" - INCLUDES "${ECKIT_INCLUDE_DIRS}" - ENVIRONMENT "${gribjump_env}" - NO_AS_NEEDED - LIBS gribjump -) - ecbuild_add_test( TARGET "gribjump_test_misc_units" SOURCES "test_misc_units.cc" diff --git a/tests/test_serialiser.cc b/tests/test_serialiser.cc deleted file mode 100644 index 92eaf43..0000000 --- a/tests/test_serialiser.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * (C) Copyright 1996- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include -#include - -#include "eckit/testing/Test.h" -#include "eckit/filesystem/PathName.h" -#include "eckit/filesystem/LocalPathName.h" -#include "eckit/serialisation/FileStream.h" -#include "eckit/io/AutoCloser.h" - -#include "metkit/mars/MarsRequest.h" - -#include "gribjump/Serialiser.h" -#include "gribjump/ExtractionData.h" - -#include "eckit/log/Timer.h" - - -using namespace eckit::testing; - -namespace gribjump::test { - -// Useful for timing -constexpr bool REPORT_TIMES = true; -constexpr size_t N_VECTOR = 100; -constexpr size_t N_VECTOR_VECTOR = 10; -constexpr size_t N_EXTRACTIONREQUESTS = 10; -constexpr size_t N_EXTRACTIONRESULTS = 10; - - -// void reportTimes(size_t N, double serialiseTime, double deserialiseTime) { -// if (!REPORT_TIMES) return; -// eckit::Log::info() << " For N=" << N << std::endl; -// eckit::Log::info() << " Serialisation time: " << serialiseTime << std::endl; -// eckit::Log::info() << " Deserialisation time: " << deserialiseTime << std::endl; -// eckit::Log::info() << " Total time: " << serialiseTime + deserialiseTime << std::endl; -// } - -// //----------------------------------------------------------------------------- - -CASE( "nothing" ) { -} - -// CASE( "Serialisation: Vector" ) { - -// eckit::PathName filename = "test_serialiser.out"; - -// std::vector vout(N_VECTOR); -// for (size_t i = 0; i < N_VECTOR; i++) { -// vout[i] = i; -// } - -// eckit::Timer timer_serialise; -// { -// eckit::FileStream sout(filename, "w"); -// auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout); -// } -// timer_serialise.stop(); - -// eckit::Timer timer_deserialise; -// std::vector vin; -// { -// eckit::FileStream sin(filename, "r"); -// auto c = eckit::closer(sin); -// vin = Serialiser::decodeVector(sin); -// } -// timer_deserialise.stop(); - -// EXPECT_EQUAL(vout.size(), vin.size()); -// for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i], vin[i]); -// } - -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// } - - -// // //----------------------------------------------------------------------------- - - -// // CASE( "Serialisation Naive: Vector" ) { - -// // eckit::PathName filename = "test_serialiser.out"; - -// // std::vector vout; -// // for (size_t i = 0; i < N_VECTOR; i++) { -// // vout.push_back("this is a test string look at it go woah " + std::to_string(i)); -// // } - -// // eckit::Timer timer_serialise; -// // { -// // eckit::FileStream sout(filename, "w"); -// // auto c = eckit::closer(sout); -// // Serialiser::encodeNaive(sout, vout); -// // } -// // timer_serialise.stop(); - -// // eckit::Timer timer_deserialise; -// // std::vector vin; -// // { -// // eckit::FileStream sin(filename, "r"); -// // auto c = eckit::closer(sin); -// // vin = Serialiser::decodeVectorStringNaive(sin); -// // } -// // timer_deserialise.stop(); - -// // EXPECT_EQUAL(vout.size(), vin.size()); -// // for (size_t i = 0; i < vout.size(); i++) { -// // EXPECT_EQUAL(vout[i], vin[i]); -// // } - -// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// // } - -// // //----------------------------------------------------------------------------- - -// // CASE( "Serialisation: Vector" ) { - -// // eckit::PathName filename = "test_serialiser.out"; - -// // std::vector vout; -// // for (size_t i = 0; i < N_VECTOR; i++) { -// // vout.push_back("this is a test string look at it go woah and look its getting even bigger now this is probably big enough " + std::to_string(i)); -// // } - -// // eckit::Timer timer_serialise; -// // { -// // eckit::FileStream sout(filename, "w"); -// // auto c = eckit::closer(sout); -// // Serialiser::encode(sout, vout); -// // } -// // timer_serialise.stop(); - -// // eckit::Timer timer_deserialise; -// // std::vector vin; -// // { -// // eckit::FileStream sin(filename, "r"); -// // auto c = eckit::closer(sin); -// // vin = Serialiser::decodeVectorString(sin); -// // } -// // timer_deserialise.stop(); - -// // EXPECT_EQUAL(vout.size(), vin.size()); -// // for (size_t i = 0; i < vout.size(); i++) { -// // EXPECT_EQUAL(vout[i], vin[i]); -// // } - -// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// // } - - - -// //----------------------------------------------------------------------------- - -// CASE( "Serialisation: Ranges" ) { - -// eckit::PathName filename = "test_serialiser.out"; - -// bool naive = false; -// for (size_t i = 0; i < 2; i++) { -// Ranges vout; -// for (size_t i = 0; i < N_VECTOR; i++) { -// vout.push_back(Range(i, i+10)); -// } - -// eckit::Timer timer_serialise; -// { -// eckit::FileStream sout(filename, "w"); -// auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout, naive); -// } -// timer_serialise.stop(); - -// eckit::Timer timer_deserialise; -// Ranges vin; -// { -// eckit::FileStream sin(filename, "r"); -// auto c = eckit::closer(sin); -// vin = Serialiser::decodeRanges(sin, naive); -// } -// timer_deserialise.stop(); - -// EXPECT_EQUAL(vout.size(), vin.size()); -// for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i].first, vin[i].first); -// EXPECT_EQUAL(vout[i].second, vin[i].second); -// } - -// eckit::Log::info() << "Naive: " << naive << std::endl; -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// naive = !naive; -// } -// } - -// //----------------------------------------------------------------------------- - -// // CASE( "Serialisation: Vector>" ) { - -// // eckit::PathName filename = "test_serialiser.out"; - -// // std::vector> vout; -// // for (size_t i = 0; i < N_VECTOR_VECTOR; i++) { -// // std::vector inner = {1.0, 2.0, 3.0}; -// // vout.push_back(inner); -// // } - -// // eckit::Timer timer_serialise; -// // { -// // eckit::FileStream sout(filename, "w"); -// // auto c = eckit::closer(sout); -// // Serialiser::encode(sout, vout); -// // } -// // timer_serialise.stop(); - -// // eckit::Timer timer_deserialise; -// // std::vector> vin; -// // { -// // eckit::FileStream sin(filename, "r"); -// // auto c = eckit::closer(sin); -// // vin = Serialiser::decodeVectorVector(sin); -// // } -// // timer_deserialise.stop(); - -// // EXPECT_EQUAL(vout.size(), vin.size()); -// // for (size_t i = 0; i < vout.size(); i++) { -// // EXPECT_EQUAL(vout[i].size(), vin[i].size()); -// // for (size_t j = 0; j < vout[i].size(); j++) { -// // EXPECT_EQUAL(vout[i][j], vin[i][j]); -// // } -// // } - -// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// // } - -// // //----------------------------------------------------------------------------- - -// CASE( "Serialisation: Vector" ) { - -// eckit::PathName filename = "test_serialiser.out"; -// bool naive = false; -// for (size_t i = 0; i < 2; i++) { -// std::vector vout; -// for (size_t i = 0; i < N_EXTRACTIONREQUESTS; i++) { -// std::string s = "retrieve,expver=xxxx,class=od,date=20241110,domain=g,levelist=1000,levtype=pl,param=129,stream=oper,time=1200,type=an,step=" + std::to_string(i); -// metkit::mars::MarsRequest marsrequest = metkit::mars::MarsRequest::parse(s); -// Ranges ranges = {Range(i, i+10), Range(i+11, i+12), Range(i+100, i+200)}; -// std::string hash = "testHash"; - -// ExtractionRequest req(marsrequest, ranges, hash); -// vout.push_back(req); -// } - -// eckit::Timer timer_serialise; -// { -// eckit::FileStream sout(filename, "w"); -// auto c = eckit::closer(sout); -// Serialiser::encode(sout, vout, naive); -// } -// timer_serialise.stop(); - -// eckit::Timer timer_deserialise; -// std::vector vin; -// { -// eckit::FileStream sin(filename, "r"); -// auto c = eckit::closer(sin); -// vin = Serialiser::decodeExtractionRequests(sin, naive); -// } -// timer_deserialise.stop(); - -// EXPECT_EQUAL(vout.size(), vin.size()); -// for (size_t i = 0; i < vout.size(); i++) { -// EXPECT_EQUAL(vout[i].request().asString(), vin[i].request().asString()); -// EXPECT_EQUAL(vout[i].gridHash(), vin[i].gridHash()); -// EXPECT_EQUAL(vout[i].ranges(), vin[i].ranges()); -// } - -// eckit::Log::info() << "Naive: " << naive << std::endl; -// reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// naive = !naive; -// } -// } - -// // //----------------------------------------------------------------------------- - -// // CASE( "Serialisation: Vector>" ) { - -// // eckit::PathName filename = "test_serialiser.out"; - -// // std::vector> vout; -// // for (size_t i = 0; i < N_EXTRACTIONRESULTS; i++) { -// // std::vector inner; -// // for (size_t j = 0; j < 1; j++) { -// // std::vector> values = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; -// // std::vector>> mask = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}}; -// // inner.push_back(ExtractionResult(values, mask)); -// // } -// // vout.push_back(std::move(inner)); -// // } - -// // eckit::Timer timer_serialise; -// // { -// // eckit::FileStream sout(filename, "w"); -// // auto c = eckit::closer(sout); -// // Serialiser::encode(sout, vout); -// // } -// // timer_serialise.stop(); - -// // eckit::Timer timer_deserialise; -// // std::vector> vin; -// // { -// // eckit::FileStream sin(filename, "r"); -// // auto c = eckit::closer(sin); -// // vin = Serialiser::decodeExtractionResults(sin); -// // } -// // timer_deserialise.stop(); - -// // EXPECT_EQUAL(vout.size(), vin.size()); -// // for (size_t i = 0; i < vout.size(); i++) { -// // EXPECT_EQUAL(vout[i].size(), vin[i].size()); -// // for (size_t j = 0; j < vout[i].size(); j++) { -// // auto vout_values = vout[i][j].values(); -// // auto vin_values = vin[i][j].values(); -// // EXPECT_EQUAL(vout_values.size(), vin_values.size()); -// // for (size_t k = 0; k < vout_values.size(); k++) { -// // EXPECT_EQUAL(vout_values[k], vin_values[k]); -// // } - -// // auto vout_mask = vout[i][j].mask(); -// // auto vin_mask = vin[i][j].mask(); -// // EXPECT_EQUAL(vout_mask.size(), vin_mask.size()); -// // for (size_t k = 0; k < vout_mask.size(); k++) { -// // EXPECT_EQUAL(vout_mask[k].size(), vin_mask[k].size()); -// // for (size_t l = 0; l < vout_mask[k].size(); l++) { -// // EXPECT_EQUAL(vout_mask[k][l], vin_mask[k][l]); -// // } -// // } -// // } -// // } - -// // reportTimes(vout.size(), timer_serialise.elapsed(), timer_deserialise.elapsed()); -// // } - -} // namespace gribjump - - -int main(int argc, char **argv) -{ - return run_tests ( argc, argv ); -} From a3fdd503600c805d12fe0b173570ba82a1a126d3 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Wed, 13 Nov 2024 16:40:15 +0000 Subject: [PATCH 6/8] Make lazy jumpinfo extraction a configurable option --- src/gribjump/GribJumpException.h | 11 +++++++++++ src/gribjump/info/InfoCache.cc | 15 ++++++++++++--- src/gribjump/info/InfoCache.h | 2 ++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/gribjump/GribJumpException.h b/src/gribjump/GribJumpException.h index c696a48..f24453c 100644 --- a/src/gribjump/GribJumpException.h +++ b/src/gribjump/GribJumpException.h @@ -40,4 +40,15 @@ class DataNotFoundException : public GribJumpException { GribJumpException("DataNotFound. " + msg, here) {} }; +class JumpInfoExtractionDisabled : public GribJumpException { +public: + + JumpInfoExtractionDisabled(const std::string& msg) : + GribJumpException("Lazy JumpInfo extraction has been disabled. " + msg) {} + + JumpInfoExtractionDisabled(const std::string& msg, const eckit::CodeLocation& here) : + GribJumpException("Lazy JumpInfo extraction has been disabled. " + msg, here) {} +}; + + } // namespace gribjump diff --git a/src/gribjump/info/InfoCache.cc b/src/gribjump/info/InfoCache.cc index f433899..8eb7a94 100644 --- a/src/gribjump/info/InfoCache.cc +++ b/src/gribjump/info/InfoCache.cc @@ -24,6 +24,7 @@ #include "gribjump/LibGribJump.h" #include "gribjump/info/InfoFactory.h" #include "gribjump/info/InfoExtractor.h" +#include "gribjump/GribJumpException.h" namespace gribjump { @@ -41,10 +42,11 @@ InfoCache::~InfoCache() { InfoCache::InfoCache(): cacheDir_(eckit::PathName()), - cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 64))) { + cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 64))), + lazy_(eckit::Resource("gribjumpLazyInfo", LibGribJump::instance().config().getBool("cache.lazy", true))) { const Config& config = LibGribJump::instance().config(); - + bool enabled = config.getBool("cache.enabled", true); if (!enabled) { persistentCache_ = false; @@ -120,6 +122,9 @@ std::shared_ptr InfoCache::get(const eckit::PathName& path, const ecki } // Extract explicitly + if (!lazy_) { + throw JumpInfoExtractionDisabled("No JumpInfo found for path " + path + " at offset " + std::to_string(offset)); + } InfoExtractor extractor; std::shared_ptr info = extractor.extract(path, offset); @@ -143,7 +148,11 @@ std::vector> InfoCache::get(const eckit::PathName& pat } if (!missingOffsets.empty()) { - + if (!lazy_) { + std::stringstream ss; + ss << "Missing JumpInfo for " << eckit::Plural(missingOffsets.size(), "offset") << " in " << path; + throw JumpInfoExtractionDisabled(ss.str()); + } std::sort(missingOffsets.begin(), missingOffsets.end()); InfoExtractor extractor; diff --git a/src/gribjump/info/InfoCache.h b/src/gribjump/info/InfoCache.h index 4c7eae3..b8de63e 100644 --- a/src/gribjump/info/InfoCache.h +++ b/src/gribjump/info/InfoCache.h @@ -80,6 +80,8 @@ class InfoCache { bool persistentCache_ = true; + bool lazy_; //< if true, cache.get may construct JumpInfo on the fly + bool shadowCache_ = false; //< if true, cache files are persisted next to the original data files (e.g. in FDB) // This takes precedence over cacheDir_. }; From 14e3372980d3078821b4fb17fbbd44d2543a1c9b Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Wed, 13 Nov 2024 17:33:35 +0000 Subject: [PATCH 7/8] Small tidy up --- pygribjump/src/pygribjump/pygribjump.py | 2 +- src/gribjump/Engine.cc | 2 ++ src/gribjump/ExtractionData.cc | 2 -- src/gribjump/ExtractionData.h | 4 ---- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pygribjump/src/pygribjump/pygribjump.py b/pygribjump/src/pygribjump/pygribjump.py index 27ee0ec..b01ef16 100644 --- a/pygribjump/src/pygribjump/pygribjump.py +++ b/pygribjump/src/pygribjump/pygribjump.py @@ -258,7 +258,7 @@ class ExtractionRequest: The ranges to extract. """ def __init__(self, req, ranges, gridHash=None): - reqstr = dic_to_request(req) + reqstr = dic_to_request(req) rangestr = list_to_rangestr(ranges) request = ffi.new('gribjump_extraction_request_t**') c_reqstr = ffi.new("char[]", reqstr.encode()) diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index f69ede9..d9a6c1c 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -45,6 +45,8 @@ Engine::~Engine() {} metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ){ // Split strings into one unified map // We also canonicalise the requests such that their keys are in alphabetical order + /// @todo: Note that it is not in general possible to arbitrary requests into a single request. In future, we should look into + /// merging into the minimum number of requests. std::map> keyValues; for (auto& r : requests) { diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index f7a44ca..2f576f0 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -14,8 +14,6 @@ #include "eckit/value/Value.h" #include "eckit/io/Buffer.h" -#include "metkit/mars/MarsParser.h" - namespace gribjump { namespace { diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index add8112..83e49e0 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -65,8 +65,6 @@ class ExtractionResult { private: // members std::vector> values_; std::vector>> mask_; - - friend class Serialiser; }; //---------------------------------------------------------------------------------------------------------------------- @@ -94,8 +92,6 @@ class ExtractionRequest { std::vector ranges_; std::string request_; std::string gridHash_; - - friend class Serialiser; }; //---------------------------------------------------------------------------------------------------------------------- From 656eac437cc9e71bdf0fccfb44c07e64c7ec654d Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Wed, 13 Nov 2024 17:41:57 +0000 Subject: [PATCH 8/8] Bump version 0.7.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 844f6a9..faef31a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.3 +0.7.0