Skip to content

Commit

Permalink
Update caching
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisspyB committed Jan 10, 2024
1 parent 3b4c666 commit e633fa1
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 242 deletions.
76 changes: 3 additions & 73 deletions src/gribjump/GribInfoCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,6 @@ GribInfoCache::GribInfoCache(){}

// Construct a cache rooted at `dir`.
// The directory must already exist (enforced with ASSERT). If it contains a
// "manifest.gj" file, that file is read into manifest_; otherwise the manifest
// is left as default-constructed.
GribInfoCache::GribInfoCache(eckit::PathName dir) : cacheDir_(dir) {
    ASSERT(cacheDir_.exists());

    const eckit::PathName manifestPath = cacheDir_ / "manifest.gj";
    if (!manifestPath.exists()) {
        // No manifest on disk yet: nothing to load.
        return;
    }

    eckit::FileStream in(manifestPath, "r");
    in >> manifest_;
    in.close();
}

void GribInfoCache::preload() {
for (auto& entry : manifest_) {
const eckit::PathName infopath = cacheDir_ / entry.second;
eckit::FileStream s(infopath, "r");
std::map<std::string, JumpInfo> cache;
s >> cache;
s.close();
cache_.merge(cache);
}
}

bool GribInfoCache::contains(const fdb5::FieldLocation& loc) {
Expand All @@ -57,20 +40,14 @@ bool GribInfoCache::contains(const fdb5::FieldLocation& loc) {
return true;
}

// Check if field's filename is in manifest
const auto el = manifest_.find(fdbfilename);
if (el == manifest_.end()) {
return false;
}

// Check if gribinfo cache file exists (i.e. manifest is not stale)
eckit::PathName infopath = cacheDir_ / el->second;
eckit::PathName infopath = cacheDir_ / fdbfilename + ".gj";
if (!infopath.exists()) {
return false;
}

// This field is in the cache, but not in memory. Load it.
eckit::Log::debug<LibGribJump>() << "Merging " << infopath << " with cache" << std::endl;
// Field should be cached on disk, but is not in memory.
eckit::Log::debug<LibGribJump>() << "Loading " << infopath << " into cache" << std::endl;
eckit::FileStream s(infopath, "r");
std::map<std::string, JumpInfo> cache;
s >> cache;
Expand All @@ -96,59 +73,12 @@ void GribInfoCache::print(std::ostream& s) const {
// Print the manifest, then the cache
s << "GribInfoCache[";
s << "cacheDir=" << cacheDir_ << std::endl;
s << "#entries=" << manifest_.size() << std::endl;
for (auto& entry : manifest_) {
s << entry.first << " -> " << entry.second << std::endl;
}
s << "cache=" << std::endl;
for (auto& entry : cache_) {
s << entry.first << " -> " << entry.second << std::endl;
}
s << "]";
}

// Remove entries in the manifest older than Ndays and delete the corresponding
// gribinfo files.
//
// The age of an entry is derived from the first 10 characters of its gribinfo
// file name, which are parsed as an eckit::Date and compared against a date
// built from eckit::TimeStamp("%Y-%m-%d") (presumably today's date -- confirm
// against eckit).
//
// Only the in-memory manifest_ is updated here; this function does not write
// the manifest back to disk.
void GribInfoCache::removeOld(int Ndays){
    eckit::Date now(eckit::TimeStamp("%Y-%m-%d"));

    // Collect the keys first: erasing from manifest_ while iterating over it
    // with a range-for would invalidate the loop.
    std::vector<std::string> toRemove;
    for (auto& entry : manifest_) {
        // Get date from filename
        std::string timestamp = entry.second.substr(0, 10);
        eckit::Date date(timestamp);

        if (now - date > Ndays) {
            toRemove.push_back(entry.first);
        }
    }

    for (auto& key : toRemove) {
        eckit::PathName infopath = cacheDir_ / manifest_.at(key);
        // Paranoia: ensure the file ends with .gj before deleting.
        ASSERT(infopath.baseName().extension() == ".gj");
        infopath.unlink();
        manifest_.erase(key);
        eckit::Log::debug<LibGribJump>() << "Removed " << infopath << " from manifest" << std::endl;
    }
}

void GribInfoCache::dump() const{
// Dump the manifest to disk, overwriting the old one.
eckit::PathName manifestpath = cacheDir_ / "manifest.gj";
eckit::FileStream s(manifestpath, "w");
s << manifest_;
s.close();
}

// Report whether the manifest has an entry keyed by the given fdb file name.
bool GribInfoCache::lookup(const std::string& fdbfilename) const{
    const auto found = manifest_.find(fdbfilename);
    return found != manifest_.end();
}
// Record (or replace) the gribinfo file name associated with an fdb file name
// in the in-memory manifest. Nothing is written to disk here.
void GribInfoCache::append(const std::string& fdbfilename, const std::string& gribinfofilename){
    manifest_.insert_or_assign(fdbfilename, gribinfofilename);
}

} // namespace gribjump
10 changes: 0 additions & 10 deletions src/gribjump/GribInfoCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,13 @@ class GribInfoCache {
// Get gribinfo from memory
const JumpInfo& get(const fdb5::FieldLocation& loc);

// Preload all gribinfos listed in manifest into memory
void preload();

void print(std::ostream& s) const;

// Manifest maintenance
bool lookup(const std::string& fdbfilename) const;
void append(const std::string& fdbfilename, const std::string& gribinfofilename);
void removeOld(int days);
void dump() const;

private:

eckit::PathName cacheDir_;

// fieldlocation's fdb filename -> gribinfo filename
std::map<std::string, std::string> manifest_;

// fieldlocation's full name -> gribinfo
std::map<std::string, JumpInfo> cache_;
Expand Down
12 changes: 1 addition & 11 deletions src/gribjump/LocalGribJump.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,6 @@ LocalGribJump::LocalGribJump(const Config& config): GribJumpBase(config) {
eckit::Log::debug<LibGribJump>() << "GribJump not using cache" << std::endl;
return;
}
eckit::PathName manifestPath = eckit::PathName(cacheDir) / "manifest.gj";
if (!manifestPath.exists()) {
eckit::Log::warning() << "Warning " << manifestPath << " does not exist." << std::endl;
eckit::Log::debug<LibGribJump>() << "GribJump not using cache" << std::endl;
return;
}

eckit::Log::debug<LibGribJump>() << "GribJump is using cache" << std::endl;
cache_ = GribInfoCache(cacheDir);
Expand Down Expand Up @@ -139,14 +133,10 @@ ExtractionResult LocalGribJump::directJump(eckit::DataHandle* handle,
return info.extractRanges(dataSource, ranges);
}

// Placeholder: no cache query is implemented here. The body consists solely of
// the NOTIMP macro (presumably eckit's "not implemented" failure -- confirm),
// and the `key` argument is never read.
bool LocalGribJump::isCached(std::string key) const {
NOTIMP;
}

JumpInfo LocalGribJump::extractInfo(const fdb5::FieldLocation& loc) {
if (cacheEnabled_) {
if(cache_.contains(loc)) return cache_.get(loc);
eckit::Log::debug<LibGribJump>() << "GribJump::extractInfo() cache miss" << std::endl;
eckit::Log::debug<LibGribJump>() << "GribJump::extractInfo() cache miss for file " << loc.uri().path().baseName() << std::endl;
}

eckit::DataHandle* handle = loc.dataHandle();
Expand Down
2 changes: 0 additions & 2 deletions src/gribjump/LocalGribJump.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ class LocalGribJump : public GribJumpBase {
// JumpInfo extractInfo(eckit::DataHandle* handle) const;
JumpInfo extractInfo(const fdb5::FieldLocation& loc);

bool isCached(std::string) const;

std::map<std::string, std::unordered_set<std::string>> axes(const std::string& request) override;


Expand Down
4 changes: 2 additions & 2 deletions src/tools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ ecbuild_add_executable( TARGET gj-testc
LIBS gribjump
)

ecbuild_add_executable( TARGET gj-cache
SOURCES gj-cache.cc
ecbuild_add_executable( TARGET gribinfo-fromfile
SOURCES infofromfile.cc
INCLUDES ${ECKIT_INCLUDE_DIRS}
LIBS gribjump
)
144 changes: 0 additions & 144 deletions src/tools/gj-cache.cc

This file was deleted.

Loading

0 comments on commit e633fa1

Please sign in to comment.