diff --git a/db/blob/blob_file_cache.cc b/db/blob/blob_file_cache.cc index 5f340aadf55..1b9faa238c6 100644 --- a/db/blob/blob_file_cache.cc +++ b/db/blob/blob_file_cache.cc @@ -42,6 +42,7 @@ Status BlobFileCache::GetBlobFileReader( assert(blob_file_reader); assert(blob_file_reader->IsEmpty()); + // NOTE: sharing same Cache with table_cache const Slice key = GetSliceForKey(&blob_file_number); assert(cache_); @@ -98,4 +99,13 @@ Status BlobFileCache::GetBlobFileReader( return Status::OK(); } +void BlobFileCache::Evict(uint64_t blob_file_number) { + // NOTE: sharing same Cache with table_cache + const Slice key = GetSliceForKey(&blob_file_number); + + assert(cache_); + + cache_.get()->Erase(key); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/blob/blob_file_cache.h b/db/blob/blob_file_cache.h index 740e67ada6c..6858d012b59 100644 --- a/db/blob/blob_file_cache.h +++ b/db/blob/blob_file_cache.h @@ -36,6 +36,15 @@ class BlobFileCache { uint64_t blob_file_number, CacheHandleGuard* blob_file_reader); + // Called when a blob file is obsolete to ensure it is removed from the cache + // to avoid effectively leaking the open file and assicated memory + void Evict(uint64_t blob_file_number); + + // Used to identify cache entries for blob files (not normally useful) + static const Cache::CacheItemHelper* GetHelper() { + return CacheInterface::GetBasicHelper(); + } + private: using CacheInterface = BasicTypedCacheInterface; diff --git a/db/column_family.h b/db/column_family.h index ff038d8dfc5..9e7f52bd011 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -402,6 +402,7 @@ class ColumnFamilyData { SequenceNumber earliest_seq); TableCache* table_cache() const { return table_cache_.get(); } + BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); } BlobSource* blob_source() const { return blob_source_.get(); } // See documentation in compaction_picker.h diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 48f6529beef..e4781858c0e 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -659,8 +659,9 @@ Status DBImpl::CloseHelper() { // We need to release them before the block cache is destroyed. The block // cache may be destroyed inside versions_.reset(), when column family data // list is destroyed, so leaving handles in table cache after - // versions_.reset() may cause issues. - // Here we clean all unreferenced handles in table cache. + // versions_.reset() may cause issues. Here we clean all unreferenced handles + // in table cache, and (for certain builds/conditions) assert that no obsolete + // files are hanging around unreferenced (leak) in the table/blob file cache. // Now we assume all user queries have finished, so only version set itself // can possibly hold the blocks from block cache. After releasing unreferenced // handles here, only handles held by version set left and inside @@ -668,6 +669,9 @@ Status DBImpl::CloseHelper() { // time a handle is released, we erase it from the cache too. By doing that, // we can guarantee that after versions_.reset(), table cache is empty // so the cache can be safely destroyed. +#ifndef NDEBUG + TEST_VerifyNoObsoleteFilesCached(/*db_mutex_already_held=*/true); +#endif // !NDEBUG table_cache_->EraseUnRefEntries(); for (auto& txn_entry : recovered_transactions_) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index b81110fa9fe..614574bd38a 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1239,9 +1239,14 @@ class DBImpl : public DB { static Status TEST_ValidateOptions(const DBOptions& db_options) { return ValidateOptions(db_options); } - #endif // NDEBUG + // In certain configurations, verify that the table/blob file cache only + // contains entries for live files, to check for effective leaks of open + // files. This can only be called when purging of obsolete files has + // "settled," such as during parts of DB Close(). + void TEST_VerifyNoObsoleteFilesCached(bool db_mutex_already_held) const; + // persist stats to column family "_persistent_stats" void PersistStats(); diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index dac5e00359f..c91a5779609 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -9,6 +9,7 @@ #ifndef NDEBUG +#include "db/blob/blob_file_cache.h" #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/error_handler.h" @@ -321,5 +322,49 @@ size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const { InstrumentedMutexLock l(&const_cast(this)->stats_history_mutex_); return EstimateInMemoryStatsHistorySize(); } + +void DBImpl::TEST_VerifyNoObsoleteFilesCached( + bool db_mutex_already_held) const { + // This check is somewhat expensive and obscure to make a part of every + // unit test in every build variety. Thus, we only enable it for ASAN builds. + if (!kMustFreeHeapAllocations) { + return; + } + + std::optional l; + if (db_mutex_already_held) { + mutex_.AssertHeld(); + } else { + l.emplace(&mutex_); + } + + std::vector live_files; + for (auto cfd : *versions_->GetColumnFamilySet()) { + if (cfd->IsDropped()) { + continue; + } + // Sneakily add both SST and blob files to the same list + cfd->current()->AddLiveFiles(&live_files, &live_files); + } + std::sort(live_files.begin(), live_files.end()); + + auto fn = [&live_files](const Slice& key, Cache::ObjectPtr, size_t, + const Cache::CacheItemHelper* helper) { + if (helper != BlobFileCache::GetHelper()) { + // Skip non-blob files for now + // FIXME: diagnose and fix the leaks of obsolete SST files revealed in + // unit tests. + return; + } + // See TableCache and BlobFileCache + assert(key.size() == sizeof(uint64_t)); + uint64_t file_number; + GetUnaligned(reinterpret_cast(key.data()), &file_number); + // Assert file is in sorted live_files + assert( + std::binary_search(live_files.begin(), live_files.end(), file_number)); + }; + table_cache_->ApplyToAllEntries(fn, {}); +} } // namespace ROCKSDB_NAMESPACE #endif // NDEBUG diff --git a/db/table_cache.cc b/db/table_cache.cc index 1e1a9471653..28b25b880c8 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -164,6 +164,7 @@ Status TableCache::GetTableReader( } Cache::Handle* TableCache::Lookup(Cache* cache, uint64_t file_number) { + // NOTE: sharing same Cache with BlobFileCache Slice key = GetSliceForFileNumber(&file_number); return cache->Lookup(key); } @@ -178,6 +179,7 @@ Status TableCache::FindTable( size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) { PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock); uint64_t number = file_meta.fd.GetNumber(); + // NOTE: sharing same Cache with BlobFileCache Slice key = GetSliceForFileNumber(&number); *handle = cache_.Lookup(key); TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0", diff --git a/db/version_builder.cc b/db/version_builder.cc index 1343d113e98..acec67fd15e 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -24,6 +24,7 @@ #include #include "cache/cache_reservation_manager.h" +#include "db/blob/blob_file_cache.h" #include "db/blob/blob_file_meta.h" #include "db/dbformat.h" #include "db/internal_stats.h" @@ -744,12 +745,9 @@ class VersionBuilder::Rep { return Status::Corruption("VersionBuilder", oss.str()); } - // Note: we use C++11 for now but in C++14, this could be done in a more - // elegant way using generalized lambda capture. - VersionSet* const vs = version_set_; - const ImmutableCFOptions* const ioptions = ioptions_; - - auto deleter = [vs, ioptions](SharedBlobFileMetaData* shared_meta) { + auto deleter = [vs = version_set_, ioptions = ioptions_, + bc = cfd_ ? cfd_->blob_file_cache() + : nullptr](SharedBlobFileMetaData* shared_meta) { if (vs) { assert(ioptions); assert(!ioptions->cf_paths.empty()); @@ -758,6 +756,9 @@ class VersionBuilder::Rep { vs->AddObsoleteBlobFile(shared_meta->GetBlobFileNumber(), ioptions->cf_paths.front().path); } + if (bc) { + bc->Evict(shared_meta->GetBlobFileNumber()); + } delete shared_meta; }; @@ -766,7 +767,7 @@ class VersionBuilder::Rep { blob_file_number, blob_file_addition.GetTotalBlobCount(), blob_file_addition.GetTotalBlobBytes(), blob_file_addition.GetChecksumMethod(), - blob_file_addition.GetChecksumValue(), deleter); + blob_file_addition.GetChecksumValue(), std::move(deleter)); mutable_blob_file_metas_.emplace( blob_file_number, MutableBlobFileMetaData(std::move(shared_meta))); diff --git a/db/version_set.h b/db/version_set.h index f117b6082cd..18a2af62f51 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1526,7 +1526,6 @@ class VersionSet { void GetLiveFilesMetaData(std::vector* metadata); void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) { - // TODO: Erase file from BlobFileCache? obsolete_blob_files_.emplace_back(blob_file_number, std::move(path)); } diff --git a/unreleased_history/bug_fixes/blob_file_leak.md b/unreleased_history/bug_fixes/blob_file_leak.md new file mode 100644 index 00000000000..57f54df7677 --- /dev/null +++ b/unreleased_history/bug_fixes/blob_file_leak.md @@ -0,0 +1 @@ +* Fix a leak of obsolete blob files left open until DB::Close(). This bug was introduced in version 9.4.0.