From 0c24f4fce751dd170e82005f0abb89751dbf0cb2 Mon Sep 17 00:00:00 2001 From: Xu Yifeng Date: Tue, 7 Nov 2023 02:25:50 +0000 Subject: [PATCH] Fix side effect of Remove operation. In the original ARC paper, there was no explanation of how Remove should be handled when the cache is full. The Remove operation would have a side effect on subsequent Put operations when the cache is not full, which is still evicting items even though it's clear that there is no need to evict when the cache is not full. Now, code has been added to check if the cache is full to avoid performance degradation. - fix c_, it should be max_count. - add unittest. - use list::splice to avoid copying value - add suffix to member fields list and map Signed-off-by: Xu Yifeng --- curvefs/test/common/arc_cache_test.cpp | 204 +++++++++++++++++++ src/common/arc_cache.h | 269 ++++++++++++++++--------- 2 files changed, 379 insertions(+), 94 deletions(-) create mode 100644 curvefs/test/common/arc_cache_test.cpp diff --git a/curvefs/test/common/arc_cache_test.cpp b/curvefs/test/common/arc_cache_test.cpp new file mode 100644 index 0000000000..ad2aa952cc --- /dev/null +++ b/curvefs/test/common/arc_cache_test.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2020 NetEase Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Project: curve + * Created Date: 20231213 + * Author: xuyifeng + */ + +#include +#include +#include + +#include "src/common/lru_cache.h" + +namespace curve { +namespace common { + +static void +assert_cache_metrics(std::shared_ptr> cache) { + auto metrics = cache->GetCacheMetrics(); + auto arcSize = cache->ArcSize(); + /* sizeof(key) + sizeof(value), yet sizeof(int) + sizeof(value) */ + auto sizeofKey = sizeof(int); + auto sizeofValue = sizeof(int); + ASSERT_EQ(arcSize.BSize() * sizeofKey + + arcSize.TSize() * (sizeofKey + sizeofValue), + metrics->cacheBytes.get_value()); +} + +TEST(ArcTest, test_cache_create) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + + ASSERT_EQ(cache->Capacity(), 5); + ASSERT_EQ(cache->Size(), 0); +} + +TEST(ArcTest, test_cache_put) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + ASSERT_TRUE(cache->Size() == maxCount); + + int v; + for (int i = 0; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + // run again trigger Touch() internally + for (int i = 0; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + assert_cache_metrics(cache); +} + +TEST(ArcTest, test_cache_getlast) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + for (int i = 0; i < maxCount; ++i) { + int k; + ASSERT_TRUE(cache->GetLast(i, &k)); + ASSERT_EQ(k, i); + } +} + +TEST(ArcTest, test_cache_getlast2) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + int k, v; + ASSERT_TRUE(cache->GetLast(&k, &v)); + ASSERT_EQ(k, 0); + ASSERT_EQ(v, 0); +} + +static bool filter_check_3(const int &v) { + return v == 3; +} + +TEST(ArcTest, test_cache_getlast3) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + int k, v; + ASSERT_TRUE(cache->GetLast(&k, &v, filter_check_3)); + ASSERT_EQ(k, 3); + ASSERT_EQ(v, 3); +} + +TEST(ArcTest, test_cache_retire) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount+1; ++i) { + cache->Put(i, i); + } + + ASSERT_TRUE(cache->Size() == maxCount); + + int v; + ASSERT_TRUE(cache->Get(0, &v) == false); + for (int i = 1; i < maxCount+1; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + int removed_count = 0; + for (int i = 100; i < 200; ++i) { + int removed; + int ret = cache->Put(i, i, &removed); + if (ret) { + removed_count++; + // This checks Arc cache's scan-resistent behavior, + // first an item in t2 is eliminated, after that, + // items in t1 is replaced + ASSERT_TRUE(removed == 1 || removed >= 100); + } + (void)removed; + } + ASSERT_EQ(removed_count, (200 - 100)); + + auto s = cache->ArcSize(); + ASSERT_TRUE(s.BSize() + s.TSize() <= 2 * maxCount); + + assert_cache_metrics(cache); +} + +TEST(ArcTest, test_cache_remove) { + int maxCount = 5; + auto cache = std::make_shared>(maxCount, + std::make_shared("Cache")); + auto metrics = cache->GetCacheMetrics(); + + for (int i = 0; i < maxCount; ++i) { + cache->Put(i, i); + } + + cache->Remove(0); + int v; + ASSERT_FALSE(cache->Get(0, &v)); + ASSERT_TRUE(cache->Size() == maxCount-1); + + for (int i = 1; i < maxCount; ++i) { + ASSERT_TRUE(cache->Get(i, &v)); + ASSERT_EQ(v, i); + } + + for (int i = 100; i < 200; ++i) { + cache->Put(i, i); + } + + auto s = cache->ArcSize(); + ASSERT_TRUE(s.BSize() + s.TSize() <= 2 * maxCount); + assert_cache_metrics(cache); +} + +} // namespace common +} // namespace curve + diff --git a/src/common/arc_cache.h b/src/common/arc_cache.h index 98e4eb7bfc..f83c10e5e4 100644 --- a/src/common/arc_cache.h +++ b/src/common/arc_cache.h @@ -26,18 +26,28 @@ #include #include +#include #include #include #include #include +// Adaptive Replacement Cache (ARC) template , typename ValueTraits = CacheTraits> class ARCCache : public LRUCacheInterface { public: - ARCCache(int64_t max_count, + struct ArcSizeInfo { + uint64_t b1, t1; + uint64_t b2, t2; + + uint64_t BSize() const { return b1 + b2; } + uint64_t TSize() const { return t1 + t2; } + }; + + ARCCache(uint64_t max_count, std::shared_ptr cacheMetrics = nullptr) - : c_(max_count/2), p_(0), cacheMetrics_(cacheMetrics) {} + : c_(max_count), p_(0), cacheMetrics_(cacheMetrics) {} void Put(const K& key, const V& value); bool Put(const K& key, const V& value, V* eliminated); @@ -49,8 +59,13 @@ class ARCCache : public LRUCacheInterface { bool GetLast(const V value, K* key); bool GetLast(K* key, V* value); bool GetLast(K* key, V* value, bool (*f)(const V& value)); + uint64_t Capacity() const { return c_; } + ArcSizeInfo ArcSize() const; private: + ARCCache(const ARCCache&) = delete; + void operator=(const ARCCache&) = delete; + struct BMapVal; struct TMapVal; struct BListVal; @@ -69,33 +84,27 @@ class ARCCache : public LRUCacheInterface { struct BMapVal { blist_iter list_iter; - BMapVal() {} - BMapVal(const BMapVal& o) { list_iter = o.list_iter; } - BMapVal& operator=(const BMapVal& o) { list_iter = o.list_iter; } + BMapVal() = default; + BMapVal(const BMapVal& o) = default; + BMapVal& operator=(const BMapVal& o) = default; BMapVal(const blist_iter& iter) // NOLINT : list_iter(iter) {} }; struct TMapVal { tlist_iter list_iter; - TMapVal() {} - TMapVal(const TMapVal& o) { list_iter = o.list_iter; } - TMapVal& operator=(const TMapVal& o) { - list_iter = o.list_iter; - return *this; - } + TMapVal() = default; + TMapVal(const TMapVal& o) = default; + TMapVal& operator=(const TMapVal& o) = default; TMapVal(const tlist_iter& iter) // NOLINT : list_iter(iter) {} }; struct BListVal { bmap_iter map_iter; - BListVal() {} - BListVal(const BListVal& o) { map_iter = o.map_iter; } - BListVal& operator=(const BListVal& o) { - map_iter = o.map_iter; - return *this; - } + BListVal() = default; + BListVal(const BListVal& o) = default; + BListVal& operator=(const BListVal& o) = default; BListVal(const bmap_iter& iter) // NOLINT :map_iter(iter) {} }; @@ -103,29 +112,22 @@ class ARCCache : public LRUCacheInterface { tmap_iter map_iter; V value; - TListVal() {} - TListVal(const TListVal& o) { - map_iter = o.map_iter; - value = o.value; - } + TListVal() = default; + TListVal(const TListVal& o) = default; TListVal(const tmap_iter& iter, const V& v) : map_iter(iter), value(v) {} - TListVal(const V& v) // NOLINT + explicit TListVal(const V& v) : value(v) {} - TListVal& operator=(const TListVal& o) { - map_iter = o.map_iter; - value = o.value; - return *this; - } + TListVal& operator=(const TListVal& o) = default; }; struct B { - BMap map; - BList list; + BMap map_; + BList list_; bool Find(const K& k, bmap_iter* iter) { - auto it = map.find(k); - if (it != map.end()) { + auto it = map_.find(k); + if (it != map_.end()) { if (iter != nullptr) *iter = it; return true; } @@ -133,20 +135,20 @@ class ARCCache : public LRUCacheInterface { } void Insert(const K& k) { - auto r = map.insert({k, blist_iter()}); + auto r = map_.insert({k, blist_iter()}); assert(r.second); - list.push_back(r.first); - r.first->second.list_iter = --list.end(); + list_.push_back(r.first); + r.first->second.list_iter = --list_.end(); } void RemoveLRU(CacheMetrics* m) { - if (list.empty()) return; + if (list_.empty()) return; if (m) { m->UpdateRemoveFromCacheBytes( - KeyTraits::CountBytes(list.front().map_iter->first)); + KeyTraits::CountBytes(list_.front().map_iter->first)); } - map.erase(list.front().map_iter); - list.pop_front(); + map_.erase(list_.front().map_iter); + list_.pop_front(); } void Remove(bmap_iter&& map_iter, CacheMetrics* m) { @@ -154,22 +156,22 @@ class ARCCache : public LRUCacheInterface { m->UpdateRemoveFromCacheBytes( KeyTraits::CountBytes(map_iter->first)); } - list.erase(map_iter->second.list_iter); - map.erase(map_iter); + list_.erase(map_iter->second.list_iter); + map_.erase(map_iter); } - int Count() const { return map.size(); } + uint64_t Count() const { return map_.size(); } }; struct T { - TMap map; - TList list; + TMap map_; + TList list_; void Insert(const K& k, const V& v, CacheMetrics* m) { - auto r = map.insert({k, tlist_iter()}); + auto r = map_.insert({k, tlist_iter()}); assert(r.second); - list.emplace_back(r.first, v); - r.first->second.list_iter = --list.end(); + list_.emplace_back(r.first, v); + r.first->second.list_iter = --list_.end(); if (m != nullptr) { m->UpdateAddToCacheCount(); m->UpdateAddToCacheBytes(KeyTraits::CountBytes(k) + @@ -177,9 +179,42 @@ class ARCCache : public LRUCacheInterface { } } + // Move key-value from another to this. For C++17 and later, + // we use extract() and insert() to improve performance. + void Move(const K& k, T* other, tmap_iter&& other_it, const V *v, + CacheMetrics *m) { + list_.splice(list_.end(), other->list_, other_it->second.list_iter); + if (v != nullptr) { + if (m != nullptr) { + uint64_t oldSize = + ValueTraits::CountBytes(list_.back().value); + uint64_t newSize = ValueTraits::CountBytes(*v); + if (oldSize != newSize) { + if (oldSize != 0) + m->UpdateRemoveFromCacheBytes(oldSize); + m->UpdateAddToCacheBytes(newSize); + } + } + list_.back().value = *v; + } +#if __cplusplus >= 201703L + auto node = other->map_.extract(other_it); + auto r = map_.insert(std::move(node)); + assert(r.inserted); + list_.back().map_iter = r.position; + r.position->second.list_iter = --list_.end(); +#else + auto r = map_.insert({k, tlist_iter()}); + assert(r.second); + list_.back().map_iter = r.first; + r.first->second.list_iter = --list_.end(); + other->map_.erase(other_it); +#endif + } + bool Find(const K& k, tmap_iter* map_iter) { - auto it = map.find(k); - if (it != map.end()) { + auto it = map_.find(k); + if (it != map_.end()) { if (map_iter != nullptr) *map_iter = it; return true; } @@ -194,28 +229,28 @@ class ARCCache : public LRUCacheInterface { KeyTraits::CountBytes(map_iter->first) + ValueTraits::CountBytes(map_iter->second.list_iter->value)); } - list.erase(map_iter->second.list_iter); - map.erase(map_iter); + list_.erase(map_iter->second.list_iter); + map_.erase(map_iter); } bool RemoveLRU(V* eliminated, CacheMetrics* m) { - if (list.empty()) return false; + if (list_.empty()) return false; if (eliminated != nullptr) { - *eliminated = list.front().value; + *eliminated = list_.front().value; } if (m != nullptr) { m->UpdateRemoveFromCacheCount(); m->UpdateRemoveFromCacheBytes( - KeyTraits::CountBytes(list.front().map_iter->first) + - ValueTraits::CountBytes(list.front().value)); + KeyTraits::CountBytes(list_.front().map_iter->first) + + ValueTraits::CountBytes(list_.front().value)); } - map.erase(list.front().map_iter); - list.pop_front(); + map_.erase(list_.front().map_iter); + list_.pop_front(); return true; } void Touch(const tmap_iter& map_iter, const V* v, CacheMetrics* m) { - auto list_iter = --list.end(); + auto list_iter = --list_.end(); if (v && m) { uint64_t oldSize = ValueTraits::CountBytes(map_iter->second.list_iter->value); @@ -229,18 +264,17 @@ class ARCCache : public LRUCacheInterface { if (v != nullptr) list_iter->value = *v; return; } + + list_.splice(list_.end(), list_, map_iter->second.list_iter); if (v != nullptr) { - list.push_back(*v); - } else { - list.push_back(map_iter->second.list_iter->value); + list_.back().value = *v; } - list.erase(map_iter->second.list_iter); - map_iter->second.list_iter = --list.end(); + map_iter->second.list_iter = --list_.end(); } - int Count() const { return map.size(); } + uint64_t Count() const { return map_.size(); } - tmap_iter GetLRU() const { return list.begin()->map_iter; } + tmap_iter GetLRU() const { return list_.begin()->map_iter; } }; bool Move_T_B(T* t, B* b, V* eliminated) { @@ -262,11 +296,33 @@ class ARCCache : public LRUCacheInterface { return true; } + bool IsCacheFull() const { + return t1_.Count() + t2_.Count() == c_; + } + + void IncreaseP(uint64_t delta) { + if (!IsCacheFull()) + return; + if (delta > c_ - p_) + p_ = c_; + else + p_ += delta; + } + + void DecreaseP(uint64_t delta) { + if (!IsCacheFull()) + return; + if (delta > p_) + p_ = 0; + else + p_ -= delta; + } + bool Replace(const K& k, V* eliminated); - ::curve::common::RWLock lock_; - int64_t c_; - int64_t p_; + mutable ::curve::common::RWLock lock_; + uint64_t c_; + uint64_t p_; B b1_, b2_; T t1_, t2_; std::shared_ptr cacheMetrics_; @@ -279,8 +335,7 @@ bool ARCCache::Get(const K& key, V* value) { if (t1_.Find(key, &it)) { if (value) *value = it->second.list_iter->value; - t2_.Insert(key, it->second.list_iter->value, nullptr); - t1_.Remove(std::move(it), nullptr); + t2_.Move(key, &t1_, std::move(it), nullptr, cacheMetrics_.get()); if (cacheMetrics_ != nullptr) { cacheMetrics_->OnCacheHit(); } @@ -313,23 +368,24 @@ bool ARCCache::Put(const K& key, const V& value, bool ret = false; if (t1_.Find(key, &it)) { - t2_.Insert(key, value, nullptr); - t1_.Remove(std::move(it), nullptr); + t2_.Move(key, &t1_, std::move(it), &value, cacheMetrics_.get()); + if (cacheMetrics_ != nullptr) { + cacheMetrics_->OnCacheHit(); + } return false; } if (t2_.Find(key, &it)) { t2_.Touch(it, &value, cacheMetrics_.get()); + if (cacheMetrics_ != nullptr) { + cacheMetrics_->OnCacheHit(); + } return false; } bmap_iter b_it; if (b1_.Find(key, &b_it)) { - if (b1_.Count() >= b2_.Count()) { - p_ += 1; - } else { - p_ += b2_.Count() / b1_.Count(); - } - if (p_ > c_) p_ = c_; + uint64_t delta = std::min((uint64_t)1, b2_.Count() / b1_.Count()); + IncreaseP(delta); ret = Replace(key, eliminated); b1_.Remove(std::move(b_it), cacheMetrics_.get()); @@ -338,12 +394,8 @@ bool ARCCache::Put(const K& key, const V& value, } if (b2_.Find(key, &b_it)) { - if (b2_.Count() >= b1_.Count()) { - p_ -= 1; - } else { - p_ -= b1_.Count() / b2_.Count(); - } - if (p_ < 0) p_ = 0; + uint64_t delta = std::max((uint64_t)1, b1_.Count() / b2_.Count()); + DecreaseP(delta); ret = Replace(key, eliminated); b2_.Remove(std::move(b_it), cacheMetrics_.get()); @@ -351,7 +403,7 @@ bool ARCCache::Put(const K& key, const V& value, return ret; } - if (t1_.Count() + b1_.Count() == c_) { + if (IsCacheFull() && t1_.Count() + b1_.Count() == c_) { if (t1_.Count() < c_) { b1_.RemoveLRU(cacheMetrics_.get()); ret = Replace(key, eliminated); @@ -361,8 +413,14 @@ bool ARCCache::Put(const K& key, const V& value, } else if (t1_.Count() + b1_.Count() < c_) { auto total = t1_.Count() + b1_.Count() + t2_.Count() + b2_.Count(); if (total >= c_) { - if (total == 2 * c_) b2_.RemoveLRU(cacheMetrics_.get()); - Replace(key, eliminated); + if (total == 2 * c_) { + if (b2_.Count() > 0) { + b2_.RemoveLRU(cacheMetrics_.get()); + } else { + b1_.RemoveLRU(cacheMetrics_.get()); + } + } + ret = Replace(key, eliminated); } } t1_.Insert(key, value, cacheMetrics_.get()); @@ -372,11 +430,16 @@ bool ARCCache::Put(const K& key, const V& value, template bool ARCCache::Replace(const K& k, V* eliminated) { + if (!IsCacheFull()) { + return false; + } if (t1_.Count() != 0 && ((t1_.Count() > p_) || (b2_.Find(k, nullptr) && t1_.Count() == p_))) { return Move_T_B(&t1_, &b1_, eliminated); - } else { + } else if (t2_.Count() > 0) { return Move_T_B(&t2_, &b2_, eliminated); + } else { + return Move_T_B(&t1_, &b1_, eliminated); } } @@ -386,6 +449,16 @@ uint64_t ARCCache::Size() { return t1_.Count() + t2_.Count(); } +template +typename ARCCache::ArcSizeInfo +ARCCache::ArcSize() const { + ::curve::common::ReadLockGuard guard(lock_); + + return {b1_.Count(), t1_.Count(), + b2_.Count(), t2_.Count()}; +} + +// This operation detach the key from cache template void ARCCache::Remove(const K& key) { ::curve::common::WriteLockGuard guard(lock_); @@ -397,6 +470,14 @@ void ARCCache::Remove(const K& key) { return; } } + B* bs[]{&b1_, &b2_}; + for (auto b : bs) { + bmap_iter it; + if (b->Find(key, &it)) { + b->Remove(std::move(it), cacheMetrics_.get()); + return; + } + } } template @@ -412,9 +493,9 @@ bool ARCCache::GetLast(const V value, K* key) { ::curve::common::ReadLockGuard guard(lock_); T* ts[]{&t1_, &t2_}; for (auto t : ts) { - for (const auto& item : t->list) { + for (const auto& item : t->list_) { if (item.value == value) { - *key = item->map_iter->first; + *key = item.map_iter->first; return true; } } @@ -428,9 +509,9 @@ bool ARCCache::GetLast(K* key, V* value) { T* ts[]{&t1_, &t2_}; for (auto t : ts) { - if (!t->list.empty()) { - *key = t->list.front().map_iter->first; - *value = t->list.front().value; + if (!t->list_.empty()) { + *key = t->list_.front().map_iter->first; + *value = t->list_.front().value; return true; } } @@ -445,7 +526,7 @@ bool ARCCache::GetLast( T* ts[]{&t1_, &t2_}; for (auto t : ts) { - for (const auto& item : t->list) { + for (const auto& item : t->list_) { bool ok = f(item.value); if (ok) { *key = item.map_iter->first;