From b5dea3065adff116583f9e8f002ca4f5f0ac52ff Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 28 Jan 2022 16:17:08 +0800
Subject: [PATCH 01/21] feat: implement long adder to optimize the counter of
 new metrics system (#1033)

---
 include/dsn/utility/long_adder.h              | 237 +++++++++++++++
 include/dsn/utility/ports.h                   |  28 ++
 src/utils/CMakeLists.txt                      |   1 +
 src/utils/long_adder.cpp                      | 280 ++++++++++++++++++
 src/utils/long_adder_bench/CMakeLists.txt     |  39 +++
 .../long_adder_bench/long_adder_bench.cpp     | 198 +++++++++++++
 src/utils/test/long_adder_test.cpp            | 265 +++++++++++++++++
 7 files changed, 1048 insertions(+)
 create mode 100644 include/dsn/utility/long_adder.h
 create mode 100644 src/utils/long_adder.cpp
 create mode 100644 src/utils/long_adder_bench/CMakeLists.txt
 create mode 100644 src/utils/long_adder_bench/long_adder_bench.cpp
 create mode 100644 src/utils/test/long_adder_test.cpp
diff --git a/include/dsn/utility/long_adder.h b/include/dsn/utility/long_adder.h
new file mode 100644
index 0000000000..99a3d2d3bf
--- /dev/null
+++ b/include/dsn/utility/long_adder.h
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+#include <dsn/utility/ports.h>
+
+// Refer to https://github.com/apache/kudu/blob/master/src/kudu/util/striped64.h
+
+namespace dsn {
+
+// Padded container for std::atomic<int64_t>: padding plus alignment prevent false sharing of cache
+// lines. In older versions of GCC `std::is_pod<std::atomic<int64_t>>::value` is false, so this is
+// not a POD there, which doesn't matter. compare_and_set() is a weak CAS: it may fail spuriously.
+class cacheline_aligned_int64
+{
+public:
+    static constexpr int kAtomicInt64Size = sizeof(std::atomic<int64_t>);
+
+    cacheline_aligned_int64() = default;
+
+    inline bool compare_and_set(int64_t cmp, int64_t value)
+    {
+        return _value.compare_exchange_weak(cmp, value);
+    }
+
+    // Padding advice from Herb Sutter:
+    // http://www.drdobbs.com/parallel/eliminate-false-sharing/217500206?pgno=4
+    std::atomic<int64_t> _value;
+    char pad[CACHELINE_SIZE > kAtomicInt64Size ? CACHELINE_SIZE - kAtomicInt64Size : 1];
+
+    DISALLOW_COPY_AND_ASSIGN(cacheline_aligned_int64);
+} CACHELINE_ALIGNED;
+
+using cacheline_aligned_int64_ptr =
+    std::unique_ptr<cacheline_aligned_int64, std::function<void(cacheline_aligned_int64 *)>>;
+extern cacheline_aligned_int64_ptr new_cacheline_aligned_int64();
+extern cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size);
+
+// This set of classes is heavily derived from JSR166e, released into the public domain
+// by Doug Lea and the other authors.
+//
+// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/Striped64.java?view=co
+// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/LongAdder.java?view=co
+//
+// The striped64 and striped_long_adder implementations here are simplified versions of what's
+// present in JSR166e. However, the core ideas remain the same.
+//
+// Updating a single AtomicInteger in a multi-threaded environment can be quite slow:
+//
+//   1. False sharing of cache lines with other counters.
+//   2. Cache line bouncing from high update rates, especially with many cores.
+//
+// These two problems are addressed by striped64. When there is no contention, it uses CAS on a
+// single base counter to store updates. However, when striped64 detects contention
+// (via a failed CAS operation), it will allocate a small, fixed size hashtable of Cells.
+// A cacheline_aligned_int64 is a simple POD that pads out an atomic<int64_t> to CACHELINE_SIZE
+// bytes (64 on x86-64) to prevent sharing a cache line.
+//
+// Reading the value of a striped64 requires traversing the hashtable to calculate the true sum.
+//
+// Each updating thread uses a thread-local hashcode to determine its cacheline_aligned_int64 in the
+// hashtable. If a thread fails to CAS its hashed cacheline_aligned_int64, it will do a lightweight
+// rehash operation to try and find an uncontended bucket. Because the hashcode is thread-local,
+// this rehash affects all striped64's accessed by the thread. This is good, since contention on one
+// striped64 is indicative of contention elsewhere too.
+//
+// The hashtable is statically sized to the nearest power of 2 greater than or equal to the
+// number of CPUs. This is sufficient, since this guarantees the existence of a perfect hash
+// function. Due to the random rehashing, the threads should eventually converge to this function.
+// In practice, this scheme has shown to be sufficient.
+//
+// The biggest simplification of this implementation compared to JSR166e is that we do not
+// dynamically grow the table, instead immediately allocating it to the full size.
+// We also do not lazily allocate each cacheline_aligned_int64, instead allocating the entire array
+// at once. This means we waste some additional memory in low contention scenarios, and initial
+// allocation will also be slower. Some of the micro-optimizations were also elided for readability.
+class striped64
+{
+public:
+    striped64() = default;
+
+protected:
+    // NOTE: the destructor is not virtual so that we can ensure that striped64
+    // has no vtable, thus reducing its size. We make it protected to ensure that
+    // no one attempts to delete a striped64* and invokes the wrong destructor.
+    ~striped64() = default;
+
+    enum rehash
+    {
+        kRehash,
+        kNoRehash
+    };
+
+    // CAS the base field (weak CAS: it may fail spuriously).
+    inline bool cas_base(int64_t cmp, int64_t val) { return _base.compare_exchange_weak(cmp, val); }
+
+    // Slow path of an update: handles table initialization and/or contention (this simplified
+    // version never resizes the table or creates cells lazily). See above for further explanation.
+    //
+    // 'Updater' should be a function which takes the current value and returns
+    // the new value.
+    template <class Updater>
+    void retry_update(rehash to_rehash, Updater updater);
+
+    // Sets base to the given value and every allocated cell to zero.
+    void internal_reset(int64_t initial_value);
+
+    // Base value, used mainly when there is no contention, but also as a fallback during
+    // table initialization races. Updated via CAS.
+    std::atomic<int64_t> _base{0};
+
+    // Memory manager of cells. Once the destructor is called, cells will be freed.
+    cacheline_aligned_int64_ptr _cells_holder;
+
+    // Table of cells. When non-null, size is the nearest power of 2 >= NCPU.
+    // If this is set to -1, the pointer is 'locked' and some thread is in the
+    // process of allocating the array.
+    std::atomic<cacheline_aligned_int64 *> _cells{nullptr};
+
+    static uint64_t get_tls_hashcode();
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(striped64);
+
+    // Static hash code per-thread. Shared across all instances to limit thread-local pollution.
+    // Also, if a thread hits a collision on one striped64, it's also likely to collide on
+    // other striped64s too.
+    static __thread uint64_t _tls_hashcode;
+};
+
+// A 64-bit number optimized for high-volume concurrent updates.
+// See striped64 for a longer explanation of the inner workings.
+class striped_long_adder : striped64
+{
+public:
+    striped_long_adder() = default;
+
+    ~striped_long_adder() = default;
+
+    void increment_by(int64_t x);
+
+    // Returns the current value.
+    // Note this is not an atomic snapshot in the presence of concurrent updates.
+    int64_t value() const;
+
+    // Call reset() ONLY when necessary.
+    inline void reset() { set(0); }
+
+    // Zeroes base and cells, returning the sum they held. Like value(), not an atomic snapshot.
+    int64_t fetch_and_reset();
+
+private:
+    // `set` is not exposed since it's not an efficient operation
+    void set(int64_t val) { internal_reset(val); }
+
+    DISALLOW_COPY_AND_ASSIGN(striped_long_adder);
+};
+
+class concurrent_long_adder
+{
+public:
+    concurrent_long_adder();
+    ~concurrent_long_adder() = default;
+
+    void increment_by(int64_t x);
+
+    // Returns the current value.
+    // Note this is not an atomic snapshot in the presence of concurrent updates.
+    int64_t value() const;
+
+    // Call reset() ONLY when necessary.
+    inline void reset() { set(0); }
+
+    // Zeroes every cell, returning the sum they held. Like value(), not an atomic snapshot.
+    int64_t fetch_and_reset();
+
+private:
+    // `set` is not exposed since it's not an efficient operation
+    void set(int64_t val);
+
+    cacheline_aligned_int64_ptr _cells_holder;
+    cacheline_aligned_int64 *_cells;
+
+    DISALLOW_COPY_AND_ASSIGN(concurrent_long_adder);
+};
+
+// Use template to wrap a long_adder implementation rather than inherit from a base class for
+// the reason that virtual function will increase the class size and slow down the execution.
+template <typename Adder>
+class long_adder_wrapper
+{
+public:
+    long_adder_wrapper() = default;
+
+    ~long_adder_wrapper() = default;
+
+    inline void increment_by(int64_t x) { adder.increment_by(x); }
+    inline void increment() { increment_by(1); }
+    inline void decrement() { increment_by(-1); }
+
+    // Returns the current value.
+    // Note this is not an atomic snapshot in the presence of concurrent updates.
+    inline int64_t value() const { return adder.value(); }
+
+    // Resets the counter state to zero. Call it ONLY when necessary.
+    inline void reset() { adder.reset(); }
+
+    // Forwards to Adder::fetch_and_reset(): zeroes the counter and returns the sum it held.
+    inline int64_t fetch_and_reset() { return adder.fetch_and_reset(); }
+
+private:
+    Adder adder;
+
+    DISALLOW_COPY_AND_ASSIGN(long_adder_wrapper);
+};
+
+} // namespace dsn
diff --git a/include/dsn/utility/ports.h b/include/dsn/utility/ports.h
index 1a9727fad8..6e2b182755 100644
--- a/include/dsn/utility/ports.h
+++ b/include/dsn/utility/ports.h
@@ -78,3 +78,31 @@
 #include <machine/endian.h> // NOLINT(build/include)
 
 #endif
+
+// Cache line alignment
+#if defined(__i386__) || defined(__x86_64__)
+#define CACHELINE_SIZE 64
+#elif defined(__powerpc64__)
+// TODO(user) This is the L1 D-cache line size of our Power7 machines.
+// Need to check if this is appropriate for other PowerPC64 systems.
+#define CACHELINE_SIZE 128
+#elif defined(__aarch64__)
+#define CACHELINE_SIZE 64
+#elif defined(__arm__)
+// Cache line sizes for ARM: These values are not strictly correct since
+// cache line sizes depend on implementations, not architectures.  There
+// are even implementations with cache line sizes configurable at boot
+// time.
+#if defined(__ARM_ARCH_5T__)
+#define CACHELINE_SIZE 32
+#elif defined(__ARM_ARCH_7A__)
+#define CACHELINE_SIZE 64
+#endif
+#endif
+
+// This is a NOP if CACHELINE_SIZE is not defined.
+#ifdef CACHELINE_SIZE
+#define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+#else
+#define CACHELINE_ALIGNED
+#endif
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
index b95379557c..f434520eb9 100644
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -18,4 +18,5 @@ else()
     dsn_add_shared_library()
 endif()
 
+add_subdirectory(long_adder_bench)
 add_subdirectory(test)
diff --git a/src/utils/long_adder.cpp b/src/utils/long_adder.cpp
new file mode 100644
index 0000000000..af080f3d5b
--- /dev/null
+++ b/src/utils/long_adder.cpp
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/long_adder.h>
+
+#ifdef __aarch64__
+#define _mm_free(p) free(p)
+#define _mm_malloc(a, b) malloc(a)
+#else
+#include <mm_malloc.h>
+#endif //__aarch64__
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <new>
+#include <string>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/rand.h>
+#include <dsn/utility/safe_strerror_posix.h>
+
+namespace dsn {
+
+namespace {
+
+const int64_t kNumCpus = sysconf(_SC_NPROCESSORS_ONLN);
+uint32_t compute_num_cells()
+{
+    // Nearest power of two >= NCPU. kNumCpus stays signed so that a failed sysconf() (-1) gives
+    // a single cell; the 2^31 cap keeps `n <<= 1` from wrapping to 0 and looping forever.
+    uint32_t n = 1;
+    while (kNumCpus > static_cast<int64_t>(n) && n < (UINT32_C(1) << 31)) {
+        n <<= 1;
+    }
+    return n;
+}
+const uint32_t kNumCells = compute_num_cells();
+const uint32_t kCellMask = kNumCells - 1;
+
+cacheline_aligned_int64 *const kCellsLocked = reinterpret_cast<cacheline_aligned_int64 *>(-1L);
+
+} // anonymous namespace
+
+//
+// cacheline_aligned_int64
+//
+
+/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size)
+{
+    void *buffer = nullptr;
+    int err = posix_memalign(&buffer, CACHELINE_SIZE, sizeof(cacheline_aligned_int64) * size);
+
+    // posix_memalign returns the error number itself; generally there are 2 possible errors:
+    // [EINVAL]
+    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).
+    // [ENOMEM]
+    //     There is insufficient memory available with the requested alignment.
+    // Thus making an assertion here is enough.
+    dassert_f(err == 0, "error calling posix_memalign: {}", utils::safe_strerror(err).c_str());
+
+    cacheline_aligned_int64 *array = new (buffer) cacheline_aligned_int64[size];
+    for (uint32_t i = 0; i < size; ++i) {
+        cacheline_aligned_int64 *elem = &(array[i]);
+        dassert_f(
+            (reinterpret_cast<const uintptr_t>(elem) & (sizeof(cacheline_aligned_int64) - 1)) == 0,
+            "unaligned cacheline_aligned_int64: array={}, index={}, elem={}, mask={}",
+            fmt::ptr(array),
+            i,
+            fmt::ptr(elem),
+            sizeof(cacheline_aligned_int64) - 1);
+        array[i]._value.store(0);
+    }
+
+    return cacheline_aligned_int64_ptr(array, [](cacheline_aligned_int64 *array) { free(array); });
+}
+
+/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64()
+{
+    return new_cacheline_aligned_int64_array(1);
+}
+
+//
+// striped64
+//
+__thread uint64_t striped64::_tls_hashcode = 0;
+
+uint64_t striped64::get_tls_hashcode()
+{
+    if (dsn_unlikely(_tls_hashcode == 0)) {
+        // First use on this thread: seed from the tid, or a random number when the tid is 0.
+        // Zero is never stored: it marks "unset" above and would break the xorshift rehash.
+        uint64_t seed = static_cast<uint64_t>(utils::get_current_tid());
+        seed = (seed != 0) ? seed : rand::next_u64();
+        _tls_hashcode = (seed != 0) ? seed : 1;
+    }
+    return _tls_hashcode;
+}
+
+template <class Updater>
+void striped64::retry_update(rehash to_rehash, Updater updater)
+{
+    uint64_t h = get_tls_hashcode();
+    // `to_rehash` tells whether the caller already lost a CAS on its cell (so rehash first);
+    // `updater` maps the current value to the new one. Each iteration does one of:
+    //
+    // 1. Try to add to the cacheline_aligned_int64 hash table entry for the thread if the table
+    // exists. When there's contention, rehash to try a different cacheline_aligned_int64.
+    // 2. Try to initialize the hash table.
+    // 3. Try to update the base counter.
+    //
+    // These hinge on successful CAS operations, hence the infinite retry loop.
+    while (true) {
+        cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);
+        if (cells != nullptr && cells != kCellsLocked) {
+            if (to_rehash == kRehash) {
+                // CAS failed already, rehash before trying to increment.
+                to_rehash = kNoRehash;
+            } else {
+                cacheline_aligned_int64 *cell = &(cells[h & kCellMask]);
+                int64_t v = cell->_value.load(std::memory_order_relaxed);
+                if (cell->compare_and_set(v, updater(v))) {
+                    // Successfully CAS'd the corresponding cell, done.
+                    break;
+                }
+            }
+            // Rehash since we failed to CAS, either previously or just now.
+            h ^= h << 13;
+            h ^= h >> 17;
+            h ^= h << 5;
+        } else if (cells == nullptr && _cells.compare_exchange_weak(cells, kCellsLocked)) {
+            _cells_holder = new_cacheline_aligned_int64_array(kNumCells);
+            cells = _cells_holder.get();
+            _cells.store(cells, std::memory_order_release);
+        } else {
+            // Fallback to adding to the base value.
+            // Means the table wasn't initialized or we failed to init it.
+            int64_t v = _base.load(std::memory_order_relaxed);
+            if (cas_base(v, updater(v))) {
+                break;
+            }
+        }
+    }
+
+    // Record index for next time
+    _tls_hashcode = h;
+}
+
+void striped64::internal_reset(int64_t initial_value)
+{
+    // The base takes the whole initial value; every cell (if allocated) restarts from zero.
+    _base.store(initial_value);
+
+    // Spin until no thread holds the table in the 'locked' (being allocated) state.
+    cacheline_aligned_int64 *table = _cells.load(std::memory_order_acquire);
+    while (table == kCellsLocked) {
+        table = _cells.load(std::memory_order_acquire);
+    }
+
+    for (uint32_t i = 0; table != nullptr && i < kNumCells; ++i) {
+        table[i]._value.store(0);
+    }
+}
+
+//
+// striped_long_adder
+//
+
+void striped_long_adder::increment_by(int64_t x)
+{
+    // Use hash table if present. If that fails, call retry_update to rehash and retry.
+    // If no hash table, try to CAS the base counter. If that fails, retry_update to init the table.
+    cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);
+    if (cells != nullptr && cells != kCellsLocked) {
+        cacheline_aligned_int64 *cell = &(cells[get_tls_hashcode() & kCellMask]);
+        dassert_f(
+            (reinterpret_cast<const uintptr_t>(cell) & (sizeof(cacheline_aligned_int64) - 1)) == 0,
+            "unaligned cacheline_aligned_int64 not allowed for striped64: cell={}, mask={}",
+            fmt::ptr(cell),
+            sizeof(cacheline_aligned_int64) - 1);
+
+        const int64_t old = cell->_value.load(std::memory_order_relaxed);
+        if (!cell->compare_and_set(old, old + x)) {
+            // When we hit a hash table contention, signal retry_update to rehash.
+            retry_update(kRehash, [x](int64_t old) { return old + x; });
+        }
+    } else {
+        int64_t b = _base.load(std::memory_order_relaxed);
+        if (!cas_base(b, b + x)) {
+            // Attempt to initialize the table. No need to rehash since the contention was for the
+            // base counter, not the hash table.
+            retry_update(kNoRehash, [x](int64_t old) { return old + x; });
+        }
+    }
+}
+
+int64_t striped_long_adder::value() const
+{
+    // Start from the base, then fold in every cell once the table has been published.
+    int64_t total = _base.load(std::memory_order_relaxed);
+
+    const cacheline_aligned_int64 *table = _cells.load(std::memory_order_acquire);
+    const bool published = (table != nullptr) && (table != kCellsLocked);
+    for (uint32_t i = 0; published && i < kNumCells; ++i) {
+        total += table[i]._value.load(std::memory_order_relaxed);
+    }
+    return total;
+}
+
+int64_t striped_long_adder::fetch_and_reset()
+{
+    // Swap zero into the base and then into each published cell, summing what was taken out.
+    int64_t drained = _base.exchange(0, std::memory_order_relaxed);
+
+    cacheline_aligned_int64 *table = _cells.load(std::memory_order_acquire);
+    const bool published = (table != nullptr) && (table != kCellsLocked);
+    for (uint32_t i = 0; published && i < kNumCells; ++i) {
+        drained += table[i]._value.exchange(0, std::memory_order_relaxed);
+    }
+    return drained;
+}
+
+//
+// concurrent_long_adder
+//
+
+concurrent_long_adder::concurrent_long_adder()
+    : _cells_holder(new_cacheline_aligned_int64_array(kNumCells)), _cells(_cells_holder.get())
+{
+}
+
+void concurrent_long_adder::increment_by(int64_t x)
+{
+    auto task_id = static_cast<uint32_t>(utils::get_current_tid());
+    _cells[task_id & kCellMask]._value.fetch_add(x, std::memory_order_relaxed);
+}
+
+int64_t concurrent_long_adder::value() const
+{
+    int64_t total = 0;
+    for (const cacheline_aligned_int64 *cell = _cells; cell != _cells + kNumCells; ++cell) {
+        total += cell->_value.load(std::memory_order_relaxed);
+    }
+    return total;
+}
+
+void concurrent_long_adder::set(int64_t val)
+{
+    for (uint32_t i = 0; i < kNumCells; ++i) {
+        _cells[i]._value.store(0, std::memory_order_relaxed);
+    }
+    _cells[0]._value.store(val, std::memory_order_relaxed);
+}
+
+int64_t concurrent_long_adder::fetch_and_reset()
+{
+    int64_t drained = 0;
+    for (cacheline_aligned_int64 *cell = _cells; cell != _cells + kNumCells; ++cell) {
+        drained += cell->_value.exchange(0, std::memory_order_relaxed);
+    }
+    return drained;
+}
+
+} // namespace dsn
diff --git a/src/utils/long_adder_bench/CMakeLists.txt b/src/utils/long_adder_bench/CMakeLists.txt
new file mode 100644
index 0000000000..f63efc8a96
--- /dev/null
+++ b/src/utils/long_adder_bench/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(MY_PROJ_NAME long_adder_bench)
+project(${MY_PROJ_NAME} C CXX)
+
+# Source files under CURRENT project directory will be automatically included.
+# You can manually set MY_PROJ_SRC to include source files under other directories.
+set(MY_PROJ_SRC "")
+
+# Search mode for source files under CURRENT project directory?
+# "GLOB_RECURSE" for recursive search
+# "GLOB" for non-recursive search
+set(MY_SRC_SEARCH_MODE "GLOB")
+
+set(MY_PROJ_LIBS dsn_runtime dsn_utils)
+
+set(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)
+
+# Extra files that will be installed
+set(MY_BINPLACES "")
+
+dsn_add_executable()
+
+dsn_install_executable()
diff --git a/src/utils/long_adder_bench/long_adder_bench.cpp b/src/utils/long_adder_bench/long_adder_bench.cpp
new file mode 100644
index 0000000000..6fbb249b8f
--- /dev/null
+++ b/src/utils/long_adder_bench/long_adder_bench.cpp
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <thread>
+#include <vector>
+
+#include <fmt/ostream.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/long_adder.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/string_conv.h>
+
+// The simplest implementation of long adder: just wrap std::atomic<int64_t>.
+class simple_long_adder
+{
+public:
+    simple_long_adder() = default;
+
+    ~simple_long_adder() = default;
+
+    inline void increment_by(int64_t x) { _value.fetch_add(x, std::memory_order_relaxed); }
+
+    inline int64_t value() const { return _value.load(std::memory_order_relaxed); }
+
+    inline void reset() { set(0); }
+
+    inline int64_t fetch_and_reset() { return _value.exchange(0, std::memory_order_relaxed); }
+
+private:
+    inline void set(int64_t val) { _value.store(val, std::memory_order_relaxed); }
+
+    std::atomic<int64_t> _value{0};
+
+    DISALLOW_COPY_AND_ASSIGN(simple_long_adder);
+};
+
+// A modification of perf_counter_number_atomic from perf_counter.
+// This modification has removed virtual functions from the original version, whose main
+// interfaces were implemented as virtual functions, which slows down the execution.
+#define DIVIDE_CONTAINER 107
+class divided_long_adder
+{
+public:
+    divided_long_adder()
+    {
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            _value[i].store(0);
+        }
+    }
+
+    ~divided_long_adder() = default;
+
+    inline void increment_by(int64_t x)
+    {
+        auto task_id = static_cast<uint32_t>(dsn::utils::get_current_tid());
+        _value[task_id % DIVIDE_CONTAINER].fetch_add(x, std::memory_order_relaxed);
+    }
+
+    int64_t value() const
+    {
+        int64_t sum = 0;
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            sum += _value[i].load(std::memory_order_relaxed);
+        }
+        return sum;
+    }
+
+    inline void reset() { set(0); }
+
+    int64_t fetch_and_reset()
+    {
+        int64_t sum = 0;
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            sum += _value[i].exchange(0, std::memory_order_relaxed);
+        }
+        return sum;
+    }
+
+private:
+    void set(int64_t val)
+    {
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            _value[i].store(0, std::memory_order_relaxed);
+        }
+        _value[0].store(val, std::memory_order_relaxed);
+    }
+
+    std::atomic<int64_t> _value[DIVIDE_CONTAINER];
+
+    DISALLOW_COPY_AND_ASSIGN(divided_long_adder);
+};
+
+void print_usage(const char *cmd)
+{
+    fmt::print(stderr, "USAGE: {} <num_operations> <num_threads> <long_adder_type>\n", cmd);
+    fmt::print(stderr, "Run a simple benchmark that executes each sort of long adder.\n\n");
+
+    fmt::print(
+        stderr,
+        "    <num_operations>       the number of increment operations executed by each thread\n");
+    fmt::print(stderr, "    <num_threads>          the number of threads\n");
+    fmt::print(stderr,
+               "    <long_adder_type>      the type of long adder: simple_long_adder, "
+               "divided_long_adder, striped_long_adder, concurrent_long_adder\n");
+}
+
+template <typename Adder>
+void run_bench(int64_t num_operations, int64_t num_threads, const char *name)
+{
+    // One adder shared by `num_threads` workers, each performing `num_operations` increments.
+    dsn::long_adder_wrapper<Adder> adder;
+    std::vector<std::thread> workers;
+    const auto hammer = [num_operations, &adder]() {
+        for (int64_t op = 0; op < num_operations; ++op) {
+            adder.increment();
+        }
+    };
+    // The measured interval covers spawning, running and joining all the workers.
+    const uint64_t begin_ns = dsn_now_ns();
+    for (int64_t t = 0; t < num_threads; ++t) {
+        workers.emplace_back(hammer);
+    }
+    for (auto &worker : workers) {
+        worker.join();
+    }
+    const uint64_t end_ns = dsn_now_ns();
+
+    const std::chrono::nanoseconds elapsed(static_cast<int64_t>(end_ns - begin_ns));
+    auto seconds = std::chrono::duration_cast<std::chrono::duration<double>>(elapsed).count();
+
+    fmt::print(stdout,
+               "Running {} operations of {} with {} threads took {} seconds, result = {}.\n",
+               num_operations,
+               name,
+               num_threads,
+               seconds,
+               adder.value());
+}
+
+int main(int argc, char **argv)
+{
+    if (argc < 4) {
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // argv[1]: increments per thread. A negative count would silently run nothing, so reject it.
+    int64_t num_operations = 0;
+    if (!dsn::buf2int64(argv[1], num_operations) || num_operations < 0) {
+        fmt::print(stderr, "Invalid num_operations: {}\n\n", argv[1]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // argv[2]: number of worker threads. At least one is needed for a meaningful measurement.
+    int64_t num_threads = 0;
+    if (!dsn::buf2int64(argv[2], num_threads) || num_threads < 1) {
+        fmt::print(stderr, "Invalid num_threads: {}\n\n", argv[2]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // argv[3]: name of the adder implementation to instantiate run_bench() with.
+    const char *long_adder_type = argv[3];
+    if (strcmp(long_adder_type, "simple_long_adder") == 0) {
+        run_bench<simple_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "divided_long_adder") == 0) {
+        run_bench<divided_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "striped_long_adder") == 0) {
+        run_bench<dsn::striped_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "concurrent_long_adder") == 0) {
+        run_bench<dsn::concurrent_long_adder>(num_operations, num_threads, long_adder_type);
+    } else {
+        fmt::print(stderr, "Invalid long_adder_type: {}\n\n", long_adder_type);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    return 0;
+}
diff --git a/src/utils/test/long_adder_test.cpp b/src/utils/test/long_adder_test.cpp
new file mode 100644
index 0000000000..71d8e4ffb9
--- /dev/null
+++ b/src/utils/test/long_adder_test.cpp
@@ -0,0 +1,265 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <functional>
+#include <thread>
+#include <vector>
+
+#include <fmt/ostream.h>
+#include <gtest/gtest.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/long_adder.h>
+
+namespace dsn {
+
+template <typename T>
+struct type_parse_traits;
+
+#define REGISTER_PARSE_TYPE(X)                                                                     \
+    template <>                                                                                    \
+    struct type_parse_traits<X>                                                                    \
+    {                                                                                              \
+        static const char *name;                                                                   \
+    };                                                                                             \
+    const char *type_parse_traits<X>::name = #X
+
+REGISTER_PARSE_TYPE(striped_long_adder);
+REGISTER_PARSE_TYPE(concurrent_long_adder);
+
+template <typename Adder>
+class long_adder_test
+{
+public:
+    long_adder_test() = default;
+
+    void run_increment_by(int64_t base_value,
+                          int64_t delta,
+                          int64_t num_operations,
+                          int64_t num_threads,
+                          int64_t &result)
+    {
+        execute(num_threads,
+                [this, delta, num_operations]() { this->increment_by(delta, num_operations); });
+        result = base_value + delta * num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+
+    void
+    run_increment(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)
+    {
+        execute(num_threads, [this, num_operations]() { this->increment(num_operations); });
+        result = base_value + num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+
+    void
+    run_decrement(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)
+    {
+        execute(num_threads, [this, num_operations]() { this->decrement(num_operations); });
+        result = base_value - num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+
+    void run_basic_cases(int64_t num_threads)
+    {
+        fmt::print(stdout,
+                   "Ready to run basic cases for {} with {} threads.\n",
+                   type_parse_traits<Adder>::name,
+                   num_threads);
+
+        // Initially should be zero
+        int64_t base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Do basic test with custom number of threads
+        auto do_increment_by = std::bind(&long_adder_test::run_increment_by,
+                                         this,
+                                         std::placeholders::_1,
+                                         std::placeholders::_2,
+                                         std::placeholders::_3,
+                                         num_threads,
+                                         std::placeholders::_4);
+        auto do_increment = std::bind(&long_adder_test::run_increment,
+                                      this,
+                                      std::placeholders::_1,
+                                      std::placeholders::_2,
+                                      num_threads,
+                                      std::placeholders::_3);
+        auto do_decrement = std::bind(&long_adder_test::run_decrement,
+                                      this,
+                                      std::placeholders::_1,
+                                      std::placeholders::_2,
+                                      num_threads,
+                                      std::placeholders::_3);
+
+        // Test increment_by
+        do_increment_by(base_value, 1, 1, base_value);
+        do_increment_by(base_value, 100, 1, base_value);
+        do_increment_by(base_value, 10, 10, base_value);
+        do_increment_by(base_value, -10, 10, base_value);
+        do_increment_by(base_value, -100, 1, base_value);
+        do_increment_by(base_value, -1, 1, base_value);
+        ASSERT_EQ(0, _adder.value());
+        ASSERT_EQ(0, base_value);
+
+        // Test increment
+        do_increment(base_value, 1, base_value);
+        do_increment(base_value, 100, base_value);
+
+        // Fetch and reset
+        ASSERT_EQ(base_value, _adder.fetch_and_reset());
+        base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Test decrement
+        do_decrement(base_value, 100, base_value);
+        do_decrement(base_value, 1, base_value);
+
+        // Reset at last
+        _adder.reset();
+        base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+    }
+
+    void run_concurrent_cases(int64_t num_operations, int64_t num_threads)
+    {
+        fmt::print(
+            stdout, "Ready to run concurrent cases for {}:\n", type_parse_traits<Adder>::name);
+
+        // Initially adder should be zero
+        int64_t base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Define runner to time each case
+        auto runner = [num_operations, num_threads](
+            const char *name, std::function<void(int64_t &)> func, int64_t &result) {
+            uint64_t start = dsn_now_ns();
+            func(result);
+            uint64_t end = dsn_now_ns();
+
+            auto duration_ns = static_cast<int64_t>(end - start);
+            std::chrono::nanoseconds nano(duration_ns);
+            auto duration_ms =
+                std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(nano).count();
+
+            fmt::print(stdout,
+                       "Running {} operations of {} with {} threads took {} ms.\n",
+                       num_operations,
+                       name,
+                       num_threads,
+                       duration_ms);
+        };
+
+        // Test increment
+        auto do_increment = std::bind(&long_adder_test::run_increment,
+                                      this,
+                                      base_value,
+                                      num_operations,
+                                      num_threads,
+                                      std::placeholders::_1);
+        runner("Increment", do_increment, base_value);
+
+        // Test decrement
+        auto do_decrement = std::bind(&long_adder_test::run_decrement,
+                                      this,
+                                      base_value,
+                                      num_operations,
+                                      num_threads,
+                                      std::placeholders::_1);
+        runner("Decrement", do_decrement, base_value);
+
+        // At last adder should also be zero
+        ASSERT_EQ(0, _adder.value());
+        ASSERT_EQ(0, base_value);
+    }
+
+private:
+    void increment_by(int64_t delta, int64_t n)
+    {
+        for (int64_t i = 0; i < n; ++i) {
+            _adder.increment_by(delta);
+        }
+    }
+
+    void increment(int64_t num)
+    {
+        for (int64_t i = 0; i < num; ++i) {
+            _adder.increment();
+        }
+    }
+
+    void decrement(int64_t num)
+    {
+        for (int64_t i = 0; i < num; ++i) {
+            _adder.decrement();
+        }
+    }
+
+    void execute(int64_t num_threads, std::function<void()> runner)
+    {
+        std::vector<std::thread> threads;
+        for (int64_t i = 0; i < num_threads; i++) {
+            threads.emplace_back(runner);
+        }
+        for (auto &t : threads) {
+            t.join();
+        }
+    }
+
+    long_adder_wrapper<Adder> _adder;
+};
+
+template <typename Adder>
+void run_basic_cases()
+{
+    long_adder_test<Adder> test;
+    test.run_basic_cases(1);
+    test.run_basic_cases(4);
+}
+
+template <typename Adder0, typename Adder1, typename... Others>
+void run_basic_cases()
+{
+    run_basic_cases<Adder0>();
+    run_basic_cases<Adder1, Others...>();
+}
+
+template <typename Adder>
+void run_concurrent_cases()
+{
+    long_adder_test<Adder> test;
+    test.run_concurrent_cases(10000000, 1);
+    test.run_concurrent_cases(10000000, 4);
+}
+
+template <typename Adder0, typename Adder1, typename... Others>
+void run_concurrent_cases()
+{
+    run_concurrent_cases<Adder0>();
+    run_concurrent_cases<Adder1, Others...>();
+}
+
+TEST(long_adder_test, basic_cases) { run_basic_cases<striped_long_adder, concurrent_long_adder>(); }
+
+TEST(long_adder_test, concurrent_cases)
+{
+    run_concurrent_cases<striped_long_adder, concurrent_long_adder>();
+}
+
+} // namespace dsn

From 7fc37c87852c418c03c90b0e964d26a58486549b Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Thu, 10 Mar 2022 14:15:18 +0800
Subject: [PATCH 02/21] feat(new_metrics): implement the metric entity & its
 prototype (#1070)

---
 include/dsn/utility/metrics.h   | 92 +++++++++++++++++++++++++++++++++
 src/utils/metrics.cpp           | 55 ++++++++++++++++++++
 src/utils/test/metrics_test.cpp | 80 ++++++++++++++++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 include/dsn/utility/metrics.h
 create mode 100644 src/utils/metrics.cpp
 create mode 100644 src/utils/test/metrics_test.cpp

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
new file mode 100644
index 0000000000..5908425742
--- /dev/null
+++ b/include/dsn/utility/metrics.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+#include <dsn/utility/autoref_ptr.h>
+#include <dsn/utility/ports.h>
+
+// A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)
+// inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
+//
+//
+// Example of defining and instantiating a metric entity
+// -----------------------------------------------------
+// Define an entity type at the top of your .cpp file (not within any namespace):
+// METRIC_DEFINE_entity(my_entity);
+//
+// To use the entity type, declare it at the top of any .h/.cpp file (not within any namespace):
+// METRIC_DECLARE_entity(my_entity);
+//
+// Instantiating the entity in whatever class represents the entity:
+// entity_instance = METRIC_ENTITY_my_entity.instantiate(my_entity_id, ...);
+
+// Define a new entity type.
+#define METRIC_DEFINE_entity(name) ::dsn::metric_entity_prototype METRIC_ENTITY_##name(#name)
+
+// The following macros act as forward declarations for entity types and metric prototypes.
+#define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
+
+namespace dsn {
+
+class metric_entity : public ref_counter
+{
+public:
+    using attr_map = std::unordered_map<std::string, std::string>;
+
+    const std::string &id() const { return _id; }
+
+    const attr_map &attributes() const { return _attrs; }
+
+private:
+    friend class metric_entity_prototype;
+    friend class ref_ptr<metric_entity>;
+
+    metric_entity(const std::string &id, attr_map &&attrs);
+
+    ~metric_entity();
+
+    const std::string _id;
+    const attr_map _attrs;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_entity);
+};
+
+using metric_entity_ptr = ref_ptr<metric_entity>;
+
+class metric_entity_prototype
+{
+public:
+    explicit metric_entity_prototype(const char *name);
+    ~metric_entity_prototype();
+
+    const char *name() const { return _name; }
+
+    // Create an entity with the given ID and attributes, if any.
+    metric_entity_ptr instantiate(const std::string &id, metric_entity::attr_map attrs) const;
+    metric_entity_ptr instantiate(const std::string &id) const;
+
+private:
+    const char *const _name;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_entity_prototype);
+};
+
+} // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
new file mode 100644
index 0000000000..0704ef7d7b
--- /dev/null
+++ b/src/utils/metrics.cpp
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/metrics.h>
+
+#include <utility>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+
+namespace dsn {
+
+metric_entity::metric_entity(const std::string &id, attr_map &&attrs)
+    : _id(id), _attrs(std::move(attrs))
+{
+}
+
+metric_entity::~metric_entity() {}
+
+ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id,
+                                                            metric_entity::attr_map attrs) const
+{
+    dassert_f(attrs.find("entity") == attrs.end(), "{}'s attribute \"entity\" is reserved", id);
+
+    attrs["entity"] = _name;
+    ref_ptr<metric_entity> entity(new metric_entity(id, std::move(attrs)));
+
+    return entity;
+}
+
+ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id) const
+{
+
+    return instantiate(id, {});
+}
+
+metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name) {}
+
+metric_entity_prototype::~metric_entity_prototype() {}
+
+} // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
new file mode 100644
index 0000000000..cf15356594
--- /dev/null
+++ b/src/utils/test/metrics_test.cpp
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/metrics.h>
+
+#include <gtest/gtest.h>
+
+METRIC_DEFINE_entity(my_server);
+METRIC_DEFINE_entity(my_table);
+METRIC_DEFINE_entity(my_replica);
+
+namespace dsn {
+
+TEST(metrics_test, create_entity)
+{
+    // Test cases:
+    // - create an entity by instantiate(id) without any attribute
+    // - create another entity by instantiate(id, attrs) without any attribute
+    // - create an entity with an attribute
+    // - create another entity with an attribute
+    // - create an entity with 2 attributes
+    // - create another entity with 2 attributes
+    struct test_case
+    {
+        metric_entity_prototype *prototype;
+        std::string type_name;
+        std::string entity_id;
+        metric_entity::attr_map entity_attrs;
+        bool use_attrs_arg_if_empty;
+    } tests[] = {{&METRIC_ENTITY_my_server, "my_server", "server_1", {}, false},
+                 {&METRIC_ENTITY_my_server, "my_server", "server_2", {}, true},
+                 {&METRIC_ENTITY_my_table, "my_table", "test_1", {{"table", "test_1"}}, true},
+                 {&METRIC_ENTITY_my_table, "my_table", "test_2", {{"table", "test_2"}}, true},
+                 {&METRIC_ENTITY_my_replica,
+                  "my_replica",
+                  "1.2",
+                  {{"table", "test_1"}, {"partition", "2"}},
+                  true},
+                 {&METRIC_ENTITY_my_replica,
+                  "my_replica",
+                  "2.5",
+                  {{"table", "test_2"}, {"partition", "5"}},
+                  true}};
+    for (const auto &test : tests) {
+        ASSERT_EQ(test.prototype->name(), test.type_name);
+
+        metric_entity_ptr entity;
+        if (test.entity_attrs.empty() && !test.use_attrs_arg_if_empty) {
+            entity = test.prototype->instantiate(test.entity_id);
+        } else {
+            entity = test.prototype->instantiate(test.entity_id, test.entity_attrs);
+        }
+
+        auto id = entity->id();
+        ASSERT_EQ(id, test.entity_id);
+
+        auto attrs = entity->attributes();
+        ASSERT_NE(attrs.find("entity"), attrs.end());
+        ASSERT_EQ(attrs["entity"], test.type_name);
+        ASSERT_EQ(attrs.size(), test.entity_attrs.size() + 1);
+        ASSERT_EQ(attrs.erase("entity"), 1);
+        ASSERT_EQ(attrs, test.entity_attrs);
+    }
+}
+
+} // namespace dsn

From d592fb35b71ae4b5bf930c6584a8a4b30ce5cdbc Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Sat, 12 Mar 2022 12:23:34 +0800
Subject: [PATCH 03/21] feat(new_metrics): implement the metric registry
 (#1073)

---
 include/dsn/utility/metrics.h   | 34 +++++++++++++++++++--
 src/utils/metrics.cpp           | 53 ++++++++++++++++++++++++++++-----
 src/utils/test/metrics_test.cpp | 35 ++++++++++++++++++++++
 3 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 5908425742..03df62b5a9 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,11 +17,13 @@
 
 #pragma once
 
+#include <mutex>
 #include <string>
 #include <unordered_map>
 
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/ports.h>
+#include <dsn/utility/singleton.h>
 
 // A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)
 // inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
@@ -53,18 +55,22 @@ class metric_entity : public ref_counter
 
     const std::string &id() const { return _id; }
 
-    const attr_map &attributes() const { return _attrs; }
+    attr_map attributes() const;
 
 private:
-    friend class metric_entity_prototype;
+    friend class metric_registry;
     friend class ref_ptr<metric_entity>;
 
     metric_entity(const std::string &id, attr_map &&attrs);
 
     ~metric_entity();
 
+    void set_attributes(attr_map &&attrs);
+
     const std::string _id;
-    const attr_map _attrs;
+
+    mutable std::mutex _mtx;
+    attr_map _attrs;
 
     DISALLOW_COPY_AND_ASSIGN(metric_entity);
 };
@@ -89,4 +95,26 @@ class metric_entity_prototype
     DISALLOW_COPY_AND_ASSIGN(metric_entity_prototype);
 };
 
+class metric_registry : public utils::singleton<metric_registry>
+{
+public:
+    using entity_map = std::unordered_map<std::string, metric_entity_ptr>;
+
+    entity_map entities() const;
+
+private:
+    friend class metric_entity_prototype;
+    friend class utils::singleton<metric_registry>;
+
+    metric_registry();
+    ~metric_registry();
+
+    metric_entity_ptr find_or_create_entity(const std::string &id, metric_entity::attr_map &&attrs);
+
+    mutable std::mutex _mtx;
+    entity_map _entities;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_registry);
+};
+
 } // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 0704ef7d7b..90c4cd759e 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -31,20 +31,29 @@ metric_entity::metric_entity(const std::string &id, attr_map &&attrs)
 
 metric_entity::~metric_entity() {}
 
-ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id,
-                                                            metric_entity::attr_map attrs) const
+metric_entity::attr_map metric_entity::attributes() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    return _attrs;
+}
+
+void metric_entity::set_attributes(attr_map &&attrs)
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    _attrs = std::move(attrs);
+}
+
+metric_entity_ptr metric_entity_prototype::instantiate(const std::string &id,
+                                                       metric_entity::attr_map attrs) const
 {
     dassert_f(attrs.find("entity") == attrs.end(), "{}'s attribute \"entity\" is reserved", id);
 
     attrs["entity"] = _name;
-    ref_ptr<metric_entity> entity(new metric_entity(id, std::move(attrs)));
-
-    return entity;
+    return metric_registry::instance().find_or_create_entity(id, std::move(attrs));
 }
 
-ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id) const
+metric_entity_ptr metric_entity_prototype::instantiate(const std::string &id) const
 {
-
     return instantiate(id, {});
 }
 
@@ -52,4 +61,34 @@ metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name)
 
 metric_entity_prototype::~metric_entity_prototype() {}
 
+metric_registry::metric_registry() {}
+
+metric_registry::~metric_registry() {}
+
+metric_registry::entity_map metric_registry::entities() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+
+    return _entities;
+}
+
+metric_entity_ptr metric_registry::find_or_create_entity(const std::string &id,
+                                                         metric_entity::attr_map &&attrs)
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+
+    entity_map::const_iterator iter = _entities.find(id);
+
+    metric_entity_ptr entity;
+    if (iter == _entities.end()) {
+        entity = new metric_entity(id, std::move(attrs));
+        _entities[id] = entity;
+    } else {
+        iter->second->set_attributes(std::move(attrs));
+        entity = iter->second;
+    }
+
+    return entity;
+}
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index cf15356594..e6f820a5b7 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -55,6 +55,8 @@ TEST(metrics_test, create_entity)
                   "2.5",
                   {{"table", "test_2"}, {"partition", "5"}},
                   true}};
+
+    metric_registry::entity_map entities;
     for (const auto &test : tests) {
         ASSERT_EQ(test.prototype->name(), test.type_name);
 
@@ -74,6 +76,39 @@ TEST(metrics_test, create_entity)
         ASSERT_EQ(attrs.size(), test.entity_attrs.size() + 1);
         ASSERT_EQ(attrs.erase("entity"), 1);
         ASSERT_EQ(attrs, test.entity_attrs);
+
+        ASSERT_EQ(entities.find(test.entity_id), entities.end());
+        entities[test.entity_id] = entity;
+    }
+
+    ASSERT_EQ(metric_registry::instance().entities(), entities);
+}
+
+TEST(metrics_test, recreate_entity)
+{
+    // Test cases:
+    // - add an attribute to an empty map
+    // - add another attribute to a single-element map
+    // - remove an attribute from the map
+    // - remove the only attribute from the map ({{}} is an empty map; {{{}}} holds one "" pair)
+    struct test_case
+    {
+        metric_entity::attr_map entity_attrs;
+    } tests[] = {
+        {{{"name", "test"}}}, {{{"name", "test"}, {"id", "2"}}}, {{{"name", "test"}}}, {{}}};
+
+    const std::string entity_id("test");
+    auto expected_entity = METRIC_ENTITY_my_table.instantiate(entity_id);
+
+    for (const auto &test : tests) {
+        // the pointer of entity should be kept unchanged
+        auto entity = METRIC_ENTITY_my_table.instantiate(entity_id, test.entity_attrs);
+        ASSERT_EQ(entity, expected_entity);
+
+        // the attributes will be updated
+        auto attrs = entity->attributes();
+        ASSERT_EQ(attrs.erase("entity"), 1);
+        ASSERT_EQ(attrs, test.entity_attrs);
     }
 }
 

From 0d632e3c18e5c499ca46d6ec7096a0fdbd6ed02f Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 18 Mar 2022 22:27:05 +0800
Subject: [PATCH 04/21] feat(new_metrics): implement the metric & its prototype
 (#1075)

---
 include/dsn/utility/casts.h     |  47 +++++++++++++
 include/dsn/utility/metrics.h   | 115 ++++++++++++++++++++++++++++++++
 src/utils/metrics.cpp           |  14 +++-
 src/utils/test/metrics_test.cpp | 112 ++++++++++++++++++++++++++++++-
 4 files changed, 285 insertions(+), 3 deletions(-)
 create mode 100644 include/dsn/utility/casts.h

diff --git a/include/dsn/utility/casts.h b/include/dsn/utility/casts.h
new file mode 100644
index 0000000000..8b3466843a
--- /dev/null
+++ b/include/dsn/utility/casts.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+namespace dsn {
+
+// down_cast<To>(from) converts a base-class pointer `from` to the derived-class pointer
+// type `To`. RTTI (dynamic_cast<>) is comparatively slow, so the inheritance relationship
+// is verified at compile time by a static_assert and the conversion itself is done with
+// static_cast<>. When assertions are enabled (NDEBUG not defined), dynamic_cast<> is also
+// run as a double-check that `from` really points to an object of the target type.
+
+template <typename To, typename From>
+inline To down_cast(From *from)
+{
+    // Perform a compile-time assertion to check whether <To> class is derived from <From> class.
+    static_assert(std::is_base_of<typename std::remove_pointer<From>::type,
+                                  typename std::remove_pointer<To>::type>::value,
+                  "<To> class is not derived from <From> class");
+
+    // Use RTTI to do double-check, though in practice the unit tests are seldom built in debug
+    // mode. For example, the unit tests of github CI for both rDSN and Pegasus are built in
+    // release mode.
+    assert(from == nullptr || dynamic_cast<To>(from) != nullptr);
+
+    return static_cast<To>(from);
+}
+
+} // namespace dsn
diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 03df62b5a9..e4448ba018 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -20,10 +20,14 @@
 #include <mutex>
 #include <string>
 #include <unordered_map>
+#include <utility>
 
 #include <dsn/utility/autoref_ptr.h>
+#include <dsn/utility/casts.h>
+#include <dsn/utility/enum_helper.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
+#include <dsn/utility/string_view.h>
 
 // A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)
 // inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
@@ -48,15 +52,39 @@
 
 namespace dsn {
 
+class metric_prototype;
+class metric;
+using metric_ptr = ref_ptr<metric>;
+
 class metric_entity : public ref_counter
 {
 public:
     using attr_map = std::unordered_map<std::string, std::string>;
+    using metric_map = std::unordered_map<const metric_prototype *, metric_ptr>;
 
     const std::string &id() const { return _id; }
 
     attr_map attributes() const;
 
+    metric_map metrics() const;
+
+    // args are the parameters that are used to construct the object of MetricType
+    template <typename MetricType, typename... Args>
+    ref_ptr<MetricType> find_or_create(const metric_prototype *prototype, Args &&... args)
+    {
+        std::lock_guard<std::mutex> guard(_mtx);
+
+        metric_map::const_iterator iter = _metrics.find(prototype);
+        if (iter != _metrics.end()) {
+            auto raw_ptr = down_cast<MetricType *>(iter->second.get());
+            return raw_ptr;
+        }
+
+        ref_ptr<MetricType> ptr(new MetricType(prototype, std::forward<Args>(args)...));
+        _metrics[prototype] = ptr;
+        return ptr;
+    }
+
 private:
     friend class metric_registry;
     friend class ref_ptr<metric_entity>;
@@ -71,6 +99,7 @@ class metric_entity : public ref_counter
 
     mutable std::mutex _mtx;
     attr_map _attrs;
+    metric_map _metrics;
 
     DISALLOW_COPY_AND_ASSIGN(metric_entity);
 };
@@ -117,4 +146,90 @@ class metric_registry : public utils::singleton<metric_registry>
     DISALLOW_COPY_AND_ASSIGN(metric_registry);
 };
 
+enum class metric_unit
+{
+    kNanoSeconds,
+    kMicroSeconds,
+    kMilliSeconds,
+    kSeconds,
+    kInvalidUnit,
+};
+
+ENUM_BEGIN(metric_unit, metric_unit::kInvalidUnit)
+ENUM_REG(metric_unit::kNanoSeconds)
+ENUM_REG(metric_unit::kMicroSeconds)
+ENUM_REG(metric_unit::kMilliSeconds)
+ENUM_REG(metric_unit::kSeconds)
+ENUM_END(metric_unit)
+
+class metric_prototype
+{
+public:
+    struct ctor_args
+    {
+        const string_view entity_type;
+        const string_view name;
+        const metric_unit unit;
+        const string_view desc;
+    };
+
+    string_view entity_type() const { return _args.entity_type; }
+
+    string_view name() const { return _args.name; }
+
+    metric_unit unit() const { return _args.unit; }
+
+    string_view description() const { return _args.desc; }
+
+protected:
+    explicit metric_prototype(const ctor_args &args);
+    virtual ~metric_prototype();
+
+private:
+    const ctor_args _args;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_prototype);
+};
+
+// metric_prototype_with<MetricType> can help to implement the prototype of each type of metric
+// to construct a metric object conveniently.
+template <typename MetricType>
+class metric_prototype_with : public metric_prototype
+{
+public:
+    explicit metric_prototype_with(const ctor_args &args) : metric_prototype(args) {}
+    virtual ~metric_prototype_with() = default;
+
+    // Construct a metric object based on the instance of metric_entity.
+    template <typename... Args>
+    ref_ptr<MetricType> instantiate(const metric_entity_ptr &entity, Args &&... args) const
+    {
+        return entity->find_or_create<MetricType>(this, std::forward<Args>(args)...);
+    }
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(metric_prototype_with);
+};
+
+// Base class for each type of metric.
+// Every metric class should inherit from this class.
+//
+// User object should hold a ref_ptr of a metric, while the entity will hold another ref_ptr.
+// The ref count of a metric may become 1, which means the metric is only held by the entity:
+// After a period of configurable time, if the ref count is still 1, the metric will be dropped
+// in that it's considered to be useless. During the period when the metric is retained, once
+// the same one is instantiated again, it will not be removed; whether or not the metric is
+// instantiated again, however, its latest value is visible.
+class metric : public ref_counter
+{
+protected:
+    explicit metric(const metric_prototype *prototype);
+    virtual ~metric() = default;
+
+    const metric_prototype *const _prototype;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(metric);
+};
+
 } // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 90c4cd759e..8f96792b81 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -17,8 +17,6 @@
 
 #include <dsn/utility/metrics.h>
 
-#include <utility>
-
 #include <dsn/c/api_utilities.h>
 #include <dsn/dist/fmt_logging.h>
 
@@ -37,6 +35,12 @@ metric_entity::attr_map metric_entity::attributes() const
     return _attrs;
 }
 
+metric_entity::metric_map metric_entity::metrics() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    return _metrics;
+}
+
 void metric_entity::set_attributes(attr_map &&attrs)
 {
     std::lock_guard<std::mutex> guard(_mtx);
@@ -91,4 +95,10 @@ metric_entity_ptr metric_registry::find_or_create_entity(const std::string &id,
     return entity;
 }
 
+metric_prototype::metric_prototype(const ctor_args &args) : _args(args) {}
+
+metric_prototype::~metric_prototype() {}
+
+metric::metric(const metric_prototype *prototype) : _prototype(prototype) {}
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index e6f820a5b7..5bc979bc43 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -19,10 +19,55 @@
 
 #include <gtest/gtest.h>
 
+namespace dsn {
+
+class my_gauge : public metric
+{
+public:
+    explicit my_gauge(const metric_prototype *prototype) : metric(prototype), _value(0) {}
+
+    my_gauge(const metric_prototype *prototype, int64_t value) : metric(prototype), _value(value) {}
+
+    virtual ~my_gauge() = default;
+
+    int64_t value() { return _value; }
+
+private:
+    int64_t _value;
+
+    DISALLOW_COPY_AND_ASSIGN(my_gauge);
+};
+
+using my_gauge_prototype = metric_prototype_with<my_gauge>;
+using my_gauge_ptr = ref_ptr<my_gauge>;
+
+} // namespace dsn
+
+#define METRIC_DEFINE_my_gauge(entity_type, name, unit, desc, ...)                                 \
+    ::dsn::my_gauge_prototype METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+
 METRIC_DEFINE_entity(my_server);
 METRIC_DEFINE_entity(my_table);
 METRIC_DEFINE_entity(my_replica);
 
+METRIC_DEFINE_my_gauge(my_server,
+                       my_server_latency,
+                       dsn::metric_unit::kMicroSeconds,
+                       "a server-level latency for test");
+METRIC_DEFINE_my_gauge(my_server,
+                       my_server_duration,
+                       dsn::metric_unit::kSeconds,
+                       "a server-level duration for test");
+
+METRIC_DEFINE_my_gauge(my_replica,
+                       my_replica_latency,
+                       dsn::metric_unit::kNanoSeconds,
+                       "a replica-level latency for test");
+METRIC_DEFINE_my_gauge(my_replica,
+                       my_replica_duration,
+                       dsn::metric_unit::kMilliSeconds,
+                       "a replica-level duration for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -105,11 +150,76 @@ TEST(metrics_test, recreate_entity)
         auto entity = METRIC_ENTITY_my_table.instantiate(entity_id, test.entity_attrs);
         ASSERT_EQ(entity, expected_entity);
 
-        // the attributes will updated
+        // the attributes will be updated
         auto attrs = entity->attributes();
         ASSERT_EQ(attrs.erase("entity"), 1);
         ASSERT_EQ(attrs, test.entity_attrs);
     }
 }
 
+TEST(metrics_test, create_metric)
+{
+    auto my_server_entity = METRIC_ENTITY_my_server.instantiate("server_3");
+    auto my_replica_entity =
+        METRIC_ENTITY_my_replica.instantiate("3.7", {{"table", "test_3"}, {"partition", "7"}});
+
+    // Test cases:
+    // - create a metric without any argument by an entity
+    // - create a metric with an argument by an entity
+    // - create a metric with an argument by another entity
+    // - create a metric without any argument by another entity
+    struct test_case
+    {
+        my_gauge_prototype *prototype;
+        metric_entity_ptr entity;
+        bool use_default_value;
+        int64_t value;
+    } tests[] = {{&METRIC_my_server_latency, my_server_entity, true, 0},
+                 {&METRIC_my_server_duration, my_server_entity, false, 10},
+                 {&METRIC_my_replica_latency, my_replica_entity, false, 100},
+                 {&METRIC_my_replica_duration, my_replica_entity, true, 0}};
+
+    using entity_map = std::unordered_map<metric_entity *, metric_entity::metric_map>;
+
+    entity_map expected_entities;
+    for (const auto &test : tests) {
+        my_gauge_ptr my_metric;
+        if (test.use_default_value) {
+            my_metric = test.prototype->instantiate(test.entity);
+        } else {
+            my_metric = test.prototype->instantiate(test.entity, test.value);
+        }
+
+        ASSERT_EQ(my_metric->value(), test.value);
+
+        auto iter = expected_entities.find(test.entity.get());
+        if (iter == expected_entities.end()) {
+            expected_entities[test.entity.get()] = {{test.prototype, my_metric}};
+        } else {
+            iter->second[test.prototype] = my_metric;
+        }
+    }
+
+    entity_map actual_entities;
+    auto entities = metric_registry::instance().entities();
+    for (const auto &entity : entities) {
+        if (expected_entities.find(entity.second.get()) != expected_entities.end()) {
+            actual_entities[entity.second.get()] = entity.second->metrics();
+        }
+    }
+
+    ASSERT_EQ(actual_entities, expected_entities);
+}
+
+TEST(metrics_test, recreate_metric)
+{
+    auto my_server_entity = METRIC_ENTITY_my_server.instantiate("server_4");
+
+    auto my_metric = METRIC_my_server_latency.instantiate(my_server_entity, 5);
+    ASSERT_EQ(my_metric->value(), 5);
+    // Instantiating again is expected to return the existing metric, ignoring the new value.
+    ASSERT_EQ(METRIC_my_server_latency.instantiate(my_server_entity, 10), my_metric);
+    ASSERT_EQ(my_metric->value(), 5);
+}
+
 } // namespace dsn

From 03d0ec05ccf6b1582f1b939511b37d6fc13fc64c Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 23 Mar 2022 17:19:02 +0800
Subject: [PATCH 05/21] feat(new_metrics): implement the gauge (#1079)

---
 include/dsn/utility/metrics.h   |  77 +++++++++++++++++++++++-
 src/utils/test/metrics_test.cpp | 102 +++++++++++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index e4448ba018..06e76017fb 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,8 +17,10 @@
 
 #pragma once
 
+#include <atomic>
 #include <mutex>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
@@ -41,14 +43,36 @@
 // To use the entity type, declare it at the top of any .h/.cpp file (not within any namespace):
 // METRIC_DECLARE_entity(my_entity);
 //
-// Instantiating the entity in whatever class represents the entity:
+// Instantiating the entity in whatever class represents it:
 // entity_instance = METRIC_ENTITY_my_entity.instantiate(my_entity_id, ...);
+//
+//
+// Example of defining and instantiating a metric
+// -----------------------------------------------------
+// Define a metric prototype at the top of your .cpp file (not within any namespace):
+// METRIC_DEFINE_gauge_int64(my_entity,
+//                           my_gauge_name,
+//                           dsn::metric_unit::kMilliSeconds,
+//                           "the description for my gauge");
+//
+// To use the metric prototype, declare it at the top of any .h/.cpp file (not within any
+// namespace):
+// METRIC_DECLARE_gauge_int64(my_gauge_name);
+//
+// Instantiating the metric in whatever class represents it with some initial arguments, if any:
+// metric_instance = METRIC_my_gauge_name.instantiate(entity_instance, ...);
 
-// Define a new entity type.
+// Convenient macros are provided to define entity types and metric prototypes.
 #define METRIC_DEFINE_entity(name) ::dsn::metric_entity_prototype METRIC_ENTITY_##name(#name)
+#define METRIC_DEFINE_gauge_int64(entity_type, name, unit, desc, ...)                              \
+    ::dsn::gauge_prototype<int64_t> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_gauge_double(entity_type, name, unit, desc, ...)                             \
+    ::dsn::gauge_prototype<double> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
+#define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
+#define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
 
 namespace dsn {
 
@@ -222,6 +246,9 @@ class metric_prototype_with : public metric_prototype
 // however, its lastest value is visible.
 class metric : public ref_counter
 {
+public:
+    const metric_prototype *prototype() const { return _prototype; }
+
 protected:
     explicit metric(const metric_prototype *prototype);
     virtual ~metric() = default;
@@ -232,4 +259,50 @@ class metric : public ref_counter
     DISALLOW_COPY_AND_ASSIGN(metric);
 };
 
+// A gauge is an instantaneous measurement of a discrete value. It represents a single numerical
+// value that can arbitrarily go up and down. It's typically used for measured values like current
+// memory usage, the total capacity and available ratio of a disk, etc.
+template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class gauge : public metric
+{
+public:
+    T value() const { return _value.load(std::memory_order_relaxed); }
+
+    void set(const T &val) { _value.store(val, std::memory_order_relaxed); }
+
+protected:
+    gauge(const metric_prototype *prototype, const T &initial_val)
+        : metric(prototype), _value(initial_val)
+    {
+    }
+
+    gauge(const metric_prototype *prototype);
+
+    virtual ~gauge() = default;
+
+private:
+    friend class metric_entity;
+    friend class ref_ptr<gauge<T>>;
+
+    std::atomic<T> _value;
+
+    DISALLOW_COPY_AND_ASSIGN(gauge);
+};
+
+// An explicit specialization is not implicitly inline. Since these definitions live in a
+// header, they are marked `inline` so that including this header from several translation
+// units does not lead to multiple definitions.
+template <>
+inline gauge<int64_t>::gauge(const metric_prototype *prototype) : gauge(prototype, 0) {}
+
+// The default initial value of a double gauge is 0.0.
+template <>
+inline gauge<double>::gauge(const metric_prototype *prototype) : gauge(prototype, 0.0) {}
+
+template <typename T>
+using gauge_ptr = ref_ptr<gauge<T>>;
+
+template <typename T>
+using gauge_prototype = metric_prototype_with<gauge<T>>;
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 5bc979bc43..e1b72e2b65 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -24,15 +24,19 @@ namespace dsn {
 class my_gauge : public metric
 {
 public:
+    int64_t value() { return _value; }
+
+protected:
     explicit my_gauge(const metric_prototype *prototype) : metric(prototype), _value(0) {}
 
     my_gauge(const metric_prototype *prototype, int64_t value) : metric(prototype), _value(value) {}
 
     virtual ~my_gauge() = default;
 
-    int64_t value() { return _value; }
-
 private:
+    friend class metric_entity;
+    friend class ref_ptr<my_gauge>;
+
     int64_t _value;
 
     DISALLOW_COPY_AND_ASSIGN(my_gauge);
@@ -68,6 +72,16 @@ METRIC_DEFINE_my_gauge(my_replica,
                        dsn::metric_unit::kMilliSeconds,
                        "a replica-level duration for test");
 
+METRIC_DEFINE_gauge_int64(my_server,
+                          test_gauge_int64,
+                          dsn::metric_unit::kMilliSeconds,
+                          "a server-level gauge of int64 type for test");
+
+METRIC_DEFINE_gauge_double(my_server,
+                           test_gauge_double,
+                           dsn::metric_unit::kSeconds,
+                           "a server-level gauge of double type for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -222,4 +236,88 @@ TEST(metrics_test, recreate_metric)
     ASSERT_EQ(my_metric->value(), 5);
 }
 
+TEST(metrics_test, gauge_int64)
+{
+
+    // Test cases:
+    // - create a gauge of int64 type without initial value, then increase
+    // - create a gauge of int64 type without initial value, then decrease
+    // - create a gauge of int64 type with initial value, then increase
+    // - create a gauge of int64 type with initial value, then decrease
+    struct test_case
+    {
+        std::string entity_id;
+        bool use_default_value;
+        int64_t initial_value;
+        int64_t new_value;
+    } tests[] = {{"server_5", true, 0, 5},
+                 {"server_6", true, 0, -5},
+                 {"server_7", false, 10, 100},
+                 {"server_8", false, 100, 10}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        gauge_ptr<int64_t> my_metric;
+        if (test.use_default_value) {
+            my_metric = METRIC_test_gauge_int64.instantiate(my_server_entity);
+        } else {
+            my_metric = METRIC_test_gauge_int64.instantiate(my_server_entity, test.initial_value);
+        }
+
+        ASSERT_EQ(my_metric->value(), test.initial_value);
+
+        my_metric->set(test.new_value);
+        ASSERT_EQ(my_metric->value(), test.new_value);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_int64].get()), my_metric.get());
+
+        ASSERT_EQ(my_metric->prototype(),
+                  static_cast<const metric_prototype *>(&METRIC_test_gauge_int64));
+    }
+}
+
+TEST(metrics_test, gauge_double)
+{
+
+    // Test cases:
+    // - create a gauge of double type without initial value, then increase
+    // - create a gauge of double type without initial value, then decrease
+    // - create a gauge of double type with initial value, then increase
+    // - create a gauge of double type with initial value, then decrease
+    struct test_case
+    {
+        std::string entity_id;
+        bool use_default_value;
+        double initial_value;
+        double new_value;
+    } tests[] = {{"server_9", true, 0.0, 5.278},
+                 {"server_10", true, 0.0, -5.278},
+                 {"server_11", false, 10.756, 100.128},
+                 {"server_12", false, 100.128, 10.756}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        gauge_ptr<double> my_metric;
+        if (test.use_default_value) {
+            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity);
+        } else {
+            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity, test.initial_value);
+        }
+
+        ASSERT_DOUBLE_EQ(my_metric->value(), test.initial_value);
+
+        my_metric->set(test.new_value);
+        ASSERT_DOUBLE_EQ(my_metric->value(), test.new_value);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_double].get()), my_metric.get());
+
+        ASSERT_EQ(my_metric->prototype(),
+                  static_cast<const metric_prototype *>(&METRIC_test_gauge_double));
+    }
+}
+
 } // namespace dsn

From 863325bd33eda71aa3c9f27abc90742138459061 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 25 Mar 2022 17:34:59 +0800
Subject: [PATCH 06/21] feat(new_metrics): implement the counter (#1081)

---
 include/dsn/utility/metrics.h   |  54 ++++++++++++
 src/utils/test/metrics_test.cpp | 147 +++++++++++++++++++++++++++++++-
 2 files changed, 198 insertions(+), 3 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 06e76017fb..7c1f479002 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -27,6 +27,7 @@
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
+#include <dsn/utility/long_adder.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
 #include <dsn/utility/string_view.h>
@@ -68,11 +69,27 @@
     ::dsn::gauge_prototype<int64_t> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
 #define METRIC_DEFINE_gauge_double(entity_type, name, unit, desc, ...)                             \
     ::dsn::gauge_prototype<double> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+// There are 2 kinds of counters:
+// - `counter` is the general type of counter that is implemented by striped_long_adder, which can
+//   achieve high performance while consuming less memory if it's not updated very frequently.
+// - `concurrent_counter` uses concurrent_long_adder as the underlying implementation. It has
+//   higher performance while consuming more memory if it's updated very frequently.
+// See also include/dsn/utility/long_adder.h for details.
+#define METRIC_DEFINE_counter(entity_type, name, unit, desc, ...)                                  \
+    ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name(                             \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_concurrent_counter(entity_type, name, unit, desc, ...)                       \
+    ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name(                          \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
 #define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
+#define METRIC_DECLARE_counter(name)                                                               \
+    extern ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name
+#define METRIC_DECLARE_concurrent_counter(name)                                                    \
+    extern ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name
 
 namespace dsn {
 
@@ -176,6 +193,7 @@ enum class metric_unit
     kMicroSeconds,
     kMilliSeconds,
     kSeconds,
+    kRequests,
     kInvalidUnit,
 };
 
@@ -305,4 +323,40 @@ using gauge_ptr = ref_ptr<gauge<T>>;
 template <typename T>
 using gauge_prototype = metric_prototype_with<gauge<T>>;
 
+// A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
+// used to measure the number of tasks in queues, current number of running manual compacts,
+// etc. All counters start out at 0.
+template <typename Adder = striped_long_adder>
+class counter : public metric
+{
+public:
+    int64_t value() const { return _adder.value(); }
+
+    void increment_by(int64_t x) { _adder.increment_by(x); }
+    void increment() { _adder.increment(); }
+    void decrement() { _adder.decrement(); }
+
+    void reset() { _adder.reset(); }
+
+protected:
+    counter(const metric_prototype *prototype) : metric(prototype) {}
+
+    virtual ~counter() = default;
+
+private:
+    friend class metric_entity;
+    friend class ref_ptr<counter<Adder>>;
+
+    long_adder_wrapper<Adder> _adder;
+
+    DISALLOW_COPY_AND_ASSIGN(counter);
+};
+
+template <typename Adder = striped_long_adder>
+using counter_ptr = ref_ptr<counter<Adder>>;
+using concurrent_counter_ptr = counter_ptr<concurrent_long_adder>;
+
+template <typename Adder = striped_long_adder>
+using counter_prototype = metric_prototype_with<counter<Adder>>;
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index e1b72e2b65..123661c888 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -16,6 +16,10 @@
 // under the License.
 
 #include <dsn/utility/metrics.h>
+#include <dsn/utility/rand.h>
+
+#include <thread>
+#include <vector>
 
 #include <gtest/gtest.h>
 
@@ -82,6 +86,16 @@ METRIC_DEFINE_gauge_double(my_server,
                            dsn::metric_unit::kSeconds,
                            "a server-level gauge of double type for test");
 
+METRIC_DEFINE_counter(my_server,
+                      test_counter,
+                      dsn::metric_unit::kRequests,
+                      "a server-level counter for test");
+
+METRIC_DEFINE_concurrent_counter(my_server,
+                                 test_concurrent_counter,
+                                 dsn::metric_unit::kRequests,
+                                 "a server-level concurrent_counter for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -238,7 +252,6 @@ TEST(metrics_test, recreate_metric)
 
 TEST(metrics_test, gauge_int64)
 {
-
     // Test cases:
     // - create a gauge of int64 type without initial value, then increase
     // - create a gauge of int64 type without initial value, then decrease
@@ -280,7 +293,6 @@ TEST(metrics_test, gauge_int64)
 
 TEST(metrics_test, gauge_double)
 {
-
     // Test cases:
     // - create a gauge of double type without initial value, then increase
     // - create a gauge of double type without initial value, then decrease
@@ -313,11 +325,140 @@ TEST(metrics_test, gauge_double)
         ASSERT_DOUBLE_EQ(my_metric->value(), test.new_value);
 
         auto metrics = my_server_entity->metrics();
-        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_double].get()), my_metric.get());
+        ASSERT_EQ(metrics[&METRIC_test_gauge_double].get(), static_cast<metric *>(my_metric.get()));
 
         ASSERT_EQ(my_metric->prototype(),
                   static_cast<const metric_prototype *>(&METRIC_test_gauge_double));
     }
 }
 
+void execute(int64_t num_threads, std::function<void(int)> runner)
+{
+    std::vector<std::thread> threads;
+    for (int64_t i = 0; i < num_threads; i++) {
+        threads.emplace_back([i, &runner]() { runner(i); });
+    }
+    for (auto &t : threads) {
+        t.join();
+    }
+}
+
+template <typename Adder>
+void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
+                              int64_t base_value,
+                              int64_t num_operations,
+                              int64_t num_threads,
+                              int64_t &result)
+{
+    std::vector<int64_t> deltas;
+    int64_t n = num_operations * num_threads;
+    deltas.reserve(n);
+
+    int64_t expected_value = base_value;
+    for (int64_t i = 0; i < n; ++i) {
+        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
+        if (delta % 3 == 0) {
+            delta = -delta;
+        }
+        expected_value += delta;
+        deltas.push_back(delta);
+    }
+
+    execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->increment_by(deltas[tid * num_operations + i]);
+        }
+    });
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
+                           int64_t base_value,
+                           int64_t num_operations,
+                           int64_t num_threads,
+                           int64_t &result)
+{
+    execute(num_threads, [num_operations, &my_metric](int) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->increment();
+        }
+    });
+
+    int64_t expected_value = base_value + num_operations * num_threads;
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
+                           int64_t base_value,
+                           int64_t num_operations,
+                           int64_t num_threads,
+                           int64_t &result)
+{
+    execute(num_threads, [num_operations, &my_metric](int) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->decrement();
+        }
+    });
+
+    int64_t expected_value = base_value - num_operations * num_threads;
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
+{
+    // Test cases:
+    // - test the counter with small-scale computations
+    // - test the counter with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t increments_by;
+        int64_t increments;
+        int64_t decrements;
+    } tests[] = {{"server_9", 100, 1000, 1000}, {"server_10", 1000000, 10000000, 10000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        int64_t value = 0;
+        ASSERT_EQ(my_metric->value(), value);
+        run_counter_increment_by(my_metric, value, test.increments_by, num_threads, value);
+        run_counter_increment(my_metric, value, test.increments, num_threads, value);
+        run_counter_decrement(my_metric, value, test.decrements, num_threads, value);
+
+        my_metric->reset();
+        ASSERT_EQ(my_metric->value(), 0);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
+
+        ASSERT_EQ(my_metric->prototype(), prototype);
+    }
+}
+
+template <typename Adder>
+void run_counter_cases(::dsn::counter_prototype<Adder> *prototype)
+{
+    // Do single-threaded tests
+    run_counter_cases(prototype, 1);
+
+    // Do multi-threaded tests
+    run_counter_cases(prototype, 4);
+}
+
+TEST(metrics_test, counter)
+{
+    // Test both kinds of counter
+    run_counter_cases<striped_long_adder>(&METRIC_test_counter);
+    run_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_counter);
+}
+
 } // namespace dsn

From 2a7a21a508032cc2e0e24430da66d69149a2f79f Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 13 Apr 2022 20:43:23 +0800
Subject: [PATCH 07/21] feat(new_metrics): implement the volatile counter
 (#1083)

---
 include/dsn/utility/metrics.h   |  62 ++++++++++++---
 src/utils/test/metrics_test.cpp | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+), 10 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 7c1f479002..de7be1e831 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -76,10 +76,16 @@
 //   higher performance while consuming more memory if it's updated very frequently.
 // See also include/dsn/utility/long_adder.h for details.
 #define METRIC_DEFINE_counter(entity_type, name, unit, desc, ...)                                  \
-    ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name(                             \
+    dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name(                          \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 #define METRIC_DEFINE_concurrent_counter(entity_type, name, unit, desc, ...)                       \
-    ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name(                          \
+    dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name(                       \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_volatile_counter(entity_type, name, unit, desc, ...)                         \
+    dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name(                           \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_concurrent_volatile_counter(entity_type, name, unit, desc, ...)              \
+    dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name(                        \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
@@ -87,9 +93,13 @@
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
 #define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
 #define METRIC_DECLARE_counter(name)                                                               \
-    extern ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name
+    extern dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name
 #define METRIC_DECLARE_concurrent_counter(name)                                                    \
-    extern ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name
+    extern dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name
+#define METRIC_DECLARE_volatile_counter(name)                                                      \
+    extern dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name
+#define METRIC_DECLARE_concurrent_volatile_counter(name)                                           \
+    extern dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name
 
 namespace dsn {
 
@@ -326,11 +336,33 @@ using gauge_prototype = metric_prototype_with<gauge<T>>;
 // A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
 // used to measure the number of tasks in queues, current number of running manual compacts,
 // etc. All counters start out at 0.
-template <typename Adder = striped_long_adder>
+//
+// `IsVolatile` is false by default. Once it's specified as true, the counter will be volatile.
+// The value() function of a volatile counter will reset the counter atomically after its value
+// is fetched. A volatile counter can also be called a "recent" counter.
+//
+// Sometimes "recent" counters are needed, such as the number of recent failed beacons sent from
+// a replica server, the number of recent configuration updates of partitions, etc. The "recent"
+// count can be considered to be the count accumulated since it was last fetched by value().
+//
+// In most cases, a general (i.e. non-volatile) counter is enough, which means it can also work
+// for "recent" counters. For example, in Prometheus, delta() can be used to compute "recent"
+// count for a general counter. Therefore, declare a counter as volatile only when necessary.
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
 class counter : public metric
 {
 public:
-    int64_t value() const { return _adder.value(); }
+    template <bool Volatile = IsVolatile, typename = typename std::enable_if<!Volatile>::type>
+    int64_t value() const
+    {
+        return _adder.value();
+    }
+
+    template <bool Volatile = IsVolatile, typename = typename std::enable_if<Volatile>::type>
+    int64_t value()
+    {
+        return _adder.fetch_and_reset();
+    }
 
     void increment_by(int64_t x) { _adder.increment_by(x); }
     void increment() { _adder.increment(); }
@@ -345,18 +377,28 @@ class counter : public metric
 
 private:
     friend class metric_entity;
-    friend class ref_ptr<counter<Adder>>;
+    friend class ref_ptr<counter<Adder, IsVolatile>>;
 
     long_adder_wrapper<Adder> _adder;
 
     DISALLOW_COPY_AND_ASSIGN(counter);
 };
 
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
+using counter_ptr = ref_ptr<counter<Adder, IsVolatile>>;
+
+template <bool IsVolatile = false>
+using concurrent_counter_ptr = counter_ptr<concurrent_long_adder, IsVolatile>;
+
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
+using counter_prototype = metric_prototype_with<counter<Adder, IsVolatile>>;
+
 template <typename Adder = striped_long_adder>
-using counter_ptr = ref_ptr<counter<Adder>>;
-using concurrent_counter_ptr = counter_ptr<concurrent_long_adder>;
+using volatile_counter_ptr = ref_ptr<counter<Adder, true>>;
+
+using concurrent_volatile_counter_ptr = counter_ptr<concurrent_long_adder, true>;
 
 template <typename Adder = striped_long_adder>
-using counter_prototype = metric_prototype_with<counter<Adder>>;
+using volatile_counter_prototype = metric_prototype_with<counter<Adder, true>>;
 
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 123661c888..cd38ad5116 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -96,6 +96,16 @@ METRIC_DEFINE_concurrent_counter(my_server,
                                  dsn::metric_unit::kRequests,
                                  "a server-level concurrent_counter for test");
 
+METRIC_DEFINE_volatile_counter(my_server,
+                               test_volatile_counter,
+                               dsn::metric_unit::kRequests,
+                               "a server-level volatile_counter for test");
+
+METRIC_DEFINE_concurrent_volatile_counter(my_server,
+                                          test_concurrent_volatile_counter,
+                                          dsn::metric_unit::kRequests,
+                                          "a server-level concurrent_volatile_counter for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -461,4 +471,125 @@ TEST(metrics_test, counter)
     run_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_counter);
 }
 
+template <typename Adder>
+void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_metric,
+                                         int64_t num_operations,
+                                         int64_t num_threads_write,
+                                         int64_t num_threads_read)
+{
+    std::vector<int64_t> deltas;
+    int64_t n = num_operations * num_threads_write;
+    deltas.reserve(n);
+
+    int64_t expected_value = 0;
+    for (int64_t i = 0; i < n; ++i) {
+        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
+        if (delta % 3 == 0) {
+            delta = -delta;
+        }
+        expected_value += delta;
+        deltas.push_back(delta);
+    }
+
+    auto results = new_cacheline_aligned_int64_array(static_cast<uint32_t>(num_threads_read));
+    std::vector<std::atomic_bool> completed(num_threads_write);
+    for (int64_t i = 0; i < num_threads_write; ++i) {
+        completed[i].store(false);
+    }
+
+    ASSERT_EQ(my_metric->value(), 0);
+
+    execute(num_threads_write + num_threads_read,
+            [num_operations, num_threads_write, &my_metric, &deltas, &results, &completed](
+                int tid) mutable {
+                if (tid < num_threads_write) {
+                    for (int64_t i = 0; i < num_operations; ++i) {
+                        my_metric->increment_by(deltas[tid * num_operations + i]);
+                    }
+                    completed[tid].store(true);
+                } else {
+                    bool done = false;
+                    do {
+                        int64_t i = 0;
+                        for (; i < num_threads_write && completed[i].load(); ++i) {
+                        }
+                        if (i >= num_threads_write) {
+                            // All of the increment threads have finished, thus the loop can
+                            // be broken after the last time the value is fetched.
+                            done = true;
+                        }
+
+                        auto value = my_metric->value();
+                        if (value == 0) {
+                            // If zero is fetched, it's likely that the counter has not been
+                            // updated frequently of late. Thus yield and try again next time.
+                            std::this_thread::yield();
+                        } else {
+                            auto r = results.get();
+                            r[tid - num_threads_write]._value += value;
+                        }
+                    } while (!done);
+                }
+            });
+
+    int64_t value = 0;
+    for (int64_t i = 0; i < num_threads_read; ++i) {
+        value += results.get()[i]._value.load();
+    }
+    ASSERT_EQ(value, expected_value);
+    ASSERT_EQ(my_metric->value(), 0);
+}
+
+template <typename Adder>
+void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype,
+                                int64_t num_threads_write,
+                                int64_t num_threads_read)
+{
+    // Test cases:
+    // - test the volatile counter with small-scale computations
+    // - test the volatile counter with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t num_operations;
+    } tests[] = {{"server_11", 5000}, {"server_12", 5000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        run_volatile_counter_write_and_read(
+            my_metric, test.num_operations, num_threads_write, num_threads_read);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
+
+        ASSERT_EQ(my_metric->prototype(), prototype);
+    }
+}
+
+template <typename Adder>
+void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype)
+{
+    // Write with single thread and read with single thread
+    run_volatile_counter_cases(prototype, 1, 1);
+
+    // Write with multiple threads and read with single thread
+    run_volatile_counter_cases(prototype, 2, 1);
+
+    // Write with single thread and read with multiple threads
+    run_volatile_counter_cases(prototype, 1, 2);
+
+    // Write with multiple threads and read with multiple threads
+    run_volatile_counter_cases(prototype, 4, 2);
+}
+
+TEST(metrics_test, volatile_counter)
+{
+    // Test both kinds of volatile counter
+    run_volatile_counter_cases<striped_long_adder>(&METRIC_test_volatile_counter);
+    run_volatile_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_volatile_counter);
+}
+
 } // namespace dsn

From bf33b2b964dcac0b610e90b027f50ffda4fbe10b Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Tue, 19 Apr 2022 21:49:02 +0800
Subject: [PATCH 08/21] feat(new_metrics): make the counter increment
 monotonically (#1095)

---
 include/dsn/utility/metrics.h   |  62 +++++++++++++---
 src/utils/metrics.cpp           |   1 -
 src/utils/test/metrics_test.cpp | 122 +++++++++++++++++++++++---------
 3 files changed, 142 insertions(+), 43 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index de7be1e831..22da07b33c 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -24,6 +24,8 @@
 #include <unordered_map>
 #include <utility>
 
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
@@ -287,9 +289,16 @@ class metric : public ref_counter
     DISALLOW_COPY_AND_ASSIGN(metric);
 };
 
-// A gauge is an instantaneous measurement of a discrete value. It represents a single numerical
-// value that can arbitrarily go up and down. It's typically used for measured values like current
-// memory usage, the total capacity and available ratio of a disk, etc.
+// A gauge is a metric that represents a single numerical value that can arbitrarily go up and
+// down. Usually there are 2 scenarios for a gauge.
+//
+// Firstly, a gauge can be used as an instantaneous measurement of a discrete value. Typical
+// usages in this scenario are current memory usage, the total capacity and available ratio of
+// a disk, etc.
+//
+// Secondly, a gauge can be used as a counter that increases and decreases. In this scenario only
+// integral types are supported, and its typical usages are the number of tasks in queues, current
+// number of running manual compacts, etc.
 template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
 class gauge : public metric
 {
@@ -298,6 +307,34 @@ class gauge : public metric
 
     void set(const T &val) { _value.store(val, std::memory_order_relaxed); }
 
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void increment_by(Int x)
+    {
+        _value.fetch_add(x, std::memory_order_relaxed);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void decrement_by(Int x)
+    {
+        increment_by(-x);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void increment()
+    {
+        increment_by(1);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void decrement()
+    {
+        increment_by(-1);
+    }
+
 protected:
     gauge(const metric_prototype *prototype, const T &initial_val)
         : metric(prototype), _value(initial_val)
@@ -333,9 +370,13 @@ using gauge_ptr = ref_ptr<gauge<T>>;
 template <typename T>
 using gauge_prototype = metric_prototype_with<gauge<T>>;
 
-// A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
-// used to measure the number of tasks in queues, current number of running manual compacts,
-// etc. All counters start out at 0.
+// A counter in essence is a 64-bit integer that increases monotonically. It should be noted that
+// the counter does not support decreasing. If a decrease is needed, please consider using the
+// gauge instead.
+//
+// The counter can be typically used to measure the number of processed requests, which in the
+// future can help to compute the QPS. All counters start out at 0, and are non-negative
+// since they are monotonic.
 //
 // `IsVolatile` is false by default. Once it's specified as true, the counter will be volatile.
 // The value() function of a volatile counter will reset the counter atomically after its value
@@ -364,9 +405,14 @@ class counter : public metric
         return _adder.fetch_and_reset();
     }
 
-    void increment_by(int64_t x) { _adder.increment_by(x); }
+    // NOTICE: x MUST be a non-negative integer.
+    void increment_by(int64_t x)
+    {
+        dassert_f(x >= 0, "delta({}) by increment for counter must be a non-negative integer", x);
+        _adder.increment_by(x);
+    }
+
     void increment() { _adder.increment(); }
-    void decrement() { _adder.decrement(); }
 
     void reset() { _adder.reset(); }
 
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 8f96792b81..0ec1415cac 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -18,7 +18,6 @@
 #include <dsn/utility/metrics.h>
 
 #include <dsn/c/api_utilities.h>
-#include <dsn/dist/fmt_logging.h>
 
 namespace dsn {
 
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index cd38ad5116..6ba5b65687 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -294,7 +294,7 @@ TEST(metrics_test, gauge_int64)
         ASSERT_EQ(my_metric->value(), test.new_value);
 
         auto metrics = my_server_entity->metrics();
-        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_int64].get()), my_metric.get());
+        ASSERT_EQ(metrics[&METRIC_test_gauge_int64].get(), static_cast<metric *>(my_metric.get()));
 
         ASSERT_EQ(my_metric->prototype(),
                   static_cast<const metric_prototype *>(&METRIC_test_gauge_int64));
@@ -353,12 +353,26 @@ void execute(int64_t num_threads, std::function<void(int)> runner)
     }
 }
 
-template <typename Adder>
-void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
-                              int64_t base_value,
-                              int64_t num_operations,
-                              int64_t num_threads,
-                              int64_t &result)
+template <typename MetricPtr>
+void increment_by(std::integral_constant<bool, true>, MetricPtr &my_metric, int64_t x)
+{
+    my_metric->increment_by(x);
+}
+
+template <typename MetricPtr>
+void increment_by(std::integral_constant<bool, false>, MetricPtr &my_metric, int64_t x)
+{
+    // If x is positive, metric will be increased; otherwise, the metric will be decreased.
+    my_metric->decrement_by(-x);
+}
+
+template <bool IsIncrement, typename MetricPtr>
+void run_increment_by(MetricPtr &my_metric,
+                      int64_t base_value,
+                      int64_t num_operations,
+                      int64_t num_threads,
+                      int64_t &result,
+                      bool allow_negative = true)
 {
     std::vector<int64_t> deltas;
     int64_t n = num_operations * num_threads;
@@ -367,7 +381,7 @@ void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
     int64_t expected_value = base_value;
     for (int64_t i = 0; i < n; ++i) {
         auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
-        if (delta % 3 == 0) {
+        if (allow_negative && delta % 3 == 0) {
             delta = -delta;
         }
         expected_value += delta;
@@ -376,19 +390,20 @@ void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
 
     execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
-            my_metric->increment_by(deltas[tid * num_operations + i]);
+            auto delta = deltas[tid * num_operations + i];
+            increment_by(std::integral_constant<bool, IsIncrement>{}, my_metric, delta);
         }
     });
     ASSERT_EQ(my_metric->value(), expected_value);
     result = expected_value;
 }
 
-template <typename Adder>
-void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
-                           int64_t base_value,
-                           int64_t num_operations,
-                           int64_t num_threads,
-                           int64_t &result)
+template <typename MetricPtr>
+void run_increment(MetricPtr &my_metric,
+                   int64_t base_value,
+                   int64_t num_operations,
+                   int64_t num_threads,
+                   int64_t &result)
 {
     execute(num_threads, [num_operations, &my_metric](int) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
@@ -401,12 +416,12 @@ void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
     result = expected_value;
 }
 
-template <typename Adder>
-void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
-                           int64_t base_value,
-                           int64_t num_operations,
-                           int64_t num_threads,
-                           int64_t &result)
+template <typename MetricPtr>
+void run_decrement(MetricPtr &my_metric,
+                   int64_t base_value,
+                   int64_t num_operations,
+                   int64_t num_threads,
+                   int64_t &result)
 {
     execute(num_threads, [num_operations, &my_metric](int) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
@@ -419,8 +434,52 @@ void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
     result = expected_value;
 }
 
+void run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype, int64_t num_threads)
+{
+    // Test cases:
+    // - test the gauge with small-scale computations
+    // - test the gauge with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t increments_by;
+        int64_t decrements_by;
+        int64_t increments;
+        int64_t decrements;
+    } tests[] = {{"server_13", 100, 100, 1000, 1000},
+                 {"server_14", 1000000, 1000000, 10000000, 10000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        int64_t value = 0;
+        ASSERT_EQ(my_metric->value(), value);
+        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value);
+        run_increment_by<false>(my_metric, value, test.decrements_by, num_threads, value);
+        run_increment(my_metric, value, test.increments, num_threads, value);
+        run_decrement(my_metric, value, test.decrements, num_threads, value);
+
+        // Reset to 0 since this metric could be used again
+        my_metric->set(0);
+        ASSERT_EQ(my_metric->value(), 0);
+    }
+}
+
+void run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype)
+{
+    // Do single-threaded tests
+    run_gauge_increment_cases(prototype, 1);
+
+    // Do multi-threaded tests
+    run_gauge_increment_cases(prototype, 4);
+}
+
+TEST(metrics_test, gauge_increment) { run_gauge_increment_cases(&METRIC_test_gauge_int64); }
+
 template <typename Adder>
-void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
+void run_counter_cases(dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
 {
     // Test cases:
     // - test the counter with small-scale computations
@@ -430,8 +489,7 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
         std::string entity_id;
         int64_t increments_by;
         int64_t increments;
-        int64_t decrements;
-    } tests[] = {{"server_9", 100, 1000, 1000}, {"server_10", 1000000, 10000000, 10000000}};
+    } tests[] = {{"server_15", 100, 1000}, {"server_16", 1000000, 10000000}};
 
     for (const auto &test : tests) {
         auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
@@ -440,9 +498,8 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
 
         int64_t value = 0;
         ASSERT_EQ(my_metric->value(), value);
-        run_counter_increment_by(my_metric, value, test.increments_by, num_threads, value);
-        run_counter_increment(my_metric, value, test.increments, num_threads, value);
-        run_counter_decrement(my_metric, value, test.decrements, num_threads, value);
+        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value, false);
+        run_increment(my_metric, value, test.increments, num_threads, value);
 
         my_metric->reset();
         ASSERT_EQ(my_metric->value(), 0);
@@ -450,12 +507,12 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
         auto metrics = my_server_entity->metrics();
         ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
 
-        ASSERT_EQ(my_metric->prototype(), prototype);
+        ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));
     }
 }
 
 template <typename Adder>
-void run_counter_cases(::dsn::counter_prototype<Adder> *prototype)
+void run_counter_cases(dsn::counter_prototype<Adder> *prototype)
 {
     // Do single-threaded tests
     run_counter_cases(prototype, 1);
@@ -484,9 +541,6 @@ void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_me
     int64_t expected_value = 0;
     for (int64_t i = 0; i < n; ++i) {
         auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
-        if (delta % 3 == 0) {
-            delta = -delta;
-        }
         expected_value += delta;
         deltas.push_back(delta);
     }
@@ -552,7 +606,7 @@ void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototyp
     {
         std::string entity_id;
         int64_t num_operations;
-    } tests[] = {{"server_11", 5000}, {"server_12", 5000000}};
+    } tests[] = {{"server_17", 5000}, {"server_18", 5000000}};
 
     for (const auto &test : tests) {
         auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
@@ -565,7 +619,7 @@ void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototyp
         auto metrics = my_server_entity->metrics();
         ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
 
-        ASSERT_EQ(my_metric->prototype(), prototype);
+        ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));
     }
 }
 

From b50653e406e06b06afb92c390e34c0bc36d29c1f Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 25 May 2022 18:08:17 +0800
Subject: [PATCH 09/21] feat(new_metrics): support to find multiple nth
 elements of a sequence container at a time based on nth_element() of STL
 (#1106)

---
 include/dsn/utility/nth_element.h             | 128 ++++++++
 src/perf_counter/perf_counter_atomic.h        |  18 +-
 src/utils/test/CMakeLists.txt                 |   1 +
 .../test/nth_element_bench/CMakeLists.txt     |  39 +++
 .../nth_element_bench/nth_element_bench.cpp   | 230 ++++++++++++++
 src/utils/test/nth_element_test.cpp           | 288 ++++++++++++++++++
 src/utils/test/nth_element_utils.h            | 180 +++++++++++
 7 files changed, 881 insertions(+), 3 deletions(-)
 create mode 100644 include/dsn/utility/nth_element.h
 create mode 100644 src/utils/test/nth_element_bench/CMakeLists.txt
 create mode 100644 src/utils/test/nth_element_bench/nth_element_bench.cpp
 create mode 100644 src/utils/test/nth_element_test.cpp
 create mode 100644 src/utils/test/nth_element_utils.h

diff --git a/include/dsn/utility/nth_element.h b/include/dsn/utility/nth_element.h
new file mode 100644
index 0000000000..d6fbe85d04
--- /dev/null
+++ b/include/dsn/utility/nth_element.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+
+namespace dsn {
+
+// The finder helps to find multiple nth elements of a sequence container (e.g. std::vector)
+// at a time, based on nth_element() of STL.
+template <typename T, typename Compare = std::less<T>>
+class stl_nth_element_finder
+{
+public:
+    using value_type = T;
+    using container_type = std::vector<value_type>;
+    using size_type = typename container_type::size_type;
+    using nth_container_type = std::vector<size_type>;
+
+    stl_nth_element_finder(const Compare &comp = Compare()) : _nths(), _elements(), _comp(comp) {}
+
+    // Set with specified nth indexes. An nth index is typically an index of the sequence
+    // container (e.g. std::vector). This method allows nth indexes to be updated dynamically.
+    //
+    // There are 2 reasons why both `_nths` and `_elements` are put into the sequence container:
+    //
+    // (1) The users of stl_nth_element_finder, such as the metric of percentile, may pass
+    // duplicate nth indexes to `_nths`. For example, suppose that the sampled window size is
+    // 100, both P99 and P999 will have the same nth element -- namely 99th element. Thus it
+    // will be convenient for users if `nths` can contain duplicate elements.
+    //
+    // The sequence container can contain duplicate elements, even if all elements in the container
+    // are sorted. Therefore, there may be identical indexes in `nths`.
+    //
+    // (2) The sequence container is more cache-friendly. While an nth element is selected, it's
+    // cache-friendly to write it into `_elements`. After all nth elements are collected into
+    // `_elements`, scanning them (`elements()`) is also cache-friendly, even if there are many
+    // nth indexes in `_nths`. In contrast, directly accessing the nth element in the array
+    // will not be cache-friendly especially when the array is large.
+    //
+    // Notice that the indexes in `nths` list must be ordered. After `operator()` is executed,
+    // the elements returned by `elements()` will be in the order of the sorted nth indexes.
+    void set_nths(const nth_container_type &nths)
+    {
+        _nths = nths;
+        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),
+                  "nth indexes({}) is not sorted",
+                  fmt::join(_nths, " "));
+
+        _elements.assign(_nths.size(), value_type{});
+    }
+
+    // Find the multiple nth elements.
+    //
+    // Typically `begin` is the beginning iterator of the sequence container. `begin` plus each
+    // member of `_nths` will be the real nth element of the sequence container.
+    //
+    // [first, last) is the real range for finding the multiple nth elements.
+    template <typename RandomAccessIterator>
+    void
+    operator()(RandomAccessIterator begin, RandomAccessIterator first, RandomAccessIterator last)
+    {
+        for (size_type i = 0; i < _nths.size();) {
+            auto nth_iter = begin + _nths[i];
+            dassert_f(nth_iter >= first && nth_iter < last, "Invalid iterators for nth_element()");
+            std::nth_element(first, nth_iter, last, _comp);
+            _elements[i] = *nth_iter;
+
+            // Identical nth indexes should be processed. See `set_nths()` for details.
+            for (++i; i < _nths.size() && _nths[i] == _nths[i - 1]; ++i) {
+                _elements[i] = *nth_iter;
+            }
+
+            first = nth_iter + 1;
+        }
+    }
+
+    const container_type &elements() const { return _elements; }
+
+private:
+    nth_container_type _nths;
+    container_type _elements;
+    Compare _comp;
+
+    DISALLOW_COPY_AND_ASSIGN(stl_nth_element_finder);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+class floating_comparator
+{
+public:
+    bool operator()(const T &lhs, const T &rhs) const
+    {
+        return rhs - lhs >= std::numeric_limits<T>::epsilon();
+    }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_stl_nth_element_finder = stl_nth_element_finder<T, floating_comparator<T>>;
+
+} // namespace dsn
diff --git a/src/perf_counter/perf_counter_atomic.h b/src/perf_counter/perf_counter_atomic.h
index 0c2a5e1146..74d3aee74f 100644
--- a/src/perf_counter/perf_counter_atomic.h
+++ b/src/perf_counter/perf_counter_atomic.h
@@ -197,7 +197,8 @@ class perf_counter_number_percentile_atomic : public perf_counter
                                           const char *section,
                                           const char *name,
                                           dsn_perf_counter_type_t type,
-                                          const char *dsptr)
+                                          const char *dsptr,
+                                          bool use_timer = true)
         : perf_counter(app, section, name, type, dsptr), _tail(0)
     {
         _results[COUNTER_PERCENTILE_50] = 0;
@@ -206,6 +207,10 @@ class perf_counter_number_percentile_atomic : public perf_counter
         _results[COUNTER_PERCENTILE_99] = 0;
         _results[COUNTER_PERCENTILE_999] = 0;
 
+        if (!use_timer) {
+            return;
+        }
+
         _counter_computation_interval_seconds = (int)dsn_config_get_value_uint64(
             "components.pegasus_perf_counter_number_percentile_atomic",
             "counter_computation_interval_seconds",
@@ -214,12 +219,17 @@ class perf_counter_number_percentile_atomic : public perf_counter
             "pegasus_perf_counter_number_percentile_atomic counters");
         _timer.reset(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios));
         _timer->expires_from_now(
-            boost::posix_time::seconds(rand() % _counter_computation_interval_seconds + 1));
+            boost::posix_time::seconds(::rand() % _counter_computation_interval_seconds + 1));
         _timer->async_wait(std::bind(
             &perf_counter_number_percentile_atomic::on_timer, this, _timer, std::placeholders::_1));
     }
 
-    ~perf_counter_number_percentile_atomic(void) { _timer->cancel(); }
+    ~perf_counter_number_percentile_atomic(void)
+    {
+        if (_timer) {
+            _timer->cancel();
+        }
+    }
 
     virtual void increment() { dassert(false, "invalid execution flow"); }
     virtual void decrement() { dassert(false, "invalid execution flow"); }
@@ -277,6 +287,8 @@ class perf_counter_number_percentile_atomic : public perf_counter
     }
 
 private:
+    friend class perf_counter_nth_element_finder;
+
     struct compute_context
     {
         int64_t ask[COUNTER_PERCENTILE_COUNT];
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt
index d0d27d1222..0516fa7dcd 100644
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -25,5 +25,6 @@ set(MY_BINPLACES "${CMAKE_CURRENT_SOURCE_DIR}/config-bad-section.ini"
         "${CMAKE_CURRENT_SOURCE_DIR}/run.sh"
         "${CMAKE_CURRENT_SOURCE_DIR}/clear.sh"
         )
+add_subdirectory(nth_element_bench)
 add_definitions(-Wno-dangling-else)
 dsn_add_test()
diff --git a/src/utils/test/nth_element_bench/CMakeLists.txt b/src/utils/test/nth_element_bench/CMakeLists.txt
new file mode 100644
index 0000000000..217d9c4363
--- /dev/null
+++ b/src/utils/test/nth_element_bench/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(MY_PROJ_NAME nth_element_bench)
+project(${MY_PROJ_NAME} C CXX)
+
+# Source files under CURRENT project directory will be automatically included.
+# You can manually set MY_PROJ_SRC to include source files under other directories.
+set(MY_PROJ_SRC "")
+
+# Search mode for source files under CURRENT project directory?
+# "GLOB_RECURSE" for recursive search
+# "GLOB" for non-recursive search
+set(MY_SRC_SEARCH_MODE "GLOB")
+
+set(MY_PROJ_LIBS dsn_runtime dsn_utils)
+
+set(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)
+
+# Extra files that will be installed
+set(MY_BINPLACES "")
+
+dsn_add_executable()
+
+dsn_install_executable()
diff --git a/src/utils/test/nth_element_bench/nth_element_bench.cpp b/src/utils/test/nth_element_bench/nth_element_bench.cpp
new file mode 100644
index 0000000000..c1c5a2382d
--- /dev/null
+++ b/src/utils/test/nth_element_bench/nth_element_bench.cpp
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/nth_element.h>
+#include <dsn/utility/smart_pointers.h>
+#include <dsn/utility/string_conv.h>
+#include <dsn/utility/strings.h>
+
+#include "utils/test/nth_element_utils.h"
+
+// Print the usage of the benchmark to stdout; `cmd` is the executable name shown in the usage.
+void print_usage(const char *cmd)
+{
+    fmt::print("USAGE: {} <num_operations> <array_size> <range_size> [nths]\n", cmd);
+    fmt::print("Run a simple benchmark that executes all sorts of nth_element_finder.\n\n");
+    fmt::print("    <num_operations>       the number of operations.\n");
+    fmt::print("    <array_size>           the size of array for each operation.\n");
+    fmt::print("    <range_size>           the size of range for each operation to \n"
+               "                           generate the integers randomly.\n");
+    fmt::print("    [nths]                 the nth list for each operation, separated by \n"
+               "                           comma(,) if more than one element, e.g., \n"
+               "                           \"2,5\" means finding 2nd and 5th elements;\n"
+               "                           if this arg is missing, nth list of \n"
+               "                           perf_counter_number_percentile_atomic will be \n"
+               "                           used, that is, P50, P90, P95, P99 and P999.\n");
+}
+
+template <typename NthElementFinder>
+int64_t run_nth_element(const std::vector<int64_t> &expected_elements,
+                        NthElementFinder &finder,
+                        std::function<void()> exec)
+{
+    auto start = dsn_now_ns();
+    exec();
+    auto end = dsn_now_ns();
+
+    if (finder.elements() != expected_elements) {
+        fmt::print(
+            "actual_elements != expected_elements\nactual_elements = {}\nexpected_elements: {}\n",
+            fmt::join(finder.elements(), " "),
+            fmt::join(expected_elements, " "));
+        ::exit(-1);
+    }
+
+    return static_cast<int64_t>(end - start);
+}
+
+int64_t run_stl_nth_element(const std::vector<int64_t> &array,
+                            const std::vector<int64_t> &expected_elements,
+                            dsn::stl_nth_element_finder<int64_t> &finder)
+{
+    auto start = dsn_now_ns();
+    std::vector<int64_t> container(array.size());
+    std::copy(array.begin(), array.end(), container.begin());
+    auto end = dsn_now_ns();
+
+    return static_cast<int64_t>(end - start) +
+           run_nth_element(expected_elements, finder, [&finder, &container]() {
+               finder(container.begin(), container.begin(), container.end());
+           });
+}
+
+void run_bench(size_t num_operations,
+               size_t array_size,
+               uint64_t range_size,
+               const std::vector<size_t> &nths)
+{
+    auto get_perf_counter_nths = [](size_t num) -> std::vector<size_t> {
+        return {static_cast<size_t>(num * 0.5),
+                static_cast<size_t>(num * 0.9),
+                static_cast<size_t>(num * 0.95),
+                static_cast<size_t>(num * 0.99),
+                static_cast<size_t>(num * 0.999)};
+    };
+
+    dsn::perf_counter_nth_element_finder perf_counter_finder;
+    dsn::stl_nth_element_finder<int64_t> stl_finder;
+
+    std::map<std::string, int64_t> exec_time_map = {{"perf_counter_nth_element", 0},
+                                                    {"stl_nth_element", 0}};
+    for (size_t i = 0; i < num_operations; ++i) {
+        std::vector<size_t> real_nths;
+        if (nths.empty()) {
+            real_nths = get_perf_counter_nths(array_size);
+        } else {
+            real_nths = nths;
+        }
+
+        dsn::integral_nth_element_case_generator<int64_t> generator(
+            array_size, 0, range_size, real_nths);
+
+        std::vector<int64_t> array;
+        std::vector<int64_t> expected_elements;
+        generator(array, expected_elements);
+
+        // Once `nths` is empty, the comparison between stl_nth_element_finder and
+        // perf_counter_nth_element_finder will be launched.
+        if (nths.empty()) {
+            perf_counter_finder.load_data(array);
+            exec_time_map["perf_counter_nth_element"] +=
+                run_nth_element(expected_elements, perf_counter_finder, [&perf_counter_finder]() {
+                    perf_counter_finder();
+                });
+        }
+
+        stl_finder.set_nths(real_nths);
+        exec_time_map["stl_nth_element"] +=
+            run_stl_nth_element(array, expected_elements, stl_finder);
+    }
+
+    for (const auto &t : exec_time_map) {
+        if (t.second == 0) {
+            continue;
+        }
+
+        std::chrono::nanoseconds nano(t.second);
+        auto duration_s = std::chrono::duration_cast<std::chrono::duration<double>>(nano).count();
+        fmt::print("Running {} operations of {} with each array {} elements took {} seconds.\n",
+                   num_operations,
+                   t.first,
+                   array_size,
+                   duration_s);
+    }
+}
+
+// Entry point of the benchmark: parse <num_operations>, <array_size>, <range_size> and the
+// optional [nths] from the command line, then run the benchmark with them. Once any argument
+// is missing or invalid, the usage is printed and the process exits with -1.
+int main(int argc, char **argv)
+{
+    if (argc < 4) {
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    // argv[1]: the number of operations, which must be greater than 0.
+    uint64_t num_operations;
+    if (!dsn::buf2uint64(argv[1], num_operations)) {
+        fmt::print(stderr, "Invalid num_operations: {}\n\n", argv[1]);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (num_operations <= 0) {
+        fmt::print(stderr, "num_operations should be > 0: {}\n\n", num_operations);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    // argv[2]: the size of the array for each operation, within (0, MAX_QUEUE_LENGTH].
+    uint64_t array_size;
+    if (!dsn::buf2uint64(argv[2], array_size)) {
+        fmt::print(stderr, "Invalid array_size: {}\n\n", argv[2]);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (array_size <= 0 || array_size > MAX_QUEUE_LENGTH) {
+        fmt::print(
+            stderr, "array_size({}) should be > 0 and <= {}\n\n", array_size, MAX_QUEUE_LENGTH);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    // argv[3]: the size of the range to generate the integers, which must be greater than 0.
+    uint64_t range_size;
+    if (!dsn::buf2uint64(argv[3], range_size)) {
+        fmt::print(stderr, "Invalid range_size: {}\n\n", argv[3]);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (range_size <= 0) {
+        fmt::print(stderr, "range_size({}) should be > 0\n\n", range_size);
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    // argv[4] (optional): the comma-separated nth list; it is left empty if the arg is missing.
+    std::vector<size_t> nths;
+    if (argc >= 5) {
+        std::vector<std::string> nth_strs;
+        dsn::utils::split_args(argv[4], nth_strs, ',');
+        for (const auto &s : nth_strs) {
+            size_t nth;
+            if (!dsn::buf2uint64(s, nth)) {
+                fmt::print(stderr, "Invalid nth number: {}\n\n", s);
+                print_usage(argv[0]);
+                ::exit(-1);
+            }
+
+            // Both `nth` and `array_size` must be passed since the message has 2 placeholders.
+            if (nth >= array_size) {
+                fmt::print(stderr, "nth({}) should be < array_size({})\n\n", nth, array_size);
+                print_usage(argv[0]);
+                ::exit(-1);
+            }
+
+            nths.push_back(nth);
+        }
+    }
+
+    run_bench(num_operations, array_size, range_size, nths);
+
+    return 0;
+}
diff --git a/src/utils/test/nth_element_test.cpp b/src/utils/test/nth_element_test.cpp
new file mode 100644
index 0000000000..44586ecf26
--- /dev/null
+++ b/src/utils/test/nth_element_test.cpp
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/nth_element.h>
+
+#include <fmt/format.h>
+#include <gtest/gtest.h>
+
+#include "nth_element_utils.h"
+
+namespace dsn {
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_integral<typename NthElementFinder::value_type>::value>::type>
+void run_integral_cases(const typename NthElementFinder::container_type &array,
+                        const typename NthElementFinder::nth_container_type &nths,
+                        const typename NthElementFinder::container_type &expected_elements)
+{
+    auto container = array;
+
+    NthElementFinder finder;
+    finder.set_nths(nths);
+    finder(container.begin(), container.begin(), container.end());
+
+    ASSERT_EQ(finder.elements(), expected_elements);
+}
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_integral<typename NthElementFinder::value_type>::value>::type>
+void run_basic_int64_cases()
+{
+    // Test cases:
+    // - both the array and the nth list are empty
+    // - the array has only one element, and the nth list is empty
+    // - the array has only one element, and the nth list has only one element
+    // - the array has only one element, and the nth list has duplicate elements
+    // - the array has only 2 identical elements, and the nth list has only one element
+    // - the array has only 2 identical elements, and the nth list has both elements
+    // - the array has only 2 identical elements, and the nth list has duplicate elements
+    // - the array has only 2 ordered elements, and the nth list has only one element
+    // - the array has only 2 ordered elements, and the nth list has both elements
+    // - the array has only 2 ordered elements, and the nth list has duplicate elements
+    // - the array has only 2 unordered elements, and the nth list has only one element
+    // - the array has only 2 unordered elements, and the nth list has both elements
+    // - the array has only 2 unordered elements, and the nth list has duplicate elements
+    // - the array contains identical elements, and the nth list has only one element
+    // - the array contains identical elements, and the nth list has all elements
+    // - the array contains identical elements, and the nth list has duplicate elements
+    // - all elements in the array are identical, and the nth list has 2 elements
+    // - all elements in the array are identical, and the nth list has all elements
+    // - all elements in the array are identical, and the nth list has duplicate elements
+    // - each element in the array is different from others, and the nth list has 3 elements
+    // - each element in the array is different from others, and the nth list has all elements
+    // - each element in the array is different from others, and the nth list has duplicate elements
+    struct test_case
+    {
+        typename NthElementFinder::container_type array;
+        typename NthElementFinder::nth_container_type nths;
+        typename NthElementFinder::container_type expected_elements;
+    } tests[] = {{{}, {}, {}},
+                 {{1}, {}, {}},
+                 {{1}, {0}, {1}},
+                 {{1}, {0, 0}, {1, 1}},
+                 {{1, 1}, {1}, {1}},
+                 {{1, 1}, {0, 1}, {1, 1}},
+                 {{1, 1}, {1, 1}, {1, 1}},
+                 {{1, 2}, {1}, {2}},
+                 {{1, 2}, {0, 1}, {1, 2}},
+                 {{1, 2}, {1, 1}, {2, 2}},
+                 {{2, 1}, {1}, {2}},
+                 {{2, 1}, {0, 1}, {1, 2}},
+                 {{2, 1}, {0, 0}, {1, 1}},
+                 {{2, 1, 2, 3, 2}, {2}, {2}},
+                 {{2, 1, 2, 3, 2}, {0, 1, 2, 3, 4}, {1, 2, 2, 2, 3}},
+                 {{2, 1, 2, 3, 2}, {0, 0, 2, 2, 3, 3}, {1, 1, 2, 2, 2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {2, 3}, {2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {0, 1, 2, 3, 4, 5}, {2, 2, 2, 2, 2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {1, 1, 2, 2, 5, 5}, {2, 2, 2, 2, 2, 2}},
+                 {{5, 6, 2, 8, 1, 7}, {3, 4, 5}, {6, 7, 8}},
+                 {{5, 6, 2, 8, 1, 7}, {0, 1, 2, 3, 4, 5}, {1, 2, 5, 6, 7, 8}},
+                 {{5, 6, 2, 8, 1, 7}, {0, 0, 2, 2, 5, 5}, {1, 1, 5, 5, 8, 8}}};
+
+    for (const auto &test : tests) {
+        run_integral_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);
+    }
+}
+
+TEST(nth_element_test, basic_int64) { run_basic_int64_cases<stl_nth_element_finder<int64_t>>(); }
+
+template <typename NthElementFinder>
+void run_generated_int64_cases()
+{
+    // Test cases:
+    // - generate empty array with empty nth list
+    // - generate an array of only one element with the nth list of only one element
+    // - generate an array of 2 elements with the nth list of 2 elements
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 100
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000
+    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000
+    struct test_case
+    {
+        typename NthElementFinder::size_type array_size;
+        int64_t initial_value;
+        uint64_t range_size;
+        typename NthElementFinder::nth_container_type nths;
+    } tests[] = {{0, 0, 2, {}},
+                 {1, 0, 2, {0}},
+                 {2, 0, 2, {0, 1}},
+                 {5000, 0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10000, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};
+
+    for (const auto &test : tests) {
+        integral_nth_element_case_generator<int64_t> generator(
+            test.array_size, test.initial_value, test.range_size, test.nths);
+
+        integral_nth_element_case_generator<int64_t>::container_type array;
+        integral_nth_element_case_generator<int64_t>::container_type expected_elements;
+        generator(array, expected_elements);
+
+        run_integral_cases<NthElementFinder>(array, test.nths, expected_elements);
+    }
+}
+
+TEST(nth_element_test, generated_int64)
+{
+    run_generated_int64_cases<stl_nth_element_finder<int64_t>>();
+}
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>
+void run_floating_cases(const typename NthElementFinder::container_type &array,
+                        const typename NthElementFinder::nth_container_type &nths,
+                        const typename NthElementFinder::container_type &expected_elements)
+{
+    auto container = array;
+
+    NthElementFinder finder;
+    finder.set_nths(nths);
+    finder(container.begin(), container.begin(), container.end());
+
+    ASSERT_EQ(finder.elements().size(), expected_elements.size());
+    for (typename NthElementFinder::size_type i = 0; i < finder.elements().size(); ++i) {
+        ASSERT_DOUBLE_EQ(finder.elements()[i], expected_elements[i]);
+    }
+}
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>
+void run_basic_double_cases()
+{
+    // Test cases:
+    // - both the array and the nth list are empty
+    // - the array has only one element, and the nth list is empty
+    // - the array has only one element, and the nth list has only one element
+    // - the array has only one element, and the nth list has duplicate elements
+    // - the array has only 2 identical elements, and the nth list has only one element
+    // - the array has only 2 identical elements, and the nth list has both elements
+    // - the array has only 2 identical elements, and the nth list has duplicate elements
+    // - the array has only 2 ordered elements, and the nth list has only one element
+    // - the array has only 2 ordered elements, and the nth list has both elements
+    // - the array has only 2 ordered elements, and the nth list has duplicate elements
+    // - the array has only 2 unordered elements, and the nth list has only one element
+    // - the array has only 2 unordered elements, and the nth list has both elements
+    // - the array has only 2 unordered elements, and the nth list has duplicate elements
+    // - the array contains identical elements, and the nth list has only one element
+    // - the array contains identical elements, and the nth list has all elements
+    // - the array contains identical elements, and the nth list has duplicate elements
+    // - all elements in the array are identical, and the nth list has 2 elements
+    // - all elements in the array are identical, and the nth list has all elements
+    // - all elements in the array are identical, and the nth list has duplicate elements
+    // - each element in the array is different from others, and the nth list has 3 elements
+    // - each element in the array is different from others, and the nth list has all elements
+    struct test_case
+    {
+        typename NthElementFinder::container_type array;
+        typename NthElementFinder::nth_container_type nths;
+        typename NthElementFinder::container_type expected_elements;
+    } tests[] = {
+        {{}, {}, {}},
+        {{1.23}, {}, {}},
+        {{1.23}, {0}, {1.23}},
+        {{1.23}, {0, 0}, {1.23, 1.23}},
+        {{1.23, 1.23}, {1}, {1.23}},
+        {{1.23, 1.23}, {0, 1}, {1.23, 1.23}},
+        {{1.23, 1.23}, {1, 1}, {1.23, 1.23}},
+        {{1.23, 2.34}, {1}, {2.34}},
+        {{1.23, 2.34}, {0, 1}, {1.23, 2.34}},
+        {{1.23, 2.34}, {1, 1}, {2.34, 2.34}},
+        {{2.34, 1.23}, {1}, {2.34}},
+        {{2.34, 1.23}, {0, 1}, {1.23, 2.34}},
+        {{2.34, 1.23}, {0, 0}, {1.23, 1.23}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {2}, {2.34}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 1, 2, 3, 4}, {1.23, 2.34, 2.34, 2.34, 3.56}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 0, 2, 2, 3, 3}, {1.23, 1.23, 2.34, 2.34, 2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34}, {2, 3}, {2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},
+         {0, 1, 2, 3, 4, 5},
+         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},
+         {1, 1, 2, 2, 5, 5},
+         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89}, {3, 4, 5}, {6.78, 7.89, 8.90}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},
+         {0, 1, 2, 3, 4, 5},
+         {1.23, 2.34, 5.67, 6.78, 7.89, 8.90}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},
+         {0, 0, 2, 2, 5, 5},
+         {1.23, 1.23, 5.67, 5.67, 8.90, 8.90}}};
+
+    for (const auto &test : tests) {
+        run_floating_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);
+    }
+}
+
+TEST(nth_element_test, basic_double) { run_basic_double_cases<stl_nth_element_finder<double>>(); }
+
+template <typename NthElementFinder>
+void run_generated_double_cases()
+{
+    // Test cases:
+    // - generate empty array with empty nth list
+    // - generate an array of only one element with the nth list of only one element
+    // - generate an array of 2 elements with the nth list of 2 elements
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 100
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000
+    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000
+    struct test_case
+    {
+        typename NthElementFinder::size_type array_size;
+        double initial_value;
+        uint64_t range_size;
+        typename NthElementFinder::nth_container_type nths;
+    } tests[] = {{0, 0.0, 2, {}},
+                 {1, 0.0, 2, {0}},
+                 {2, 0.0, 2, {0, 1}},
+                 {5000, 0.0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10000, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};
+
+    for (const auto &test : tests) {
+        floating_nth_element_case_generator<double> generator(
+            test.array_size, test.initial_value, test.range_size, test.nths);
+
+        floating_nth_element_case_generator<double>::container_type array;
+        floating_nth_element_case_generator<double>::container_type expected_elements;
+        generator(array, expected_elements);
+
+        run_floating_cases<NthElementFinder>(array, test.nths, expected_elements);
+    }
+}
+
+TEST(nth_element_test, generated_double)
+{
+    run_generated_double_cases<stl_nth_element_finder<double>>();
+}
+
+} // namespace dsn
diff --git a/src/utils/test/nth_element_utils.h b/src/utils/test/nth_element_utils.h
new file mode 100644
index 0000000000..7b138b2cfb
--- /dev/null
+++ b/src/utils/test/nth_element_utils.h
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/rand.h>
+
+#include "perf_counter/perf_counter_atomic.h"
+
+namespace dsn {
+
+// The generator is used to produce the test cases randomly for unit tests and benchmarks
+// of nth elements.
+template <typename T,
+          typename Rand,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class nth_element_case_generator
+{
+public:
+    using value_type = T;
+    using container_type = typename std::vector<value_type>;
+    using size_type = typename container_type::size_type;
+    using nth_container_type = typename std::vector<size_type>;
+
+    nth_element_case_generator(size_type array_size,
+                               value_type initial_value,
+                               uint64_t range_size,
+                               const nth_container_type &nths)
+        : _array_size(array_size),
+          _initial_value(initial_value),
+          _range_size(range_size),
+          _nths(nths),
+          _rand(Rand())
+    {
+        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),
+                  "nth indexes({}) is not sorted",
+                  fmt::join(_nths, " "));
+
+        for (const auto &nth : _nths) {
+            dassert_f(
+                nth >= 0 && nth < _array_size, "nth should be in the range [0, {})", _array_size);
+        }
+    }
+
+    ~nth_element_case_generator() = default;
+
+    // Generate an out-of-order `array` sized `_array_size`, and put nth elements of sorted
+    // `array` to `elements` in the order of `_nths` which must be sorted.
+    //
+    // The process has 2 stages:
+    // (1) Generate a sorted `array` from _initial_value. Always generate next element by current
+    // element plus _rand(_range_size). Once the index of an element belongs to nth indexes, it
+    // will be appended to `elements`.
+    // (2) After the sorted `array` is generated, it will be shuffled to be out-of-order.
+    void operator()(container_type &array, container_type &elements)
+    {
+        array.clear();
+        elements.clear();
+
+        auto value = _initial_value;
+        for (size_type i = 0, j = 0; i < _array_size; ++i) {
+            array.push_back(value);
+            for (; j < _nths.size() && _nths[j] == i; ++j) {
+                elements.push_back(value);
+            }
+
+            auto delta = _rand(_range_size);
+            value += delta;
+        }
+        std::random_shuffle(array.begin(), array.end());
+    }
+
+private:
+    const size_type _array_size;
+    const value_type _initial_value;
+    const uint64_t _range_size;
+    const nth_container_type _nths;
+    const Rand _rand;
+
+    DISALLOW_COPY_AND_ASSIGN(nth_element_case_generator);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+class integral_rand_generator
+{
+public:
+    T operator()(const uint64_t &upper) const { return static_cast<T>(rand::next_u64(upper)); }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+using integral_nth_element_case_generator =
+    nth_element_case_generator<T, integral_rand_generator<T>>;
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+class floating_rand_generator
+{
+public:
+    T operator()(const uint64_t &upper) const
+    {
+        return static_cast<T>(rand::next_u64(upper)) +
+               static_cast<T>(rand::next_u64(upper)) / static_cast<T>(upper);
+    }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_nth_element_case_generator =
+    nth_element_case_generator<T, floating_rand_generator<T>>;
+
+// Finder class based on perf_counter in comparison with other finders for multiple nth elements.
+class perf_counter_nth_element_finder
+{
+public:
+    using container_type = typename std::vector<int64_t>;
+    using size_type = typename container_type::size_type;
+
+    perf_counter_nth_element_finder()
+        : _perf_counter("benchmark",
+                        "perf_counter_number_percentile_atomic",
+                        "nth_element",
+                        COUNTER_TYPE_NUMBER_PERCENTILES,
+                        "nth_element implementation by perf_counter_number_percentile_atomic",
+                        false),
+          _elements(COUNTER_PERCENTILE_COUNT, int64_t())
+    {
+    }
+
+    void load_data(const container_type &array)
+    {
+        _perf_counter._tail.store(0, std::memory_order_relaxed);
+        for (const auto &e : array) {
+            _perf_counter.set(e);
+        }
+    }
+
+    void operator()()
+    {
+        _perf_counter.calc(
+            boost::make_shared<dsn::perf_counter_number_percentile_atomic::compute_context>());
+        std::copy(_perf_counter._results,
+                  _perf_counter._results + COUNTER_PERCENTILE_COUNT,
+                  _elements.begin());
+    }
+
+    const container_type &elements() const { return _elements; }
+
+private:
+    dsn::perf_counter_number_percentile_atomic _perf_counter;
+    container_type _elements;
+
+    DISALLOW_COPY_AND_ASSIGN(perf_counter_nth_element_finder);
+};
+
+} // namespace dsn

From ce34b7df95e8b3a895e6122ddd33779aec25a857 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 17 Jun 2022 20:37:59 +0800
Subject: [PATCH 10/21] feat(new_metrics): implement the percentile (#1112)

---
 include/dsn/utility/alloc.h       |  84 +++++++++
 include/dsn/utility/metrics.h     | 300 +++++++++++++++++++++++++++++-
 include/dsn/utility/ports.h       |   3 +
 src/utils/alloc.cpp               |  54 ++++++
 src/utils/latency_tracer.cpp      |   1 +
 src/utils/metrics.cpp             |  50 ++++-
 src/utils/shared_io_service.cpp   |  60 ++++++
 src/utils/shared_io_service.h     |  40 ++--
 src/utils/test/metrics_test.cpp   | 262 +++++++++++++++++++++++++-
 src/utils/test/percentile_utils.h |  88 +++++++++
 10 files changed, 907 insertions(+), 35 deletions(-)
 create mode 100644 include/dsn/utility/alloc.h
 create mode 100644 src/utils/alloc.cpp
 create mode 100644 src/utils/shared_io_service.cpp
 create mode 100644 src/utils/test/percentile_utils.h

diff --git a/include/dsn/utility/alloc.h b/include/dsn/utility/alloc.h
new file mode 100644
index 0000000000..6a5d63e7ce
--- /dev/null
+++ b/include/dsn/utility/alloc.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <new>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+
+namespace dsn {
+
+#ifdef CACHELINE_SIZE
+
+extern void *cacheline_aligned_alloc(size_t size);
+
+extern void cacheline_aligned_free(void *mem_block);
+
+template <typename T>
+using cacheline_aligned_ptr = typename std::unique_ptr<T, std::function<void(void *)>>;
+
+template <typename T>
+cacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len)
+{
+    void *buffer = cacheline_aligned_alloc(sizeof(T) * len);
+    if (dsn_unlikely(buffer == nullptr)) {
+        return cacheline_aligned_ptr<T>(nullptr, [](void *) {});
+    }
+
+    T *array = new (buffer) T[len];
+
+#ifndef NDEBUG
+    if (sizeof(T) <= CACHELINE_SIZE && (sizeof(T) & (sizeof(T) - 1)) == 0) {
+        for (size_t i = 0; i < len; ++i) {
+            T *elem = &(array[i]);
+            dassert_f((reinterpret_cast<const uintptr_t>(elem) & (sizeof(T) - 1)) == 0,
+                      "unaligned array element for cache line: array={}, length={}, index={}, "
+                      "elem={}, elem_size={}, mask={}, cacheline_size={}",
+                      fmt::ptr(array),
+                      len,
+                      i,
+                      fmt::ptr(elem),
+                      sizeof(T),
+                      sizeof(T) - 1,
+                      CACHELINE_SIZE);
+        }
+    }
+#endif
+
+    return cacheline_aligned_ptr<T>(array, cacheline_aligned_free);
+}
+
+template <typename T>
+cacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len, const T &val)
+{
+    auto array = cacheline_aligned_alloc_array<T>(len);
+    if (array) {
+        std::fill(array.get(), array.get() + len, val);
+    }
+
+    return array;
+}
+
+#endif
+
+} // namespace dsn
diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 22da07b33c..0951233931 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,19 +17,29 @@
 
 #pragma once
 
+#include <algorithm>
 #include <atomic>
+#include <bitset>
+#include <functional>
+#include <memory>
 #include <mutex>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
+#include <vector>
+
+#include <boost/asio/deadline_timer.hpp>
 
 #include <dsn/c/api_utilities.h>
 #include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/alloc.h>
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
 #include <dsn/utility/long_adder.h>
+#include <dsn/utility/nth_element.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
 #include <dsn/utility/string_view.h>
@@ -90,6 +100,14 @@
     dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name(                        \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
+// The percentile supports both integral and floating types.
+#define METRIC_DEFINE_percentile_int64(entity_type, name, unit, desc, ...)                         \
+    dsn::percentile_prototype<int64_t> METRIC_##name(                                              \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_percentile_double(entity_type, name, unit, desc, ...)                        \
+    dsn::floating_percentile_prototype<double> METRIC_##name(                                      \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
@@ -102,6 +120,10 @@
     extern dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name
 #define METRIC_DECLARE_concurrent_volatile_counter(name)                                           \
     extern dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name
+#define METRIC_DECLARE_percentile_int64(name)                                                      \
+    extern dsn::percentile_prototype<int64_t> METRIC_##name
+#define METRIC_DECLARE_percentile_double(name)                                                     \
+    extern dsn::floating_percentile_prototype<double> METRIC_##name
 
 namespace dsn {
 
@@ -393,13 +415,23 @@ template <typename Adder = striped_long_adder, bool IsVolatile = false>
 class counter : public metric
 {
 public:
-    template <bool Volatile = IsVolatile, typename = typename std::enable_if<!Volatile>::type>
+    // Which overload of value() is available is decided by the class template parameter
+    // IsVolatile, so that callers never have to spell the choice out at each call site.
+    //
+    // std::enable_if only removes an overload when its condition depends on a template
+    // parameter of the function itself, hence the extra function template parameter Volatile
+    // that defaults to IsVolatile. The condition also checks IsVolatile directly so that
+    // explicitly passing a different Volatile (e.g. value<true>() on a non-volatile counter)
+    // cannot select the wrong overload.
+    template <bool Volatile = IsVolatile,
+              typename = typename std::enable_if<!Volatile && !IsVolatile>::type>
     int64_t value() const
     {
         return _adder.value();
     }
 
-    template <bool Volatile = IsVolatile, typename = typename std::enable_if<Volatile>::type>
+    template <bool Volatile = IsVolatile,
+              typename = typename std::enable_if<Volatile && IsVolatile>::type>
     int64_t value()
     {
         return _adder.fetch_and_reset();
@@ -447,4 +479,268 @@ using concurrent_volatile_counter_ptr = counter_ptr<concurrent_long_adder, true>
 template <typename Adder = striped_long_adder>
 using volatile_counter_prototype = metric_prototype_with<counter<Adder, true>>;
 
+// All supported kinds of kth percentiles. User can configure required kth percentiles for
+// each percentile. Only configured kth percentiles will be computed. This can reduce CPU
+// consumption.
+enum class kth_percentile_type : size_t
+{
+    P50,
+    P90,
+    P95,
+    P99,
+    P999,
+    COUNT,
+    INVALID
+};
+
+// Support to load from configuration files for percentiles.
+ENUM_BEGIN(kth_percentile_type, kth_percentile_type::INVALID)
+ENUM_REG(kth_percentile_type::P50)
+ENUM_REG(kth_percentile_type::P90)
+ENUM_REG(kth_percentile_type::P95)
+ENUM_REG(kth_percentile_type::P99)
+ENUM_REG(kth_percentile_type::P999)
+ENUM_END(kth_percentile_type)
+
+const std::vector<double> kKthDecimals = {0.5, 0.9, 0.95, 0.99, 0.999};
+
+inline size_t kth_percentile_to_nth_index(size_t size, size_t kth_index)
+{
+    auto decimal = kKthDecimals[kth_index];
+    // Since the kth percentile is the value that is greater than k percent of the data values after
+    // ranking them (https://people.richland.edu/james/ictcm/2001/descriptive/helpposition.html),
+    // compute the nth index by size * decimal rather than size * decimal - 1.
+    return static_cast<size_t>(size * decimal);
+}
+
+inline size_t kth_percentile_to_nth_index(size_t size, kth_percentile_type type)
+{
+    return kth_percentile_to_nth_index(size, static_cast<size_t>(type));
+}
+
+inline std::set<kth_percentile_type> get_all_kth_percentile_types() // inline: defined in header
+{
+    std::set<kth_percentile_type> all_types; // filled with every enumerator below COUNT
+    for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+        all_types.insert(static_cast<kth_percentile_type>(i));
+    }
+    return all_types;
+}
+const std::set<kth_percentile_type> kAllKthPercentileTypes = get_all_kth_percentile_types();
+
+// `percentile_timer` is a timer class that encapsulates the details of how each percentile is
+// computed periodically.
+//
+// To be instantiated, it requires `interval_ms`, the interval at which a percentile is computed,
+// and `exec`, the callback that is invoked to compute the percentile.
+//
+// To avoid all percentiles being computed at the same time, which would lead to very high load,
+// the first computation of each percentile is delayed by a random interval.
+class percentile_timer
+{
+public:
+    using exec_fn = std::function<void()>;
+
+    percentile_timer(uint64_t interval_ms, exec_fn exec);
+    ~percentile_timer() = default;
+
+    // Get the initial delay that was randomly generated by `generate_initial_delay_ms()`.
+    uint64_t get_initial_delay_ms() const { return _initial_delay_ms; }
+
+private:
+    // Generate a random initial delay so that percentiles are not all computed at the
+    // same time.
+    static uint64_t generate_initial_delay_ms(uint64_t interval_ms);
+
+    void on_timer(const boost::system::error_code &ec);
+
+    const uint64_t _initial_delay_ms; // delay before the first call of `_exec`
+    const uint64_t _interval_ms;      // delay between subsequent calls of `_exec`
+    const exec_fn _exec;              // called from on_timer() each time the timer fires
+    std::unique_ptr<boost::asio::deadline_timer> _timer;
+};
+
+// The percentile is a metric type that samples observations. The size of samples has an upper
+// bound. Once the maximum size is reached, the earliest observations will be overwritten.
+//
+// On the other hand, kth percentiles, such as P50, P90, P95, P99, P999, will be calculated
+// periodically over all samples. The kth percentiles which are calculated are configurable
+// provided that they are of valid kth_percentile_type (i.e. in kAllKthPercentileTypes).
+//
+// The most common usage of percentile is latency, such as server-level and replica-level
+// latencies. For example, if P99 latency is 10 ms, it means the latencies of 99% requests
+// are less than 10 ms.
+//
+// The percentile is implemented by the finder for nth elements. Each kth percentile is firstly
+// converted to nth index; then, find the element corresponding to the nth index.
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class percentile : public metric
+{
+public:
+    using value_type = T;
+    using size_type = typename NthElementFinder::size_type;
+
+    void set(const value_type &val)
+    {
+        const auto index = _tail.fetch_add(1, std::memory_order_relaxed);
+        _samples.get()[index & (_sample_size - 1)] = val;
+    }
+
+    // If `type` is not configured, it will return false with zero value stored in `val`;
+    // otherwise, it will always return true with the value corresponding to `type`.
+    bool get(kth_percentile_type type, value_type &val) const
+    {
+        const auto index = static_cast<size_t>(type);
+        dcheck_lt(index, static_cast<size_t>(kth_percentile_type::COUNT));
+
+        val = _full_nth_elements[index].load(std::memory_order_relaxed);
+        return _kth_percentile_bitset.test(index);
+    }
+
+    bool timer_enabled() const { return !!_timer; }
+
+    uint64_t get_initial_delay_ms() const
+    {
+        return timer_enabled() ? _timer->get_initial_delay_ms() : 0;
+    }
+
+    static const size_type kDefaultSampleSize = 4096;
+
+protected:
+    // interval_ms is the interval between the computations for percentiles. Its unit is
+    // milliseconds. It's suggested that interval_ms should be near the period between pulls
+    // from or pushes to the monitoring system.
+    percentile(const metric_prototype *prototype,
+               uint64_t interval_ms = 10000,
+               const std::set<kth_percentile_type> &kth_percentiles = kAllKthPercentileTypes,
+               size_type sample_size = kDefaultSampleSize)
+        : metric(prototype),
+          _sample_size(sample_size),
+          _last_real_sample_size(0),
+          _samples(cacheline_aligned_alloc_array<value_type>(sample_size, value_type{})),
+          _tail(0),
+          _kth_percentile_bitset(),
+          _full_nth_elements(static_cast<size_t>(kth_percentile_type::COUNT)),
+          _nth_element_finder(),
+          _timer()
+    {
+        dassert(_sample_size > 0 && (_sample_size & (_sample_size - 1)) == 0,
+                "sample_sizes should be > 0 and power of 2");
+
+        dassert(_samples, "_samples should be valid pointer");
+
+        for (const auto &kth : kth_percentiles) {
+            _kth_percentile_bitset.set(static_cast<size_t>(kth));
+        }
+
+        for (size_type i = 0; i < _full_nth_elements.size(); ++i) {
+            _full_nth_elements[i].store(value_type{}, std::memory_order_relaxed);
+        }
+
+#ifdef DSN_MOCK_TEST
+        if (interval_ms == 0) {
+            // Timer is disabled.
+            return;
+        }
+#else
+        dcheck_gt(interval_ms, 0);
+#endif
+
+        _timer.reset(new percentile_timer(
+            interval_ms,
+            std::bind(&percentile<value_type, NthElementFinder>::find_nth_elements, this)));
+    }
+
+    virtual ~percentile() = default;
+
+private:
+    using nth_container_type = typename NthElementFinder::nth_container_type;
+
+    friend class metric_entity;
+    friend class ref_ptr<percentile<value_type, NthElementFinder>>;
+
+    void find_nth_elements()
+    {
+        size_type real_sample_size = std::min(static_cast<size_type>(_tail.load()), _sample_size);
+        if (real_sample_size == 0) {
+            // Nothing to find since no sample has been recorded yet.
+            return;
+        }
+
+        // If the number of samples has changed, the nth indexes should be updated.
+        if (real_sample_size != _last_real_sample_size) {
+            set_real_nths(real_sample_size);
+            _last_real_sample_size = real_sample_size;
+        }
+
+        // Find nth elements on a copy of the current samples.
+        std::vector<T> array(real_sample_size);
+        std::copy(_samples.get(), _samples.get() + real_sample_size, array.begin());
+        _nth_element_finder(array.begin(), array.begin(), array.end());
+
+        // Store nth elements; `next` only advances for the configured kth percentiles.
+        const auto &elements = _nth_element_finder.elements();
+        for (size_t i = 0, next = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+            if (!_kth_percentile_bitset.test(i)) {
+                continue;
+            }
+            _full_nth_elements[i].store(elements[next++], std::memory_order_relaxed);
+        }
+    }
+
+    void set_real_nths(size_type real_sample_size)
+    {
+        nth_container_type nths;
+        for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+            if (!_kth_percentile_bitset.test(i)) {
+                continue;
+            }
+
+            auto size = static_cast<size_t>(real_sample_size);
+            auto nth = static_cast<size_type>(kth_percentile_to_nth_index(size, i));
+            nths.push_back(nth);
+        }
+
+        _nth_element_finder.set_nths(nths);
+    }
+
+    const size_type _sample_size;
+    size_type _last_real_sample_size;
+    cacheline_aligned_ptr<value_type> _samples;
+    std::atomic<uint64_t> _tail; // use unsigned int to avoid running out of bound
+    std::bitset<static_cast<size_t>(kth_percentile_type::COUNT)> _kth_percentile_bitset;
+    std::vector<std::atomic<value_type>> _full_nth_elements;
+    NthElementFinder _nth_element_finder;
+
+    std::unique_ptr<percentile_timer> _timer;
+};
+
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+using percentile_ptr = ref_ptr<percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+using percentile_prototype = metric_prototype_with<percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile = percentile<T, NthElementFinder>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_ptr = ref_ptr<floating_percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_prototype =
+    metric_prototype_with<floating_percentile<T, NthElementFinder>>;
+
 } // namespace dsn
diff --git a/include/dsn/utility/ports.h b/include/dsn/utility/ports.h
index 6e2b182755..568fbf231b 100644
--- a/include/dsn/utility/ports.h
+++ b/include/dsn/utility/ports.h
@@ -102,6 +102,9 @@
 
 // This is a NOP if CACHELINE_SIZE is not defined.
 #ifdef CACHELINE_SIZE
+static_assert((CACHELINE_SIZE & (CACHELINE_SIZE - 1)) == 0 &&
+                  (CACHELINE_SIZE & (sizeof(void *) - 1)) == 0,
+              "CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *)");
 #define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
 #else
 #define CACHELINE_ALIGNED
diff --git a/src/utils/alloc.cpp b/src/utils/alloc.cpp
new file mode 100644
index 0000000000..fbf641a7e2
--- /dev/null
+++ b/src/utils/alloc.cpp
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/alloc.h>
+
+#include <cstdlib>
+
+#include <dsn/utility/safe_strerror_posix.h>
+
+namespace dsn {
+
+#ifdef CACHELINE_SIZE
+
+/* extern */ void *cacheline_aligned_alloc(size_t size)
+{
+    if (dsn_unlikely(size == 0)) {
+        return nullptr;
+    }
+
+    void *buffer = nullptr;
+    // CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *), which have been
+    // checked statically at compile time when CACHELINE_SIZE is defined as macro.
+    int err = posix_memalign(&buffer, CACHELINE_SIZE, size);
+
+    // Generally there are 2 possible errors for posix_memalign as below:
+    // [EINVAL]
+    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).
+    // [ENOMEM]
+    //     There is insufficient memory available with the requested alignment.
+    // Thus making an assertion here is enough.
+    dassert_f(err == 0, "error calling posix_memalign: {}", utils::safe_strerror(err).c_str());
+
+    return buffer;
+}
+
+/* extern */ void cacheline_aligned_free(void *mem_block) { free(mem_block); }
+
+#endif
+
+} // namespace dsn
diff --git a/src/utils/latency_tracer.cpp b/src/utils/latency_tracer.cpp
index 863846da3a..e32d257ae9 100644
--- a/src/utils/latency_tracer.cpp
+++ b/src/utils/latency_tracer.cpp
@@ -19,6 +19,7 @@
 #include <dsn/perf_counter/perf_counters.h>
 #include <dsn/service_api_c.h>
 #include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/config_api.h>
 #include <dsn/utility/flags.h>
 
 #include <utility>
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 0ec1415cac..656e49363f 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -18,6 +18,9 @@
 #include <dsn/utility/metrics.h>
 
 #include <dsn/c/api_utilities.h>
+#include <dsn/utility/rand.h>
+
+#include "shared_io_service.h"
 
 namespace dsn {
 
@@ -64,7 +67,15 @@ metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name)
 
 metric_entity_prototype::~metric_entity_prototype() {}
 
-metric_registry::metric_registry() {}
+metric_registry::metric_registry()
+{
+    // We should ensure that metric_registry is destructed before shared_io_service is destructed.
+    // If shared_io_service were destructed first, the boost::asio::io_service needed by metrics
+    // in metric_registry (e.g. by percentile_timer) would be released while percentiles in
+    // metric_registry are still running, which would lead to a heap-use-after-free error since
+    // the resources they need would already have been released.
+    tools::shared_io_service::instance();
+}
 
 metric_registry::~metric_registry() {}
 
@@ -100,4 +111,41 @@ metric_prototype::~metric_prototype() {}
 
 metric::metric(const metric_prototype *prototype) : _prototype(prototype) {}
 
+uint64_t percentile_timer::generate_initial_delay_ms(uint64_t interval_ms)
+{
+    dcheck_gt(interval_ms, 0);
+
+    if (interval_ms < 1000) {
+        return rand::next_u64() % interval_ms + 50;
+    }
+
+    uint64_t interval_seconds = interval_ms / 1000;
+    return (rand::next_u64() % interval_seconds + 1) * 1000 + rand::next_u64() % 1000;
+}
+
+percentile_timer::percentile_timer(uint64_t interval_ms, exec_fn exec)
+    : _initial_delay_ms(generate_initial_delay_ms(interval_ms)),
+      _interval_ms(interval_ms),
+      _exec(exec),
+      _timer(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios))
+{
+    _timer->expires_from_now(boost::posix_time::milliseconds(_initial_delay_ms));
+    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));
+}
+
+void percentile_timer::on_timer(const boost::system::error_code &ec)
+{
+    if (dsn_unlikely(!!ec)) {
+        dassert_f(ec == boost::system::errc::operation_canceled,
+                  "failed to exec on_timer with an error that cannot be handled: {}",
+                  ec.message());
+        return;
+    }
+
+    _exec();
+
+    _timer->expires_from_now(boost::posix_time::milliseconds(_interval_ms));
+    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));
+}
+
 } // namespace dsn
diff --git a/src/utils/shared_io_service.cpp b/src/utils/shared_io_service.cpp
new file mode 100644
index 0000000000..d4a82d95b6
--- /dev/null
+++ b/src/utils/shared_io_service.cpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "shared_io_service.h"
+
+#include <dsn/utility/flags.h>
+
+namespace dsn {
+namespace tools {
+
+const uint32_t kMinTimerServiceWorkerCount = 3;
+DSN_DEFINE_uint32("core",
+                  timer_service_worker_count,
+                  kMinTimerServiceWorkerCount,
+                  "the number of threads for timer service");
+DSN_DEFINE_validator(timer_service_worker_count, [](uint32_t worker_count) -> bool {
+    if (worker_count < kMinTimerServiceWorkerCount) {
+        derror("timer_service_worker_count should be at least 3, where one thread is used to "
+               "collect all metrics from registery for monitoring systems, and another two threads "
+               "are used to compute percentiles.");
+        return false;
+    }
+    return true;
+});
+
+shared_io_service::shared_io_service()
+{
+    _workers.reserve(FLAGS_timer_service_worker_count);
+    for (uint32_t i = 0; i < FLAGS_timer_service_worker_count; ++i) {
+        _workers.emplace_back([this]() {
+            boost::asio::io_service::work work(ios);
+            ios.run();
+        });
+    }
+}
+
+shared_io_service::~shared_io_service()
+{
+    ios.stop();
+    for (auto &worker : _workers) {
+        worker.join();
+    }
+}
+
+} // namespace tools
+} // namespace dsn
diff --git a/src/utils/shared_io_service.h b/src/utils/shared_io_service.h
index 8438498ec5..df6db1326f 100644
--- a/src/utils/shared_io_service.h
+++ b/src/utils/shared_io_service.h
@@ -36,10 +36,10 @@
 #pragma once
 
 #include <thread>
-#include <memory>
 #include <vector>
+
 #include <boost/asio.hpp>
-#include <dsn/utility/config_api.h>
+
 #include <dsn/utility/singleton.h>
 
 namespace dsn {
@@ -51,34 +51,16 @@ namespace tools {
 class shared_io_service : public utils::singleton<shared_io_service>
 {
 public:
-    shared_io_service()
-    {
-        _io_service_worker_count =
-            (int)dsn_config_get_value_uint64("core",
-                                             "timer_service_worker_count",
-                                             2,
-                                             "thread number for timer service for core itself");
-        for (int i = 0; i < _io_service_worker_count; i++) {
-            _workers.push_back(std::shared_ptr<std::thread>(new std::thread([this]() {
-                boost::asio::io_service::work work(ios);
-                ios.run();
-            })));
-        }
-    }
-
-    ~shared_io_service()
-    {
-        ios.stop();
-        for (auto worker : _workers) {
-            worker->join();
-        }
-    }
-
     boost::asio::io_service ios;
 
 private:
-    int _io_service_worker_count;
-    std::vector<std::shared_ptr<std::thread>> _workers;
+    friend class utils::singleton<shared_io_service>;
+
+    shared_io_service();
+    ~shared_io_service();
+
+    std::vector<std::thread> _workers;
 };
-}
-}
+
+} // namespace tools
+} // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 6ba5b65687..5af3117328 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -18,11 +18,14 @@
 #include <dsn/utility/metrics.h>
 #include <dsn/utility/rand.h>
 
+#include <chrono>
 #include <thread>
 #include <vector>
 
 #include <gtest/gtest.h>
 
+#include "percentile_utils.h"
+
 namespace dsn {
 
 class my_gauge : public metric
@@ -106,6 +109,16 @@ METRIC_DEFINE_concurrent_volatile_counter(my_server,
                                           dsn::metric_unit::kRequests,
                                           "a server-level concurrent_volatile_counter for test");
 
+METRIC_DEFINE_percentile_int64(my_server,
+                               test_percentile_int64,
+                               dsn::metric_unit::kNanoSeconds,
+                               "a server-level percentile of int64 type for test");
+
+METRIC_DEFINE_percentile_double(my_server,
+                                test_percentile_double,
+                                dsn::metric_unit::kNanoSeconds,
+                                "a server-level percentile of double type for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -345,7 +358,7 @@ TEST(metrics_test, gauge_double)
 void execute(int64_t num_threads, std::function<void(int)> runner)
 {
     std::vector<std::thread> threads;
-    for (int64_t i = 0; i < num_threads; i++) {
+    for (int64_t i = 0; i < num_threads; ++i) {
         threads.emplace_back([i, &runner]() { runner(i); });
     }
     for (auto &t : threads) {
@@ -388,7 +401,7 @@ void run_increment_by(MetricPtr &my_metric,
         deltas.push_back(delta);
     }
 
-    execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
+    execute(num_threads, [num_operations, &my_metric, &deltas](int64_t tid) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
             auto delta = deltas[tid * num_operations + i];
             increment_by(std::integral_constant<bool, IsIncrement>{}, my_metric, delta);
@@ -555,7 +568,7 @@ void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_me
 
     execute(num_threads_write + num_threads_read,
             [num_operations, num_threads_write, &my_metric, &deltas, &results, &completed](
-                int tid) mutable {
+                int64_t tid) mutable {
                 if (tid < num_threads_write) {
                     for (int64_t i = 0; i < num_operations; ++i) {
                         my_metric->increment_by(deltas[tid * num_operations + i]);
@@ -646,4 +659,247 @@ TEST(metrics_test, volatile_counter)
     run_volatile_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_volatile_counter);
 }
 
+template <typename T, typename Prototype, typename Checker>
+void run_percentile(const metric_entity_ptr &my_entity,
+                    const Prototype &prototype,
+                    const std::vector<T> &data,
+                    size_t num_preload,
+                    uint64_t interval_ms,
+                    uint64_t exec_ms,
+                    const std::set<kth_percentile_type> &kth_percentiles,
+                    size_t sample_size,
+                    size_t num_threads,
+                    const std::vector<T> &expected_elements,
+                    Checker checker)
+{
+    dassert_f(num_threads > 0, "Invalid num_threads({})", num_threads);
+    dassert_f(data.size() <= sample_size && data.size() % num_threads == 0,
+              "Invalid arguments, data_size={}, sample_size={}, num_threads={}",
+              data.size(),
+              sample_size,
+              num_threads);
+
+    auto my_metric = prototype.instantiate(my_entity, interval_ms, kth_percentiles, sample_size);
+
+    // The calling thread first feeds `num_preload` zeros into the percentile.
+    for (size_t remaining = num_preload; remaining > 0; --remaining) {
+        my_metric->set(0);
+    }
+
+    // Then `data` is split into equal contiguous slices, one for each spawned thread.
+    const size_t slice_size = data.size() / num_threads;
+    execute(static_cast<int64_t>(num_threads),
+            [slice_size, &my_metric, &data](int64_t tid) mutable {
+                const size_t begin = static_cast<size_t>(tid) * slice_size;
+                for (size_t pos = begin; pos < begin + slice_size; ++pos) {
+                    my_metric->set(data[pos]);
+                }
+            });
+
+    // Sleep for a while so that computing all percentiles can be finished.
+    const auto wait_ms = my_metric->get_initial_delay_ms() + interval_ms + exec_ms;
+    std::this_thread::sleep_for(std::chrono::milliseconds(wait_ms));
+
+    // Each enabled kth percentile must be read successfully and is collected for comparison
+    // with the expected elements; reading a disabled one must fail and the value is checked by 0.
+    std::vector<T> actual_elements;
+    for (const auto &kth : kAllKthPercentileTypes) {
+        T value;
+        if (kth_percentiles.count(kth) > 0) {
+            ASSERT_TRUE(my_metric->get(kth, value));
+            actual_elements.push_back(value);
+        } else {
+            ASSERT_FALSE(my_metric->get(kth, value));
+            checker(value, 0);
+        }
+    }
+    checker(actual_elements, expected_elements);
+
+    // The percentile must be included in the entity and must reference the prototype.
+    auto metrics = my_entity->metrics();
+    ASSERT_EQ(metrics[&prototype].get(), static_cast<metric *>(my_metric.get()));
+    ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(&prototype));
+}
+
+template <typename T, typename Prototype, typename CaseGenerator, typename Checker>
+void run_percentile_cases(const Prototype &prototype)
+{
+    using value_type = T;
+    const auto p50 = kth_percentile_type::P50;
+    const auto p90 = kth_percentile_type::P90;
+    const auto p99 = kth_percentile_type::P99;
+
+    // Test cases (one entry of `tests` for each item, in the same order):
+    // - input no sample with no kth percentile
+    // - input 1 sample with no kth percentile
+    // - input 1 sample with 1 kth percentile
+    // - input 1 sample with 2 kth percentiles
+    // - input 1 sample with all kth percentiles
+    // - input 1 sample with 1 kth percentile, capacity of 2
+    // - input 1 sample with 2 kth percentiles, capacity of 2
+    // - input 1 sample with all kth percentiles, capacity of 2
+    // - input 2 samples with 1 kth percentile
+    // - input 2 samples with 2 kth percentiles
+    // - input 2 samples with all kth percentiles
+    // - input 10 samples with 1 kth percentile, capacity of 16
+    // - input 10 samples with 2 kth percentiles, capacity of 16
+    // - input 10 samples with all kth percentiles, capacity of 16
+    // - input 10 samples with 1 kth percentile by 2 threads, capacity of 16
+    // - input 10 samples with 2 kth percentiles by 2 threads, capacity of 16
+    // - input 10 samples with all kth percentiles by 2 threads, capacity of 16
+    // - input 16 samples with 1 kth percentile
+    // - input 16 samples with 2 kth percentiles
+    // - input 16 samples with all kth percentiles
+    // - input 16 samples with 1 kth percentile by 2 threads
+    // - input 16 samples with 2 kth percentiles by 2 threads
+    // - input 16 samples with all kth percentiles by 2 threads
+    // - preload 5 samples and input 16 samples with 1 kth percentile by 2 threads
+    // - preload 5 samples and input 16 samples with 2 kth percentiles by 2 threads
+    // - preload 5 samples and input 16 samples with all kth percentiles by 2 threads
+    // - input 2000 samples with 1 kth percentile, capacity of 4096
+    // - input 2000 samples with 2 kth percentiles, capacity of 4096
+    // - input 2000 samples with all kth percentiles, capacity of 4096
+    // - input 2000 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - input 2000 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - input 2000 samples with all kth percentiles by 4 threads, capacity of 4096
+    // - input 4096 samples with 1 kth percentile, capacity of 4096
+    // - input 4096 samples with 2 kth percentiles, capacity of 4096
+    // - input 4096 samples with all kth percentiles, capacity of 4096
+    // - input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - input 4096 samples with all kth percentiles by 4 threads, capacity of 4096
+    // - preload 5 and input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - preload 5 and input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - preload 5 and input 4096 samples with all kth percentiles by 4 threads, capacity of 4096
+    struct test_case
+    {
+        std::string entity_id;
+        size_t data_size;
+        value_type initial_value;
+        uint64_t range_size;
+        size_t num_preload;
+        uint64_t interval_ms;
+        uint64_t exec_ms;
+        const std::set<kth_percentile_type> kth_percentiles;
+        size_t sample_size;
+        size_t num_threads;
+    } tests[] = {{"server_19", 0, 0, 2, 0, 50, 10, {}, 1, 1},
+                 {"server_20", 1, 0, 2, 0, 50, 10, {}, 1, 1},
+                 {"server_21", 1, 0, 2, 0, 50, 10, {p90}, 1, 1},
+                 {"server_22", 1, 0, 2, 0, 50, 10, {p50, p99}, 1, 1},
+                 {"server_23", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 1, 1},
+                 {"server_24", 1, 0, 2, 0, 50, 10, {p90}, 2, 1},
+                 {"server_25", 1, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},
+                 {"server_26", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},
+                 {"server_27", 2, 0, 2, 0, 50, 10, {p90}, 2, 1},
+                 {"server_28", 2, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},
+                 {"server_29", 2, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},
+                 {"server_30", 10, 0, 2, 0, 50, 10, {p90}, 16, 1},
+                 {"server_31", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},
+                 {"server_32", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},
+                 {"server_33", 10, 0, 2, 0, 50, 10, {p90}, 16, 2},
+                 {"server_34", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},
+                 {"server_35", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_36", 16, 0, 2, 0, 50, 10, {p90}, 16, 1},
+                 {"server_37", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},
+                 {"server_38", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},
+                 {"server_39", 16, 0, 2, 0, 50, 10, {p90}, 16, 2},
+                 {"server_40", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},
+                 {"server_41", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_42", 16, 0, 2, 5, 50, 10, {p90}, 16, 2},
+                 {"server_43", 16, 0, 2, 5, 50, 10, {p50, p99}, 16, 2},
+                 {"server_44", 16, 0, 2, 5, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_45", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 1},
+                 {"server_46", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},
+                 {"server_47", 2000, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},
+                 {"server_48", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 4},
+                 {"server_49", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_50", 2000, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 4},
+                 {"server_51", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 1},
+                 {"server_52", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},
+                 {"server_53", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},
+                 {"server_54", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 4},
+                 {"server_55", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_56", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 4},
+                 {"server_57", 4096, 0, 5, 5, 50, 10, {p90}, 4096, 4},
+                 {"server_58", 4096, 0, 5, 5, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_59", 4096, 0, 5, 5, 50, 10, kAllKthPercentileTypes, 4096, 4}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        CaseGenerator generator(
+            test.data_size, test.initial_value, test.range_size, test.kth_percentiles);
+
+        std::vector<value_type> data;
+        std::vector<value_type> expected_elements;
+        generator(data, expected_elements);
+
+        run_percentile<value_type, Prototype, Checker>(my_server_entity,
+                                                       prototype,
+                                                       data,
+                                                       test.num_preload,
+                                                       test.interval_ms,
+                                                       test.exec_ms,
+                                                       test.kth_percentiles,
+                                                       test.sample_size,
+                                                       test.num_threads,
+                                                       expected_elements,
+                                                       Checker());
+    }
+}
+
+// Checker for integral percentiles: actual values must be exactly equal to the expected ones.
+template <typename T>
+struct integral_checker
+{
+    void operator()(const T &actual_element, const T &expected_element) const
+    {
+        ASSERT_EQ(actual_element, expected_element);
+    }
+
+    void operator()(const std::vector<T> &actual_elements,
+                    const std::vector<T> &expected_elements) const
+    {
+        ASSERT_EQ(actual_elements, expected_elements);
+    }
+};
+
+TEST(metrics_test, percentile_int64)
+{
+    // Run all percentile cases for int64_t; elements are compared by integral_checker.
+    run_percentile_cases<int64_t,
+                         percentile_prototype<int64_t>,
+                         integral_percentile_case_generator<int64_t>,
+                         integral_checker<int64_t>>(METRIC_test_percentile_int64);
+}
+
+// Checker for floating-point percentiles: values are compared by ASSERT_DOUBLE_EQ.
+template <typename T>
+struct floating_checker
+{
+    void operator()(const T &actual_element, const T &expected_element) const
+    {
+        ASSERT_DOUBLE_EQ(actual_element, expected_element);
+    }
+
+    void operator()(const std::vector<T> &actual_elements,
+                    const std::vector<T> &expected_elements) const
+    {
+        ASSERT_EQ(actual_elements.size(), expected_elements.size());
+        for (size_t idx = 0; idx != actual_elements.size(); ++idx) {
+            ASSERT_DOUBLE_EQ(actual_elements[idx], expected_elements[idx]);
+        }
+    }
+};
+
+TEST(metrics_test, percentile_double)
+{
+    // Run all percentile cases for double; elements are compared by floating_checker.
+    run_percentile_cases<double,
+                         floating_percentile_prototype<double>,
+                         floating_percentile_case_generator<double>,
+                         floating_checker<double>>(METRIC_test_percentile_double);
+}
+
 } // namespace dsn
diff --git a/src/utils/test/percentile_utils.h b/src/utils/test/percentile_utils.h
new file mode 100644
index 0000000000..b764f5e0da
--- /dev/null
+++ b/src/utils/test/percentile_utils.h
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <type_traits>
+#include <vector>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/utility/metrics.h>
+#include <dsn/dist/fmt_logging.h>
+
+#include "nth_element_utils.h"
+
+namespace dsn {
+
+// The generator is used to produce the test cases randomly for unit tests and benchmarks of
+// percentile. This is implemented by converting kth percentiles to nth indexes, and calling
+// nth_element_case_generator to generate data and nth elements.
+template <typename NthElementCaseGenerator,
+          typename = typename std::enable_if<
+              std::is_arithmetic<typename NthElementCaseGenerator::value_type>::value>::type>
+class percentile_case_generator
+{
+public:
+    using value_type = typename NthElementCaseGenerator::value_type;
+    using container_type = typename NthElementCaseGenerator::container_type;
+    using size_type = typename NthElementCaseGenerator::size_type;
+    using nth_container_type = typename NthElementCaseGenerator::nth_container_type;
+
+    percentile_case_generator(size_type data_size,
+                              value_type initial_value,
+                              uint64_t range_size,
+                              const std::set<kth_percentile_type> &kth_percentiles)
+        : _nth_element_gen()
+    {
+        // Convert each kth percentile to its nth index among `data_size` elements.
+        const auto total = static_cast<size_t>(data_size);
+        nth_container_type nth_indexes;
+        nth_indexes.reserve(kth_percentiles.size());
+        for (const auto &kth : kth_percentiles) {
+            const auto index = kth_percentile_to_nth_index(total, kth);
+            nth_indexes.push_back(static_cast<size_type>(index));
+        }
+        _nth_element_gen.reset(
+            new NthElementCaseGenerator(data_size, initial_value, range_size, nth_indexes));
+    }
+
+    ~percentile_case_generator() = default;
+
+    // Delegate to the wrapped nth-element generator, which fills `data` and `elements`.
+    // See nth_element_case_generator for detailed implementations.
+    void operator()(container_type &data, container_type &elements)
+    {
+        _nth_element_gen->operator()(data, elements);
+    }
+
+private:
+    std::unique_ptr<NthElementCaseGenerator> _nth_element_gen;
+
+    DISALLOW_COPY_AND_ASSIGN(percentile_case_generator);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+using integral_percentile_case_generator =
+    percentile_case_generator<integral_nth_element_case_generator<T>>;
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_case_generator =
+    percentile_case_generator<floating_nth_element_case_generator<T>>;
+
+} // namespace dsn

From 42492b13610bb86da2cb2f631eafd4ae44357c6b Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 28 Jan 2022 16:17:08 +0800
Subject: [PATCH 11/21] feat: implement long adder to optimize the counter of
 new metrics system (#1033)

---
 include/dsn/utility/long_adder.h              | 237 +++++++++++++++
 include/dsn/utility/ports.h                   |  28 ++
 src/utils/CMakeLists.txt                      |   1 +
 src/utils/long_adder.cpp                      | 280 ++++++++++++++++++
 src/utils/long_adder_bench/CMakeLists.txt     |  39 +++
 .../long_adder_bench/long_adder_bench.cpp     | 198 +++++++++++++
 src/utils/test/long_adder_test.cpp            | 265 +++++++++++++++++
 7 files changed, 1048 insertions(+)
 create mode 100644 include/dsn/utility/long_adder.h
 create mode 100644 src/utils/long_adder.cpp
 create mode 100644 src/utils/long_adder_bench/CMakeLists.txt
 create mode 100644 src/utils/long_adder_bench/long_adder_bench.cpp
 create mode 100644 src/utils/test/long_adder_test.cpp

diff --git a/include/dsn/utility/long_adder.h b/include/dsn/utility/long_adder.h
new file mode 100644
index 0000000000..99a3d2d3bf
--- /dev/null
+++ b/include/dsn/utility/long_adder.h
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+#include <dsn/utility/ports.h>
+
+// Refer to https://github.com/apache/kudu/blob/master/src/kudu/util/striped64.h
+
+namespace dsn {
+
+// Padded POD container for std::atomic<int64_t>. This prevents false sharing of cache lines.
+// Notice that in older versions of GCC `std::is_pod<std::atomic<int64_t>>::value` will return
+// false, thus cacheline_aligned_int64 is not considered to be a POD. However it doesn't matter.
+class cacheline_aligned_int64
+{
+public:
+    static constexpr int kAtomicInt64Size = sizeof(std::atomic<int64_t>);
+
+    cacheline_aligned_int64() = default;
+    // Weak CAS on `_value`: may fail spuriously even if `_value` equals `cmp` (see std::atomic).
+    inline bool compare_and_set(int64_t cmp, int64_t value)
+    {
+        return _value.compare_exchange_weak(cmp, value);
+    }
+
+    // `pad` fills the object up to CACHELINE_SIZE bytes. Padding advice from Herb Sutter:
+    // http://www.drdobbs.com/parallel/eliminate-false-sharing/217500206?pgno=4
+    std::atomic<int64_t> _value;
+    char pad[CACHELINE_SIZE > kAtomicInt64Size ? CACHELINE_SIZE - kAtomicInt64Size : 1];
+
+    DISALLOW_COPY_AND_ASSIGN(cacheline_aligned_int64);
+} CACHELINE_ALIGNED;
+
+using cacheline_aligned_int64_ptr =
+    std::unique_ptr<cacheline_aligned_int64, std::function<void(cacheline_aligned_int64 *)>>;
+extern cacheline_aligned_int64_ptr new_cacheline_aligned_int64();
+extern cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size);
+
+// This set of classes is heavily derived from JSR166e, released into the public domain
+// by Doug Lea and the other authors.
+//
+// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/Striped64.java?view=co
+// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/LongAdder.java?view=co
+//
+// The striped64 and striped_long_adder implementations here are simplified versions of what's
+// present in JSR166e. However, the core ideas remain the same.
+//
+// Updating a single AtomicInteger in a multi-threaded environment can be quite slow:
+//
+//   1. False sharing of cache lines with other counters.
+//   2. Cache line bouncing from high update rates, especially with many cores.
+//
+// These two problems are addressed by striped64. When there is no contention, it uses CAS on a
+// single base counter to store updates. However, when striped64 detects contention
+// (via a failed CAS operation), it will allocate a small, fixed size hashtable of Cells.
+// A cacheline_aligned_int64 is a simple POD that pads out an atomic<int64_t> to CACHELINE_SIZE
+// bytes (64 on x86_64 and aarch64, 128 on powerpc64) to prevent sharing a cache line.
+//
+// Reading the value of a striped64 requires traversing the hashtable to calculate the true sum.
+//
+// Each updating thread uses a thread-local hashcode to determine its cacheline_aligned_int64 in the
+// hashtable. If a thread fails to CAS its hashed cacheline_aligned_int64, it will do a lightweight
+// rehash operation to try and find an uncontended bucket. Because the hashcode is thread-local,
+// this rehash affects all striped64's accessed by the thread. This is good, since contention on one
+// striped64 is indicative of contention elsewhere too.
+//
+// The hashtable is statically sized to the nearest power of 2 greater than or equal to the
+// number of CPUs. This is sufficient, since this guarantees the existence of a perfect hash
+// function. Due to the random rehashing, the threads should eventually converge to this function.
+// In practice, this scheme has shown to be sufficient.
+//
+// The biggest simplification of this implementation compared to JSR166e is that we do not
+// dynamically grow the table, instead immediately allocating it to the full size.
+// We also do not lazily allocate each cacheline_aligned_int64, instead allocating the entire array
+// at once. This means we waste some additional memory in low contention scenarios, and initial
+// allocation will also be slower. Some of the micro-optimizations were also elided for readability.
+class striped64
+{
+public:
+    striped64() = default;
+
+protected:
+    // NOTE: the destructor is not virtual so that we can ensure that striped64
+    // has no vtable, thus reducing its size. We make it protected to ensure that
+    // no one attempts to delete a striped64* and invokes the wrong destructor.
+    ~striped64() = default;
+    // NOTE(review): presumably tells retry_update() whether to rehash first - confirm in .cpp.
+    enum rehash
+    {
+        kRehash,
+        kNoRehash
+    };
+
+    // CAS the base field. Built on compare_exchange_weak, thus it may fail spuriously.
+    inline bool cas_base(int64_t cmp, int64_t val) { return _base.compare_exchange_weak(cmp, val); }
+
+    // Handles cases of updates involving initialization, resizing, creating new Cells, and/or
+    // contention. See above for further explanation.
+    //
+    // 'Updater' should be a function which takes the current value and returns
+    // the new value.
+    template <class Updater>
+    void retry_update(rehash to_rehash, Updater updater);
+
+    // Sets base and all cells to the given value.
+    void internal_reset(int64_t initial_value);
+
+    // Base value, used mainly when there is no contention, but also as a fallback during
+    // table initialization races. Updated via CAS.
+    std::atomic<int64_t> _base{0};
+
+    // Memory manager of cells. Once the destructor is called, cells will be freed.
+    cacheline_aligned_int64_ptr _cells_holder;
+
+    // Table of cells. When non-null, size is the nearest power of 2 >= NCPU.
+    // If this is set to -1, the pointer is 'locked' and some thread is in the
+    // process of allocating the array.
+    std::atomic<cacheline_aligned_int64 *> _cells{nullptr};
+
+    static uint64_t get_tls_hashcode();
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(striped64);
+
+    // Static hash code per-thread. Shared across all instances to limit thread-local pollution.
+    // Also, if a thread hits a collision on one striped64, it's also likely to collide on
+    // other striped64s too.
+    static __thread uint64_t _tls_hashcode;
+};
+
+// A 64-bit number optimized for high-volume concurrent updates.
+// See striped64 (inherited privately) for a longer explanation of the inner workings.
+class striped_long_adder : striped64
+{
+public:
+    striped_long_adder() = default;
+
+    ~striped_long_adder() = default;
+
+    void increment_by(int64_t x);
+
+    // Returns the current value.
+    // Note this is not an atomic snapshot in the presence of concurrent updates.
+    int64_t value() const;
+
+    // Call reset() ONLY when necessary, since it is built on `set`, an inefficient operation.
+    inline void reset() { set(0); }
+
+    // Return the value immediately before it's reset.
+    int64_t fetch_and_reset();
+
+private:
+    // `set` is not exposed since it's not an efficient operation: it resets base and all cells.
+    void set(int64_t val) { internal_reset(val); }
+
+    DISALLOW_COPY_AND_ASSIGN(striped_long_adder);
+};
+
+class concurrent_long_adder
+{
+public:
+    concurrent_long_adder();
+    ~concurrent_long_adder() = default;
+
+    void increment_by(int64_t x);
+
+    // Returns the current value.
+    // Note this is not an atomic snapshot in the presence of concurrent updates.
+    int64_t value() const;
+
+    // Call reset() ONLY when necessary, since it is built on `set`, an inefficient operation.
+    inline void reset() { set(0); }
+
+    // Return the value immediately before it's reset.
+    int64_t fetch_and_reset();
+
+private:
+    // `set` is not exposed since it's not an efficient operation
+    void set(int64_t val);
+    // NOTE(review): presumably `_cells_holder` owns the array `_cells` points to - confirm.
+    cacheline_aligned_int64_ptr _cells_holder;
+    cacheline_aligned_int64 *_cells;
+
+    DISALLOW_COPY_AND_ASSIGN(concurrent_long_adder);
+};
+
+// Wraps a long adder implementation chosen by template argument instead of by inheritance,
+// since virtual functions would increase the class size and slow down the execution.
+template <typename Adder>
+class long_adder_wrapper
+{
+public:
+    long_adder_wrapper() = default;
+
+    ~long_adder_wrapper() = default;
+
+    void increment_by(int64_t x) { _adder.increment_by(x); }
+    void increment() { _adder.increment_by(1); }
+    void decrement() { _adder.increment_by(-1); }
+
+    // Reads the current value from the wrapped adder.
+    // Under concurrent updates the result is not an atomic snapshot.
+    int64_t value() const { return _adder.value(); }
+
+    // Sets the counter state back to zero. Use it ONLY when necessary.
+    void reset() { _adder.reset(); }
+
+    // Resets the counter and returns the value it held immediately before.
+    int64_t fetch_and_reset() { return _adder.fetch_and_reset(); }
+
+private:
+    Adder _adder;
+
+    DISALLOW_COPY_AND_ASSIGN(long_adder_wrapper);
+};
+
+} // namespace dsn
diff --git a/include/dsn/utility/ports.h b/include/dsn/utility/ports.h
index 1a9727fad8..6e2b182755 100644
--- a/include/dsn/utility/ports.h
+++ b/include/dsn/utility/ports.h
@@ -78,3 +78,31 @@
 #include <machine/endian.h> // NOLINT(build/include)
 
 #endif
+
+// Cache line size (in bytes) of the target architecture, used for alignment and padding.
+#if defined(__i386__) || defined(__x86_64__)
+#define CACHELINE_SIZE 64
+#elif defined(__powerpc64__)
+// TODO(user) This is the L1 D-cache line size of our Power7 machines.
+// Need to check if this is appropriate for other PowerPC64 systems.
+#define CACHELINE_SIZE 128
+#elif defined(__aarch64__)
+#define CACHELINE_SIZE 64
+#elif defined(__arm__)
+// Cache line sizes for ARM: These values are not strictly correct since
+// cache line sizes depend on implementations, not architectures.  There
+// are even implementations with cache line sizes configurable at boot
+// time.
+#if defined(__ARM_ARCH_5T__)
+#define CACHELINE_SIZE 32
+#elif defined(__ARM_ARCH_7A__)
+#define CACHELINE_SIZE 64
+#endif
+#endif
+// NOTE(review): CACHELINE_SIZE stays undefined for any target that is not matched above.
+// CACHELINE_ALIGNED expands to nothing if CACHELINE_SIZE is not defined.
+#ifdef CACHELINE_SIZE
+#define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+#else
+#define CACHELINE_ALIGNED
+#endif
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
index b95379557c..f434520eb9 100644
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -18,4 +18,5 @@ else()
     dsn_add_shared_library()
 endif()
 
+add_subdirectory(long_adder_bench)
 add_subdirectory(test)
diff --git a/src/utils/long_adder.cpp b/src/utils/long_adder.cpp
new file mode 100644
index 0000000000..af080f3d5b
--- /dev/null
+++ b/src/utils/long_adder.cpp
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/long_adder.h>
+
+#ifdef __aarch64__
+#define _mm_free(p) free(p)
+#define _mm_malloc(a, b) malloc(a)
+#else
+#include <mm_malloc.h>
+#endif //__aarch64__
+
+#include <unistd.h>
+
+#include <cstdlib>
+#include <new>
+#include <string>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/rand.h>
+#include <dsn/utility/safe_strerror_posix.h>
+
+namespace dsn {
+
+namespace {
+
+const int64_t kNumCpus = sysconf(_SC_NPROCESSORS_ONLN);
+uint32_t compute_num_cells()
+{
+    // Nearest power of two >= NCPU. kNumCpus stays signed because sysconf() returns -1 on
+    // failure: a non-positive count then yields a single cell instead of an endless loop.
+    uint32_t n = 1;
+    while (static_cast<int64_t>(n) < kNumCpus && n < (1U << 31)) {
+        n <<= 1;
+    }
+    return n;
+}
+const uint32_t kNumCells = compute_num_cells();
+const uint32_t kCellMask = kNumCells - 1;
+
+cacheline_aligned_int64 *const kCellsLocked = reinterpret_cast<cacheline_aligned_int64 *>(-1L);
+
+} // anonymous namespace
+
+//
+// cacheline_aligned_int64
+//
+
+/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size)
+{
+    void *buffer = nullptr;
+    int err = posix_memalign(&buffer, CACHELINE_SIZE, sizeof(cacheline_aligned_int64) * size);
+
+    // Generally there are 2 possible errors for posix_memalign as below:
+    // [EINVAL]
+    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).
+    // [ENOMEM]
+    //     There is insufficient memory available with the requested alignment.
+    // Both are treated as fatal here, thus making an assertion is enough.
+    dassert_f(err == 0, "error calling posix_memalign: {}", utils::safe_strerror(err).c_str());
+    // Construct the cells in the raw buffer, then check each one's alignment and zero it.
+    cacheline_aligned_int64 *array = new (buffer) cacheline_aligned_int64[size];
+    for (uint32_t i = 0; i < size; ++i) {
+        cacheline_aligned_int64 *elem = &(array[i]);
+        dassert_f(
+            (reinterpret_cast<const uintptr_t>(elem) & (sizeof(cacheline_aligned_int64) - 1)) == 0,
+            "unaligned cacheline_aligned_int64: array={}, index={}, elem={}, mask={}",
+            fmt::ptr(array),
+            i,
+            fmt::ptr(elem),
+            sizeof(cacheline_aligned_int64) - 1);
+        array[i]._value.store(0);
+    }
+    // The buffer comes from posix_memalign(), hence the deleter releases it with free().
+    return cacheline_aligned_int64_ptr(array, [](cacheline_aligned_int64 *array) { free(array); });
+}
+
+/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64()
+{
+    return new_cacheline_aligned_int64_array(1); // a single cell is an array of length 1
+}
+
+//
+// striped64
+//
+__thread uint64_t striped64::_tls_hashcode = 0;
+
+uint64_t striped64::get_tls_hashcode()
+{
+    // Seed this thread's hashcode lazily on first use; 0 means "not seeded yet".
+    if (dsn_unlikely(_tls_hashcode == 0)) {
+        // Prefer the thread id as the seed and fall back to a random number if it is 0.
+        uint64_t seed = static_cast<uint64_t>(utils::get_current_tid());
+        seed = (seed != 0) ? seed : rand::next_u64();
+        _tls_hashcode = (seed != 0) ? seed : 1; // zero would also be a fixed point of xorshift
+    }
+    return _tls_hashcode;
+}
+
+template <class Updater>
+void striped64::retry_update(rehash to_rehash, Updater updater)
+{
+    uint64_t h = get_tls_hashcode();
+    // Applies `updater` (old value -> new value) by CAS until one attempt succeeds. `to_rehash`
+    // tells whether the caller already lost a CAS on its cell, in which case we rehash first.
+    //
+    // There are three operations in this loop:
+    // 1. Try to CAS the cell selected by the hashcode if the table exists, always through the
+    // `cells` snapshot validated in this iteration. When there's contention, rehash to try a
+    // different cell.
+    // 2. Try to initialize the hash table.
+    // 3. Try to update the base counter. All of this hinges on CAS, hence the retry loop.
+    while (true) {
+        cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);
+        if (cells != nullptr && cells != kCellsLocked) {
+            if (to_rehash == kRehash) {
+                // CAS failed already, rehash before trying to increment.
+                to_rehash = kNoRehash;
+            } else {
+                cacheline_aligned_int64 *cell = &(cells[h & kCellMask]);
+                int64_t v = cell->_value.load(std::memory_order_relaxed);
+                if (cell->compare_and_set(v, updater(v))) {
+                    // Successfully CAS'd the corresponding cell, done.
+                    break;
+                }
+            }
+            // Rehash since we failed to CAS, either previously or just now.
+            h ^= h << 13;
+            h ^= h >> 17;
+            h ^= h << 5;
+        } else if (cells == nullptr && _cells.compare_exchange_weak(cells, kCellsLocked)) {
+            _cells_holder = new_cacheline_aligned_int64_array(kNumCells);
+            cells = _cells_holder.get();
+            _cells.store(cells, std::memory_order_release);
+        } else {
+            // Fallback to adding to the base value.
+            // Means the table wasn't initialized or we failed to init it.
+            int64_t v = _base.load(std::memory_order_relaxed);
+            if (cas_base(v, updater(v))) {
+                break;
+            }
+        }
+    }
+
+    // Record index for next time
+    _tls_hashcode = h;
+}
+
+void striped64::internal_reset(int64_t initial_value)
+{
+    _base.store(initial_value);
+    // Wait until no thread is in the middle of installing the cell table (kCellsLocked).
+    cacheline_aligned_int64 *c;
+    do {
+        c = _cells.load(std::memory_order_acquire);
+    } while (c == kCellsLocked);
+    // The initial value is kept in _base alone, so every existing cell goes back to zero.
+    if (c != nullptr) {
+        for (uint32_t i = 0; i < kNumCells; ++i) {
+            c[i]._value.store(0);
+        }
+    }
+}
+
+void striped_long_adder::increment_by(int64_t x)
+{
+    // Adds x: with a hash table, CAS this thread's cell and on failure let retry_update rehash and
+    // retry; without one, CAS the base counter and on failure let retry_update init the table.
+    cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);
+    if (cells != nullptr && cells != kCellsLocked) {
+        cacheline_aligned_int64 *cell = &(cells[get_tls_hashcode() & kCellMask]);
+        dassert_f(
+            (reinterpret_cast<const uintptr_t>(cell) & (sizeof(cacheline_aligned_int64) - 1)) == 0,
+            "unaligned cacheline_aligned_int64 not allowed for striped64: cell={}, mask={}",
+            fmt::ptr(cell),
+            sizeof(cacheline_aligned_int64) - 1);
+        // Fast path: one CAS attempt on the cell this thread currently hashes to.
+        const int64_t old = cell->_value.load(std::memory_order_relaxed);
+        if (!cell->compare_and_set(old, old + x)) {
+            // When we hit a hash table contention, signal retry_update to rehash.
+            retry_update(kRehash, [x](int64_t old) { return old + x; });
+        }
+    } else {
+        int64_t b = _base.load(std::memory_order_relaxed);
+        if (!cas_base(b, b + x)) {
+            // Attempt to initialize the table. No need to rehash since the contention was for the
+            // base counter, not the hash table.
+            retry_update(kNoRehash, [x](int64_t old) { return old + x; });
+        }
+    }
+}
+
+//
+// striped_long_adder
+//
+
+int64_t striped_long_adder::value() const
+{
+    int64_t total = _base.load(std::memory_order_relaxed);
+    cacheline_aligned_int64 *const cells = _cells.load(std::memory_order_acquire);
+    if (cells == nullptr || cells == kCellsLocked) {
+        return total; // no published table yet: only the base counter holds a value
+    }
+    for (uint32_t idx = 0; idx != kNumCells; ++idx) {
+        total += cells[idx]._value.load(std::memory_order_relaxed);
+    }
+    return total;
+}
+
+int64_t striped_long_adder::fetch_and_reset()
+{
+    int64_t total = _base.exchange(0, std::memory_order_relaxed);
+    cacheline_aligned_int64 *const cells = _cells.load(std::memory_order_acquire);
+    if (cells == nullptr || cells == kCellsLocked) {
+        return total; // no published table yet: only the base counter had to be drained
+    }
+    for (uint32_t idx = 0; idx != kNumCells; ++idx) {
+        total += cells[idx]._value.exchange(0, std::memory_order_relaxed);
+    }
+    return total;
+}
+
+//
+// concurrent_long_adder
+//
+
+concurrent_long_adder::concurrent_long_adder()
+    : _cells_holder(new_cacheline_aligned_int64_array(kNumCells)), _cells(_cells_holder.get())
+{
+}
+
+void concurrent_long_adder::increment_by(int64_t x)
+{
+    const uint32_t slot = static_cast<uint32_t>(utils::get_current_tid()) & kCellMask;
+    _cells[slot]._value.fetch_add(x, std::memory_order_relaxed); // cell picked by thread id
+}
+
+int64_t concurrent_long_adder::value() const
+{
+    int64_t total = 0; // the counter's value is the sum over all cells
+    for (uint32_t idx = 0; idx != kNumCells; ++idx) {
+        total += _cells[idx]._value.load(std::memory_order_relaxed);
+    }
+    return total;
+}
+
+void concurrent_long_adder::set(int64_t val)
+{
+    for (uint32_t idx = 0; idx != kNumCells; ++idx) {
+        _cells[idx]._value.store(0, std::memory_order_relaxed); // clear every cell first ...
+    }
+    _cells[0]._value.store(val, std::memory_order_relaxed); // ... then keep val in cell 0 alone
+}
+
+int64_t concurrent_long_adder::fetch_and_reset()
+{
+    int64_t total = 0; // each cell is swapped with zero and its old value accumulated
+    for (uint32_t idx = 0; idx != kNumCells; ++idx) {
+        total += _cells[idx]._value.exchange(0, std::memory_order_relaxed);
+    }
+    return total;
+}
+
+} // namespace dsn
diff --git a/src/utils/long_adder_bench/CMakeLists.txt b/src/utils/long_adder_bench/CMakeLists.txt
new file mode 100644
index 0000000000..f63efc8a96
--- /dev/null
+++ b/src/utils/long_adder_bench/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(MY_PROJ_NAME long_adder_bench)
+project(${MY_PROJ_NAME} C CXX)
+
+# Source files under CURRENT project directory will be automatically included.
+# You can manually set MY_PROJ_SRC to include source files under other directories.
+set(MY_PROJ_SRC "")
+
+# Search mode for source files under CURRENT project directory?
+# "GLOB_RECURSE" for recursive search
+# "GLOB" for non-recursive search
+set(MY_SRC_SEARCH_MODE "GLOB")
+
+set(MY_PROJ_LIBS dsn_runtime dsn_utils)
+
+set(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)
+
+# Extra files that will be installed
+set(MY_BINPLACES "")
+
+dsn_add_executable()
+
+dsn_install_executable()
diff --git a/src/utils/long_adder_bench/long_adder_bench.cpp b/src/utils/long_adder_bench/long_adder_bench.cpp
new file mode 100644
index 0000000000..6fbb249b8f
--- /dev/null
+++ b/src/utils/long_adder_bench/long_adder_bench.cpp
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <thread>
+#include <vector>
+
+#include <fmt/ostream.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/long_adder.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/string_conv.h>
+
+// The simplest implementation of long adder: just wrap std::atomic<int64_t>.
+class simple_long_adder
+{
+public:
+    simple_long_adder() = default;
+
+    ~simple_long_adder() = default;
+    // Every operation below is a single relaxed access to the one shared atomic.
+    inline void increment_by(int64_t x) { _value.fetch_add(x, std::memory_order_relaxed); }
+
+    inline int64_t value() const { return _value.load(std::memory_order_relaxed); }
+
+    inline void reset() { set(0); }
+    // Returns the value held immediately before it is replaced by zero.
+    inline int64_t fetch_and_reset() { return _value.exchange(0, std::memory_order_relaxed); }
+
+private:
+    inline void set(int64_t val) { _value.store(val, std::memory_order_relaxed); }
+
+    std::atomic<int64_t> _value{0};
+
+    DISALLOW_COPY_AND_ASSIGN(simple_long_adder);
+};
+
+// A modification of perf_counter_number_atomic from perf_counter.
+// The original version implements its main interfaces as virtual functions, which slows down
+// the execution; this modification has removed those virtual functions.
+#define DIVIDE_CONTAINER 107
+class divided_long_adder
+{
+public:
+    divided_long_adder()
+    {
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            _value[i].store(0);
+        }
+    }
+
+    ~divided_long_adder() = default;
+    // Adds x to the slot chosen by the calling thread's id modulo DIVIDE_CONTAINER.
+    inline void increment_by(int64_t x)
+    {
+        auto task_id = static_cast<uint32_t>(dsn::utils::get_current_tid());
+        _value[task_id % DIVIDE_CONTAINER].fetch_add(x, std::memory_order_relaxed);
+    }
+    // Sums all slots; each slot is read separately with relaxed ordering.
+    int64_t value() const
+    {
+        int64_t sum = 0;
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            sum += _value[i].load(std::memory_order_relaxed);
+        }
+        return sum;
+    }
+
+    inline void reset() { set(0); }
+    // Returns the sum of all slots while replacing each of them with zero.
+    int64_t fetch_and_reset()
+    {
+        int64_t sum = 0;
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            sum += _value[i].exchange(0, std::memory_order_relaxed);
+        }
+        return sum;
+    }
+
+private:
+    void set(int64_t val)
+    {
+        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {
+            _value[i].store(0, std::memory_order_relaxed);
+        }
+        _value[0].store(val, std::memory_order_relaxed); // the whole value is kept in slot 0
+    }
+
+    std::atomic<int64_t> _value[DIVIDE_CONTAINER];
+
+    DISALLOW_COPY_AND_ASSIGN(divided_long_adder);
+};
+
+void print_usage(const char *cmd)
+{
+    fmt::print(stderr, "USAGE: {} <num_operations> <num_threads> <long_adder_type>\n", cmd);
+    fmt::print(stderr, "Run a simple benchmark that executes each sort of long adder.\n\n");
+    // One entry per positional argument, in command-line order.
+    fmt::print(
+        stderr,
+        "    <num_operations>       the number of increment operations executed by each thread\n");
+    fmt::print(stderr, "    <num_threads>          the number of threads\n");
+    fmt::print(stderr,
+               "    <long_adder_type>      the type of long adder: simple_long_adder, "
+               "divided_long_adder, striped_long_adder, concurrent_long_adder\n");
+}
+
+template <typename Adder>
+void run_bench(int64_t num_operations, int64_t num_threads, const char *name)
+{
+    dsn::long_adder_wrapper<Adder> adder;
+    // Benchmark: num_threads threads each call increment() num_operations times on one adder.
+    std::vector<std::thread> threads;
+    // The measured interval spans thread creation, all increments and the joins.
+    uint64_t start = dsn_now_ns();
+    for (int64_t i = 0; i < num_threads; i++) {
+        threads.emplace_back([num_operations, &adder]() {
+            for (int64_t i = 0; i < num_operations; ++i) {
+                adder.increment();
+            }
+        });
+    }
+    for (auto &t : threads) {
+        t.join();
+    }
+    uint64_t end = dsn_now_ns();
+    // Convert the elapsed nanoseconds to fractional seconds for printing.
+    auto duration_ns = static_cast<int64_t>(end - start);
+    std::chrono::nanoseconds nano(duration_ns);
+    auto duration_s = std::chrono::duration_cast<std::chrono::duration<double>>(nano).count();
+
+    fmt::print(stdout,
+               "Running {} operations of {} with {} threads took {} seconds, result = {}.\n",
+               num_operations,
+               name,
+               num_threads,
+               duration_s,
+               adder.value());
+}
+
+int main(int argc, char **argv)
+{
+    if (argc < 4) {
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // Reject unparsable or negative operation counts.
+    int64_t num_operations = 0;
+    if (!dsn::buf2int64(argv[1], num_operations) || num_operations < 0) {
+        fmt::print(stderr, "Invalid num_operations: {}\n\n", argv[1]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // Reject unparsable thread counts as well as counts below 1: nothing would be measured.
+    int64_t num_threads = 0;
+    if (!dsn::buf2int64(argv[2], num_threads) || num_threads <= 0) {
+        fmt::print(stderr, "Invalid num_threads: {}\n\n", argv[2]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    // Dispatch on the adder type name; an unknown name is reported as an error.
+    const char *long_adder_type = argv[3];
+    if (strcmp(long_adder_type, "simple_long_adder") == 0) {
+        run_bench<simple_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "divided_long_adder") == 0) {
+        run_bench<divided_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "striped_long_adder") == 0) {
+        run_bench<dsn::striped_long_adder>(num_operations, num_threads, long_adder_type);
+    } else if (strcmp(long_adder_type, "concurrent_long_adder") == 0) {
+        run_bench<dsn::concurrent_long_adder>(num_operations, num_threads, long_adder_type);
+    } else {
+        fmt::print(stderr, "Invalid long_adder_type: {}\n\n", long_adder_type);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    return 0;
+}
diff --git a/src/utils/test/long_adder_test.cpp b/src/utils/test/long_adder_test.cpp
new file mode 100644
index 0000000000..71d8e4ffb9
--- /dev/null
+++ b/src/utils/test/long_adder_test.cpp
@@ -0,0 +1,265 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <functional>
+#include <thread>
+#include <vector>
+
+#include <fmt/ostream.h>
+#include <gtest/gtest.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/long_adder.h>
+
+namespace dsn {
+
+template <typename T>
+struct type_parse_traits;
+
+#define REGISTER_PARSE_TYPE(X)                                                                     \
+    template <>                                                                                    \
+    struct type_parse_traits<X>                                                                    \
+    {                                                                                              \
+        static const char *name;                                                                   \
+    };                                                                                             \
+    const char *type_parse_traits<X>::name = #X
+
+REGISTER_PARSE_TYPE(striped_long_adder);
+REGISTER_PARSE_TYPE(concurrent_long_adder);
+
+template <typename Adder>
+class long_adder_test
+{
+public:
+    long_adder_test() = default;
+    // run_*: run the workload on num_threads threads, then check the adder against `result`.
+    void run_increment_by(int64_t base_value,
+                          int64_t delta,
+                          int64_t num_operations,
+                          int64_t num_threads,
+                          int64_t &result)
+    {
+        execute(num_threads,
+                [this, delta, num_operations]() { this->increment_by(delta, num_operations); });
+        result = base_value + delta * num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+
+    void
+    run_increment(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)
+    {
+        execute(num_threads, [this, num_operations]() { this->increment(num_operations); });
+        result = base_value + num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+
+    void
+    run_decrement(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)
+    {
+        execute(num_threads, [this, num_operations]() { this->decrement(num_operations); });
+        result = base_value - num_operations * num_threads;
+        ASSERT_EQ(result, _adder.value());
+    }
+    // Exercises every operation in sequence, tracking the expected value in base_value.
+    void run_basic_cases(int64_t num_threads)
+    {
+        fmt::print(stdout,
+                   "Ready to run basic cases for {} with {} threads.\n",
+                   type_parse_traits<Adder>::name,
+                   num_threads);
+
+        // Initially should be zero
+        int64_t base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Do basic test with custom number of threads
+        auto do_increment_by = std::bind(&long_adder_test::run_increment_by,
+                                         this,
+                                         std::placeholders::_1,
+                                         std::placeholders::_2,
+                                         std::placeholders::_3,
+                                         num_threads,
+                                         std::placeholders::_4);
+        auto do_increment = std::bind(&long_adder_test::run_increment,
+                                      this,
+                                      std::placeholders::_1,
+                                      std::placeholders::_2,
+                                      num_threads,
+                                      std::placeholders::_3);
+        auto do_decrement = std::bind(&long_adder_test::run_decrement,
+                                      this,
+                                      std::placeholders::_1,
+                                      std::placeholders::_2,
+                                      num_threads,
+                                      std::placeholders::_3);
+
+        // Test increment_by
+        do_increment_by(base_value, 1, 1, base_value);
+        do_increment_by(base_value, 100, 1, base_value);
+        do_increment_by(base_value, 10, 10, base_value);
+        do_increment_by(base_value, -10, 10, base_value);
+        do_increment_by(base_value, -100, 1, base_value);
+        do_increment_by(base_value, -1, 1, base_value);
+        ASSERT_EQ(0, _adder.value());
+        ASSERT_EQ(0, base_value);
+
+        // Test increment
+        do_increment(base_value, 1, base_value);
+        do_increment(base_value, 100, base_value);
+
+        // Fetch and reset
+        ASSERT_EQ(base_value, _adder.fetch_and_reset());
+        base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Test decrement
+        do_decrement(base_value, 100, base_value);
+        do_decrement(base_value, 1, base_value);
+
+        // Reset at last
+        _adder.reset();
+        base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+    }
+    // Times num_operations increments, then as many decrements, on each of num_threads threads.
+    void run_concurrent_cases(int64_t num_operations, int64_t num_threads)
+    {
+        fmt::print(
+            stdout, "Ready to run concurrent cases for {}:\n", type_parse_traits<Adder>::name);
+
+        // Initially adder should be zero
+        int64_t base_value = 0;
+        ASSERT_EQ(base_value, _adder.value());
+
+        // Define runner to time each case
+        auto runner = [num_operations, num_threads](
+            const char *name, std::function<void(int64_t &)> func, int64_t &result) {
+            uint64_t start = dsn_now_ns();
+            func(result);
+            uint64_t end = dsn_now_ns();
+
+            auto duration_ns = static_cast<int64_t>(end - start);
+            std::chrono::nanoseconds nano(duration_ns);
+            auto duration_ms =
+                std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(nano).count();
+
+            fmt::print(stdout,
+                       "Running {} operations of {} with {} threads took {} ms.\n",
+                       num_operations,
+                       name,
+                       num_threads,
+                       duration_ms);
+        };
+
+        // Test increment
+        auto do_increment = std::bind(&long_adder_test::run_increment,
+                                      this,
+                                      base_value,
+                                      num_operations,
+                                      num_threads,
+                                      std::placeholders::_1);
+        runner("Increment", do_increment, base_value);
+        // base_value is bound by value: build do_decrement only after the increment updated it.
+        // Test decrement
+        auto do_decrement = std::bind(&long_adder_test::run_decrement,
+                                      this,
+                                      base_value,
+                                      num_operations,
+                                      num_threads,
+                                      std::placeholders::_1);
+        runner("Decrement", do_decrement, base_value);
+
+        // At last adder should also be zero
+        ASSERT_EQ(0, _adder.value());
+        ASSERT_EQ(0, base_value);
+    }
+
+private:
+    void increment_by(int64_t delta, int64_t n)
+    {
+        for (int64_t i = 0; i < n; ++i) {
+            _adder.increment_by(delta);
+        }
+    }
+
+    void increment(int64_t num)
+    {
+        for (int64_t i = 0; i < num; ++i) {
+            _adder.increment();
+        }
+    }
+
+    void decrement(int64_t num)
+    {
+        for (int64_t i = 0; i < num; ++i) {
+            _adder.decrement();
+        }
+    }
+    // Starts num_threads threads that each invoke runner, and waits for all of them.
+    void execute(int64_t num_threads, std::function<void()> runner)
+    {
+        std::vector<std::thread> threads;
+        for (int64_t i = 0; i < num_threads; i++) {
+            threads.emplace_back(runner);
+        }
+        for (auto &t : threads) {
+            t.join();
+        }
+    }
+
+    long_adder_wrapper<Adder> _adder;
+};
+
+template <typename Adder>
+void run_basic_cases()
+{
+    long_adder_test<Adder> test;
+    test.run_basic_cases(1);
+    test.run_basic_cases(4);
+}
+// Variadic overload: runs the basic cases for every listed adder type in turn.
+template <typename Adder0, typename Adder1, typename... Others>
+void run_basic_cases()
+{
+    run_basic_cases<Adder0>();
+    run_basic_cases<Adder1, Others...>();
+}
+
+template <typename Adder>
+void run_concurrent_cases()
+{
+    long_adder_test<Adder> test;
+    test.run_concurrent_cases(10000000, 1);
+    test.run_concurrent_cases(10000000, 4);
+}
+// Variadic overload: runs the concurrent cases for every listed adder type in turn.
+template <typename Adder0, typename Adder1, typename... Others>
+void run_concurrent_cases()
+{
+    run_concurrent_cases<Adder0>();
+    run_concurrent_cases<Adder1, Others...>();
+}
+
+TEST(long_adder_test, basic_cases) { run_basic_cases<striped_long_adder, concurrent_long_adder>(); }
+
+TEST(long_adder_test, concurrent_cases)
+{
+    run_concurrent_cases<striped_long_adder, concurrent_long_adder>();
+}
+
+} // namespace dsn

From 1e3789b303b2897cc841a02304940c21f143228d Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Thu, 10 Mar 2022 14:15:18 +0800
Subject: [PATCH 12/21] feat(new_metrics): implement the metric entity & its
 prototype (#1070)

---
 include/dsn/utility/metrics.h   | 92 +++++++++++++++++++++++++++++++++
 src/utils/metrics.cpp           | 55 ++++++++++++++++++++
 src/utils/test/metrics_test.cpp | 80 ++++++++++++++++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 include/dsn/utility/metrics.h
 create mode 100644 src/utils/metrics.cpp
 create mode 100644 src/utils/test/metrics_test.cpp

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
new file mode 100644
index 0000000000..5908425742
--- /dev/null
+++ b/include/dsn/utility/metrics.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+#include <dsn/utility/autoref_ptr.h>
+#include <dsn/utility/ports.h>
+
+// A metric library (for details see https://github.com/apache/incubator-pegasus/issues/922)
+// inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
+//
+//
+// Example of defining and instantiating a metric entity
+// -----------------------------------------------------
+// Define an entity type at the top of your .cpp file (not within any namespace):
+// METRIC_DEFINE_entity(my_entity);
+//
+// To use the entity type, declare it at the top of any .h/.cpp file (not within any namespace):
+// METRIC_DECLARE_entity(my_entity);
+//
+// Instantiating the entity in whatever class represents the entity:
+// entity_instance = METRIC_ENTITY_my_entity.instantiate(my_entity_id, ...);
+
+// Define a new entity type.
+#define METRIC_DEFINE_entity(name) ::dsn::metric_entity_prototype METRIC_ENTITY_##name(#name)
+
+// The following macros act as forward declarations for entity types and metric prototypes.
+#define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
+
+namespace dsn {
+
+class metric_entity : public ref_counter
+{
+public:
+    using attr_map = std::unordered_map<std::string, std::string>;
+    // The identifier passed to metric_entity_prototype::instantiate().
+    const std::string &id() const { return _id; }
+    // All attributes, including the reserved "entity" one set by the prototype.
+    const attr_map &attributes() const { return _attrs; }
+
+private:
+    friend class metric_entity_prototype;
+    friend class ref_ptr<metric_entity>;
+    // Both private: only the friends above can construct or destroy an entity.
+    metric_entity(const std::string &id, attr_map &&attrs);
+
+    ~metric_entity();
+
+    const std::string _id;
+    const attr_map _attrs;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_entity);
+};
+
+using metric_entity_ptr = ref_ptr<metric_entity>;
+
+class metric_entity_prototype
+{
+public:
+    explicit metric_entity_prototype(const char *name);
+    ~metric_entity_prototype();
+    // The entity type name given to the constructor (e.g. by METRIC_DEFINE_entity).
+    const char *name() const { return _name; }
+
+    // Create an entity with the given ID and attributes, if any; the key "entity" is reserved.
+    metric_entity_ptr instantiate(const std::string &id, metric_entity::attr_map attrs) const;
+    metric_entity_ptr instantiate(const std::string &id) const;
+
+private:
+    const char *const _name; // not copied: the string must outlive the prototype
+
+    DISALLOW_COPY_AND_ASSIGN(metric_entity_prototype);
+};
+
+} // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
new file mode 100644
index 0000000000..0704ef7d7b
--- /dev/null
+++ b/src/utils/metrics.cpp
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/metrics.h>
+
+#include <utility>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+
+namespace dsn {
+
+metric_entity::metric_entity(const std::string &id, attr_map &&attrs)
+    : _id(id), _attrs(std::move(attrs))
+{
+}
+
+metric_entity::~metric_entity() {}
+
+ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id,
+                                                            metric_entity::attr_map attrs) const
+{
+    dassert_f(attrs.find("entity") == attrs.end(), "{}'s attribute \"entity\" is reserved", id);
+
+    attrs["entity"] = _name;
+    ref_ptr<metric_entity> entity(new metric_entity(id, std::move(attrs)));
+
+    return entity;
+}
+
+ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id) const
+{
+
+    return instantiate(id, {});
+}
+
+metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name) {}
+
+metric_entity_prototype::~metric_entity_prototype() {}
+
+} // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
new file mode 100644
index 0000000000..cf15356594
--- /dev/null
+++ b/src/utils/test/metrics_test.cpp
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/metrics.h>
+
+#include <gtest/gtest.h>
+
+METRIC_DEFINE_entity(my_server);
+METRIC_DEFINE_entity(my_table);
+METRIC_DEFINE_entity(my_replica);
+
+namespace dsn {
+
+TEST(metrics_test, create_entity)
+{
+    // Test cases:
+    // - create an entity by instantiate(id) without any attribute
+    // - create another entity by instantiate(id, attrs) without any attribute
+    // - create an entity with an attribute
+    // - create another entity with an attribute
+    // - create an entity with 2 attributes
+    // - create another entity with 2 attributes
+    struct test_case
+    {
+        metric_entity_prototype *prototype;
+        std::string type_name;
+        std::string entity_id;
+        metric_entity::attr_map entity_attrs;
+        bool use_attrs_arg_if_empty;
+    } tests[] = {{&METRIC_ENTITY_my_server, "my_server", "server_1", {}, false},
+                 {&METRIC_ENTITY_my_server, "my_server", "server_2", {}, true},
+                 {&METRIC_ENTITY_my_table, "my_table", "test_1", {{"table", "test_1"}}, true},
+                 {&METRIC_ENTITY_my_table, "my_table", "test_2", {{"table", "test_2"}}, true},
+                 {&METRIC_ENTITY_my_replica,
+                  "my_replica",
+                  "1.2",
+                  {{"table", "test_1"}, {"partition", "2"}},
+                  true},
+                 {&METRIC_ENTITY_my_replica,
+                  "my_replica",
+                  "2.5",
+                  {{"table", "test_2"}, {"partition", "5"}},
+                  true}};
+    for (const auto &test : tests) {
+        ASSERT_EQ(test.prototype->name(), test.type_name);
+
+        metric_entity_ptr entity;
+        if (test.entity_attrs.empty() && !test.use_attrs_arg_if_empty) {
+            entity = test.prototype->instantiate(test.entity_id);
+        } else {
+            entity = test.prototype->instantiate(test.entity_id, test.entity_attrs);
+        }
+
+        auto id = entity->id();
+        ASSERT_EQ(id, test.entity_id);
+
+        auto attrs = entity->attributes();
+        ASSERT_NE(attrs.find("entity"), attrs.end());
+        ASSERT_EQ(attrs["entity"], test.type_name);
+        ASSERT_EQ(attrs.size(), test.entity_attrs.size() + 1);
+        ASSERT_EQ(attrs.erase("entity"), 1);
+        ASSERT_EQ(attrs, test.entity_attrs);
+    }
+}
+
+} // namespace dsn

From fe1f009f870bb3367ffca719446fea6473fcf880 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Sat, 12 Mar 2022 12:23:34 +0800
Subject: [PATCH 13/21] feat(new_metrics): implement the metric registry
 (#1073)

---
 include/dsn/utility/metrics.h   | 34 +++++++++++++++++++--
 src/utils/metrics.cpp           | 53 ++++++++++++++++++++++++++++-----
 src/utils/test/metrics_test.cpp | 35 ++++++++++++++++++++++
 3 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 5908425742..03df62b5a9 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,11 +17,13 @@
 
 #pragma once
 
+#include <mutex>
 #include <string>
 #include <unordered_map>
 
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/ports.h>
+#include <dsn/utility/singleton.h>
 
 // A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)
 // inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
@@ -53,18 +55,22 @@ class metric_entity : public ref_counter
 
     const std::string &id() const { return _id; }
 
-    const attr_map &attributes() const { return _attrs; }
+    attr_map attributes() const;
 
 private:
-    friend class metric_entity_prototype;
+    friend class metric_registry;
     friend class ref_ptr<metric_entity>;
 
     metric_entity(const std::string &id, attr_map &&attrs);
 
     ~metric_entity();
 
+    void set_attributes(attr_map &&attrs);
+
     const std::string _id;
-    const attr_map _attrs;
+
+    mutable std::mutex _mtx;
+    attr_map _attrs;
 
     DISALLOW_COPY_AND_ASSIGN(metric_entity);
 };
@@ -89,4 +95,26 @@ class metric_entity_prototype
     DISALLOW_COPY_AND_ASSIGN(metric_entity_prototype);
 };
 
+class metric_registry : public utils::singleton<metric_registry>
+{
+public:
+    using entity_map = std::unordered_map<std::string, metric_entity_ptr>;
+
+    entity_map entities() const;
+
+private:
+    friend class metric_entity_prototype;
+    friend class utils::singleton<metric_registry>;
+
+    metric_registry();
+    ~metric_registry();
+
+    metric_entity_ptr find_or_create_entity(const std::string &id, metric_entity::attr_map &&attrs);
+
+    mutable std::mutex _mtx;
+    entity_map _entities;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_registry);
+};
+
 } // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 0704ef7d7b..90c4cd759e 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -31,20 +31,29 @@ metric_entity::metric_entity(const std::string &id, attr_map &&attrs)
 
 metric_entity::~metric_entity() {}
 
-ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id,
-                                                            metric_entity::attr_map attrs) const
+metric_entity::attr_map metric_entity::attributes() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    return _attrs;
+}
+
+void metric_entity::set_attributes(attr_map &&attrs)
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    _attrs = std::move(attrs);
+}
+
+metric_entity_ptr metric_entity_prototype::instantiate(const std::string &id,
+                                                       metric_entity::attr_map attrs) const
 {
     dassert_f(attrs.find("entity") == attrs.end(), "{}'s attribute \"entity\" is reserved", id);
 
     attrs["entity"] = _name;
-    ref_ptr<metric_entity> entity(new metric_entity(id, std::move(attrs)));
-
-    return entity;
+    return metric_registry::instance().find_or_create_entity(id, std::move(attrs));
 }
 
-ref_ptr<metric_entity> metric_entity_prototype::instantiate(const std::string &id) const
+metric_entity_ptr metric_entity_prototype::instantiate(const std::string &id) const
 {
-
     return instantiate(id, {});
 }
 
@@ -52,4 +61,34 @@ metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name)
 
 metric_entity_prototype::~metric_entity_prototype() {}
 
+metric_registry::metric_registry() {}
+
+metric_registry::~metric_registry() {}
+
+metric_registry::entity_map metric_registry::entities() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+
+    return _entities;
+}
+
+metric_entity_ptr metric_registry::find_or_create_entity(const std::string &id,
+                                                         metric_entity::attr_map &&attrs)
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+
+    entity_map::const_iterator iter = _entities.find(id);
+
+    metric_entity_ptr entity;
+    if (iter == _entities.end()) {
+        entity = new metric_entity(id, std::move(attrs));
+        _entities[id] = entity;
+    } else {
+        iter->second->set_attributes(std::move(attrs));
+        entity = iter->second;
+    }
+
+    return entity;
+}
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index cf15356594..e6f820a5b7 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -55,6 +55,8 @@ TEST(metrics_test, create_entity)
                   "2.5",
                   {{"table", "test_2"}, {"partition", "5"}},
                   true}};
+
+    metric_registry::entity_map entities;
     for (const auto &test : tests) {
         ASSERT_EQ(test.prototype->name(), test.type_name);
 
@@ -74,6 +76,39 @@ TEST(metrics_test, create_entity)
         ASSERT_EQ(attrs.size(), test.entity_attrs.size() + 1);
         ASSERT_EQ(attrs.erase("entity"), 1);
         ASSERT_EQ(attrs, test.entity_attrs);
+
+        ASSERT_EQ(entities.find(test.entity_id), entities.end());
+        entities[test.entity_id] = entity;
+    }
+
+    ASSERT_EQ(metric_registry::instance().entities(), entities);
+}
+
+TEST(metrics_test, recreate_entity)
+{
+    // Test cases:
+    // - add an attribute to an empty map
+    // - add another attribute to a single-element map
+    // - remove an attribute from the map
+    // - remove the only attribute from the map
+    struct test_case
+    {
+        metric_entity::attr_map entity_attrs;
+    } tests[] = {
+        {{{"name", "test"}}}, {{{"name", "test"}, {"id", "2"}}}, {{{"name", "test"}}}, {{{}}}};
+
+    const std::string entity_id("test");
+    auto expected_entity = METRIC_ENTITY_my_table.instantiate(entity_id);
+
+    for (const auto &test : tests) {
+        // the pointer of entity should be kept unchanged
+        auto entity = METRIC_ENTITY_my_table.instantiate(entity_id, test.entity_attrs);
+        ASSERT_EQ(entity, expected_entity);
+
+        // the attributes will updated
+        auto attrs = entity->attributes();
+        ASSERT_EQ(attrs.erase("entity"), 1);
+        ASSERT_EQ(attrs, test.entity_attrs);
     }
 }
 

From 028512ba16872f944c8314514b9772ae64a47839 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 18 Mar 2022 22:27:05 +0800
Subject: [PATCH 14/21] feat(new_metrics): implement the metric & its prototype
 (#1075)

---
 include/dsn/utility/casts.h     |  47 +++++++++++++
 include/dsn/utility/metrics.h   | 115 ++++++++++++++++++++++++++++++++
 src/utils/metrics.cpp           |  14 +++-
 src/utils/test/metrics_test.cpp | 112 ++++++++++++++++++++++++++++++-
 4 files changed, 285 insertions(+), 3 deletions(-)
 create mode 100644 include/dsn/utility/casts.h

diff --git a/include/dsn/utility/casts.h b/include/dsn/utility/casts.h
new file mode 100644
index 0000000000..8b3466843a
--- /dev/null
+++ b/include/dsn/utility/casts.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+namespace dsn {
+
+// down_cast<To>(from) converts a base-class pointer `from` to the derived-class pointer
+// type `To`. RTTI (dynamic_cast<>) is the usual approach but is not efficient, so a
+// compile-time assertion checks that the pointee of `To` is derived from `From`, and
+// static_cast<> then does the conversion. Where assert() is enabled, dynamic_cast<> is
+// still run as a double-check. A null `from` is converted to a null pointer of type `To`.
+
+template <typename To, typename From>
+inline To down_cast(From *from)
+{
+    // Perform a compile-time assertion to check whether <To> class is derived from <From> class.
+    static_assert(std::is_base_of<typename std::remove_pointer<From>::type,
+                                  typename std::remove_pointer<To>::type>::value,
+                  "<To> class is not derived from <From> class");
+
+    // Use RTTI to do double-check, though in practice the unit tests are seldom built in debug
+    // mode. For example, the unit tests of github CI for both rDSN and Pegasus are built in
+    // release mode.
+    assert(from == nullptr || dynamic_cast<To>(from) != nullptr);
+
+    return static_cast<To>(from);
+}
+
+} // namespace dsn
diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 03df62b5a9..e4448ba018 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -20,10 +20,14 @@
 #include <mutex>
 #include <string>
 #include <unordered_map>
+#include <utility>
 
 #include <dsn/utility/autoref_ptr.h>
+#include <dsn/utility/casts.h>
+#include <dsn/utility/enum_helper.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
+#include <dsn/utility/string_view.h>
 
 // A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)
 // inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).
@@ -48,15 +52,39 @@
 
 namespace dsn {
 
+class metric_prototype;
+class metric;
+using metric_ptr = ref_ptr<metric>;
+
 class metric_entity : public ref_counter
 {
 public:
     using attr_map = std::unordered_map<std::string, std::string>;
+    using metric_map = std::unordered_map<const metric_prototype *, metric_ptr>;
 
     const std::string &id() const { return _id; }
 
     attr_map attributes() const;
 
+    metric_map metrics() const;
+
+    // args are the parameters that are used to construct the object of MetricType
+    template <typename MetricType, typename... Args>
+    ref_ptr<MetricType> find_or_create(const metric_prototype *prototype, Args &&... args)
+    {
+        std::lock_guard<std::mutex> guard(_mtx);
+
+        metric_map::const_iterator iter = _metrics.find(prototype);
+        if (iter != _metrics.end()) {
+            auto raw_ptr = down_cast<MetricType *>(iter->second.get());
+            return raw_ptr;
+        }
+
+        ref_ptr<MetricType> ptr(new MetricType(prototype, std::forward<Args>(args)...));
+        _metrics[prototype] = ptr;
+        return ptr;
+    }
+
 private:
     friend class metric_registry;
     friend class ref_ptr<metric_entity>;
@@ -71,6 +99,7 @@ class metric_entity : public ref_counter
 
     mutable std::mutex _mtx;
     attr_map _attrs;
+    metric_map _metrics;
 
     DISALLOW_COPY_AND_ASSIGN(metric_entity);
 };
@@ -117,4 +146,90 @@ class metric_registry : public utils::singleton<metric_registry>
     DISALLOW_COPY_AND_ASSIGN(metric_registry);
 };
 
+enum class metric_unit
+{
+    kNanoSeconds,
+    kMicroSeconds,
+    kMilliSeconds,
+    kSeconds,
+    kInvalidUnit,
+};
+
+ENUM_BEGIN(metric_unit, metric_unit::kInvalidUnit)
+ENUM_REG(metric_unit::kNanoSeconds)
+ENUM_REG(metric_unit::kMicroSeconds)
+ENUM_REG(metric_unit::kMilliSeconds)
+ENUM_REG(metric_unit::kSeconds)
+ENUM_END(metric_unit)
+
+class metric_prototype
+{
+public:
+    struct ctor_args
+    {
+        const string_view entity_type;
+        const string_view name;
+        const metric_unit unit;
+        const string_view desc;
+    };
+
+    string_view entity_type() const { return _args.entity_type; }
+
+    string_view name() const { return _args.name; }
+
+    metric_unit unit() const { return _args.unit; }
+
+    string_view description() const { return _args.desc; }
+
+protected:
+    explicit metric_prototype(const ctor_args &args);
+    virtual ~metric_prototype();
+
+private:
+    const ctor_args _args;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_prototype);
+};
+
+// metric_prototype_with<MetricType> can help to implement the prototype of each type of metric
+// to construct a metric object conveniently.
+template <typename MetricType>
+class metric_prototype_with : public metric_prototype
+{
+public:
+    explicit metric_prototype_with(const ctor_args &args) : metric_prototype(args) {}
+    virtual ~metric_prototype_with() = default;
+
+    // Construct a metric object based on the instance of metric_entity.
+    template <typename... Args>
+    ref_ptr<MetricType> instantiate(const metric_entity_ptr &entity, Args &&... args) const
+    {
+        return entity->find_or_create<MetricType>(this, std::forward<Args>(args)...);
+    }
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(metric_prototype_with);
+};
+
+// Base class for each type of metric.
+// Every metric class should inherit from this class.
+//
+// User object should hold a ref_ptr of a metric, while the entity will hold another ref_ptr.
+// The ref count of a metric may become 1, which means the metric is only held by the entity:
+// After a period of configurable time, if the ref count is still 1, the metric will be dropped
+// in that it's considered to be useless. During the period when the metric is retained, once
+// the same one is instantiated again, it will not be removed; whether the metric is instantiated,
+// however, its lastest value is visible.
+class metric : public ref_counter
+{
+protected:
+    explicit metric(const metric_prototype *prototype);
+    virtual ~metric() = default;
+
+    const metric_prototype *const _prototype;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(metric);
+};
+
 } // namespace dsn
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 90c4cd759e..8f96792b81 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -17,8 +17,6 @@
 
 #include <dsn/utility/metrics.h>
 
-#include <utility>
-
 #include <dsn/c/api_utilities.h>
 #include <dsn/dist/fmt_logging.h>
 
@@ -37,6 +35,12 @@ metric_entity::attr_map metric_entity::attributes() const
     return _attrs;
 }
 
+metric_entity::metric_map metric_entity::metrics() const
+{
+    std::lock_guard<std::mutex> guard(_mtx);
+    return _metrics;
+}
+
 void metric_entity::set_attributes(attr_map &&attrs)
 {
     std::lock_guard<std::mutex> guard(_mtx);
@@ -91,4 +95,10 @@ metric_entity_ptr metric_registry::find_or_create_entity(const std::string &id,
     return entity;
 }
 
+metric_prototype::metric_prototype(const ctor_args &args) : _args(args) {}
+
+metric_prototype::~metric_prototype() {}
+
+metric::metric(const metric_prototype *prototype) : _prototype(prototype) {}
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index e6f820a5b7..5bc979bc43 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -19,10 +19,55 @@
 
 #include <gtest/gtest.h>
 
+namespace dsn {
+
+class my_gauge : public metric
+{
+public:
+    explicit my_gauge(const metric_prototype *prototype) : metric(prototype), _value(0) {}
+
+    my_gauge(const metric_prototype *prototype, int64_t value) : metric(prototype), _value(value) {}
+
+    virtual ~my_gauge() = default;
+
+    int64_t value() { return _value; }
+
+private:
+    int64_t _value;
+
+    DISALLOW_COPY_AND_ASSIGN(my_gauge);
+};
+
+using my_gauge_prototype = metric_prototype_with<my_gauge>;
+using my_gauge_ptr = ref_ptr<my_gauge>;
+
+} // namespace dsn
+
+#define METRIC_DEFINE_my_gauge(entity_type, name, unit, desc, ...)                                 \
+    ::dsn::my_gauge_prototype METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+
 METRIC_DEFINE_entity(my_server);
 METRIC_DEFINE_entity(my_table);
 METRIC_DEFINE_entity(my_replica);
 
+METRIC_DEFINE_my_gauge(my_server,
+                       my_server_latency,
+                       dsn::metric_unit::kMicroSeconds,
+                       "a server-level latency for test");
+METRIC_DEFINE_my_gauge(my_server,
+                       my_server_duration,
+                       dsn::metric_unit::kSeconds,
+                       "a server-level duration for test");
+
+METRIC_DEFINE_my_gauge(my_replica,
+                       my_replica_latency,
+                       dsn::metric_unit::kNanoSeconds,
+                       "a replica-level latency for test");
+METRIC_DEFINE_my_gauge(my_replica,
+                       my_replica_duration,
+                       dsn::metric_unit::kMilliSeconds,
+                       "a replica-level duration for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -105,11 +150,76 @@ TEST(metrics_test, recreate_entity)
         auto entity = METRIC_ENTITY_my_table.instantiate(entity_id, test.entity_attrs);
         ASSERT_EQ(entity, expected_entity);
 
-        // the attributes will updated
+        // the attributes will be updated
         auto attrs = entity->attributes();
         ASSERT_EQ(attrs.erase("entity"), 1);
         ASSERT_EQ(attrs, test.entity_attrs);
     }
 }
 
+TEST(metrics_test, create_metric)
+{
+    auto my_server_entity = METRIC_ENTITY_my_server.instantiate("server_3");
+    auto my_replica_entity =
+        METRIC_ENTITY_my_replica.instantiate("3.7", {{"table", "test_3"}, {"partition", "7"}});
+
+    // Test cases:
+    // - create a metric without any argument by an entity
+    // - create a metric with an argument by an entity
+    // - create a metric with an argument by another entity
+    // - create a metric without any argument by another entity
+    struct test_case
+    {
+        my_gauge_prototype *prototype;
+        metric_entity_ptr entity;
+        bool use_default_value;
+        int64_t value;
+    } tests[] = {{&METRIC_my_server_latency, my_server_entity, true, 0},
+                 {&METRIC_my_server_duration, my_server_entity, false, 10},
+                 {&METRIC_my_replica_latency, my_replica_entity, false, 100},
+                 {&METRIC_my_replica_duration, my_replica_entity, true, 0}};
+
+    using entity_map = std::unordered_map<metric_entity *, metric_entity::metric_map>;
+
+    entity_map expected_entities;
+    for (const auto &test : tests) {
+        my_gauge_ptr my_metric;
+        if (test.use_default_value) {
+            my_metric = test.prototype->instantiate(test.entity);
+        } else {
+            my_metric = test.prototype->instantiate(test.entity, test.value);
+        }
+
+        ASSERT_EQ(my_metric->value(), test.value);
+
+        auto iter = expected_entities.find(test.entity.get());
+        if (iter == expected_entities.end()) {
+            expected_entities[test.entity.get()] = {{test.prototype, my_metric}};
+        } else {
+            iter->second[test.prototype] = my_metric;
+        }
+    }
+
+    entity_map actual_entities;
+    auto entities = metric_registry::instance().entities();
+    for (const auto &entity : entities) {
+        if (expected_entities.find(entity.second.get()) != expected_entities.end()) {
+            actual_entities[entity.second.get()] = entity.second->metrics();
+        }
+    }
+
+    ASSERT_EQ(actual_entities, expected_entities);
+}
+
+TEST(metrics_test, recreate_metric)
+{
+    auto my_server_entity = METRIC_ENTITY_my_server.instantiate("server_4");
+
+    auto my_metric = METRIC_my_server_latency.instantiate(my_server_entity, 5);
+    ASSERT_EQ(my_metric->value(), 5);
+
+    auto new_metric = METRIC_my_server_latency.instantiate(my_server_entity, 10);
+    ASSERT_EQ(my_metric->value(), 5);
+}
+
 } // namespace dsn

From e7c4a6a640b3e9124c037a55828b4d22866691fe Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 23 Mar 2022 17:19:02 +0800
Subject: [PATCH 15/21] feat(new_metrics): implement the gauge (#1079)

---
 include/dsn/utility/metrics.h   |  77 +++++++++++++++++++++++-
 src/utils/test/metrics_test.cpp | 102 +++++++++++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index e4448ba018..06e76017fb 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,8 +17,10 @@
 
 #pragma once
 
+#include <atomic>
 #include <mutex>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
@@ -41,14 +43,36 @@
 // To use the entity type, declare it at the top of any .h/.cpp file (not within any namespace):
 // METRIC_DECLARE_entity(my_entity);
 //
-// Instantiating the entity in whatever class represents the entity:
+// Instantiating the entity in whatever class represents it:
 // entity_instance = METRIC_ENTITY_my_entity.instantiate(my_entity_id, ...);
+//
+//
+// Example of defining and instantiating a metric
+// -----------------------------------------------------
+// Define a metric prototype at the top of your .cpp file (not within any namespace):
+// METRIC_DEFINE_gauge_int64(my_entity,
+//                           my_gauge_name,
+//                           dsn::metric_unit::kMilliSeconds,
+//                           "the description for my gauge");
+//
+// To use the metric prototype, declare it at the top of any .h/.cpp file (not within any
+// namespace):
+// METRIC_DECLARE_gauge_int64(my_gauge_name);
+//
+// Instantiating the metric in whatever class represents it with some initial arguments, if any:
+// metric_instance = METRIC_my_gauge_name.instantiate(entity_instance, ...);
 
-// Define a new entity type.
+// Convenient macros are provided to define entity types and metric prototypes.
 #define METRIC_DEFINE_entity(name) ::dsn::metric_entity_prototype METRIC_ENTITY_##name(#name)
+#define METRIC_DEFINE_gauge_int64(entity_type, name, unit, desc, ...)                              \
+    ::dsn::gauge_prototype<int64_t> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_gauge_double(entity_type, name, unit, desc, ...)                             \
+    ::dsn::gauge_prototype<double> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
+#define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
+#define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
 
 namespace dsn {
 
@@ -222,6 +246,9 @@ class metric_prototype_with : public metric_prototype
 // however, its lastest value is visible.
 class metric : public ref_counter
 {
+public:
+    const metric_prototype *prototype() const { return _prototype; }
+
 protected:
     explicit metric(const metric_prototype *prototype);
     virtual ~metric() = default;
@@ -232,4 +259,50 @@ class metric : public ref_counter
     DISALLOW_COPY_AND_ASSIGN(metric);
 };
 
+// A gauge is an instantaneous measurement of a discrete value. It represents a single numerical
+// value that can arbitrarily go up and down. It's typically used for measured values like current
+// memory usage, the total capacity and available ratio of a disk, etc.
+template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class gauge : public metric
+{
+public:
+    T value() const { return _value.load(std::memory_order_relaxed); }
+
+    void set(const T &val) { _value.store(val, std::memory_order_relaxed); }
+
+protected:
+    gauge(const metric_prototype *prototype, const T &initial_val)
+        : metric(prototype), _value(initial_val)
+    {
+    }
+
+    gauge(const metric_prototype *prototype);
+
+    virtual ~gauge() = default;
+
+private:
+    friend class metric_entity;
+    friend class ref_ptr<gauge<T>>;
+
+    std::atomic<T> _value;
+
+    DISALLOW_COPY_AND_ASSIGN(gauge);
+};
+
+// Explicit specializations defined in a header must be `inline` to avoid multiple definitions.
+template <>
+inline gauge<int64_t>::gauge(const metric_prototype *prototype) : gauge(prototype, 0)
+{
+}
+template <>
+inline gauge<double>::gauge(const metric_prototype *prototype) : gauge(prototype, 0.0)
+{
+}
+
+template <typename T>
+using gauge_ptr = ref_ptr<gauge<T>>;
+
+template <typename T>
+using gauge_prototype = metric_prototype_with<gauge<T>>;
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 5bc979bc43..e1b72e2b65 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -24,15 +24,19 @@ namespace dsn {
 class my_gauge : public metric
 {
 public:
+    int64_t value() { return _value; }
+
+protected:
     explicit my_gauge(const metric_prototype *prototype) : metric(prototype), _value(0) {}
 
     my_gauge(const metric_prototype *prototype, int64_t value) : metric(prototype), _value(value) {}
 
     virtual ~my_gauge() = default;
 
-    int64_t value() { return _value; }
-
 private:
+    friend class metric_entity;
+    friend class ref_ptr<my_gauge>;
+
     int64_t _value;
 
     DISALLOW_COPY_AND_ASSIGN(my_gauge);
@@ -68,6 +72,16 @@ METRIC_DEFINE_my_gauge(my_replica,
                        dsn::metric_unit::kMilliSeconds,
                        "a replica-level duration for test");
 
+METRIC_DEFINE_gauge_int64(my_server,
+                          test_gauge_int64,
+                          dsn::metric_unit::kMilliSeconds,
+                          "a server-level gauge of int64 type for test");
+
+METRIC_DEFINE_gauge_double(my_server,
+                           test_gauge_double,
+                           dsn::metric_unit::kSeconds,
+                           "a server-level gauge of double type for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -222,4 +236,88 @@ TEST(metrics_test, recreate_metric)
     ASSERT_EQ(my_metric->value(), 5);
 }
 
+TEST(metrics_test, gauge_int64)
+{
+
+    // Test cases:
+    // - create a gauge of int64 type without initial value, then increase
+    // - create a gauge of int64 type without initial value, then decrease
+    // - create a gauge of int64 type with initial value, then increase
+    // - create a gauge of int64 type with initial value, then decrease
+    struct test_case
+    {
+        std::string entity_id;
+        bool use_default_value;
+        int64_t initial_value;
+        int64_t new_value;
+    } tests[] = {{"server_5", true, 0, 5},
+                 {"server_6", true, 0, -5},
+                 {"server_7", false, 10, 100},
+                 {"server_8", false, 100, 10}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        gauge_ptr<int64_t> my_metric;
+        if (test.use_default_value) {
+            my_metric = METRIC_test_gauge_int64.instantiate(my_server_entity);
+        } else {
+            my_metric = METRIC_test_gauge_int64.instantiate(my_server_entity, test.initial_value);
+        }
+
+        ASSERT_EQ(my_metric->value(), test.initial_value);
+
+        my_metric->set(test.new_value);
+        ASSERT_EQ(my_metric->value(), test.new_value);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_int64].get()), my_metric.get());
+
+        ASSERT_EQ(my_metric->prototype(),
+                  static_cast<const metric_prototype *>(&METRIC_test_gauge_int64));
+    }
+}
+
+TEST(metrics_test, gauge_double)
+{
+
+    // Test cases:
+    // - create a gauge of double type without initial value, then increase
+    // - create a gauge of double type without initial value, then decrease
+    // - create a gauge of double type with initial value, then increase
+    // - create a gauge of double type with initial value, then decrease
+    struct test_case
+    {
+        std::string entity_id;
+        bool use_default_value;
+        double initial_value;
+        double new_value;
+    } tests[] = {{"server_9", true, 0.0, 5.278},
+                 {"server_10", true, 0.0, -5.278},
+                 {"server_11", false, 10.756, 100.128},
+                 {"server_12", false, 100.128, 10.756}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        gauge_ptr<double> my_metric;
+        if (test.use_default_value) {
+            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity);
+        } else {
+            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity, test.initial_value);
+        }
+
+        ASSERT_DOUBLE_EQ(my_metric->value(), test.initial_value);
+
+        my_metric->set(test.new_value);
+        ASSERT_DOUBLE_EQ(my_metric->value(), test.new_value);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_double].get()), my_metric.get());
+
+        ASSERT_EQ(my_metric->prototype(),
+                  static_cast<const metric_prototype *>(&METRIC_test_gauge_double));
+    }
+}
+
 } // namespace dsn

From 4c7b92995eac79821f68e779c7766e3bc77042c7 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 25 Mar 2022 17:34:59 +0800
Subject: [PATCH 16/21] feat(new_metrics): implement the counter (#1081)

---
 include/dsn/utility/metrics.h   |  54 ++++++++++++
 src/utils/test/metrics_test.cpp | 147 +++++++++++++++++++++++++++++++-
 2 files changed, 198 insertions(+), 3 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 06e76017fb..7c1f479002 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -27,6 +27,7 @@
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
+#include <dsn/utility/long_adder.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
 #include <dsn/utility/string_view.h>
@@ -68,11 +69,27 @@
     ::dsn::gauge_prototype<int64_t> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
 #define METRIC_DEFINE_gauge_double(entity_type, name, unit, desc, ...)                             \
     ::dsn::gauge_prototype<double> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})
+// There are 2 kinds of counters:
+// - `counter` is the general type of counter that is implemented by striped_long_adder, which can
+//   achieve high performance while consuming less memory if it's not updated very frequently.
+// - `concurrent_counter` uses concurrent_long_adder as the underlying implementation. It has
+//   higher performance while consuming more memory if it's updated very frequently.
+// See also include/dsn/utility/long_adder.h for details.
+#define METRIC_DEFINE_counter(entity_type, name, unit, desc, ...)                                  \
+    ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name(                             \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_concurrent_counter(entity_type, name, unit, desc, ...)                       \
+    ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name(                          \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
 #define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
+#define METRIC_DECLARE_counter(name)                                                               \
+    extern ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name
+#define METRIC_DECLARE_concurrent_counter(name)                                                    \
+    extern ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name
 
 namespace dsn {
 
@@ -176,6 +193,7 @@ enum class metric_unit
     kMicroSeconds,
     kMilliSeconds,
     kSeconds,
+    kRequests,
     kInvalidUnit,
 };
 
@@ -305,4 +323,40 @@ using gauge_ptr = ref_ptr<gauge<T>>;
 template <typename T>
 using gauge_prototype = metric_prototype_with<gauge<T>>;
 
+// A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
+// used to measure the number of tasks in queues, current number of running manual compacts,
+// etc. All counters start out at 0.
+template <typename Adder = striped_long_adder>
+class counter : public metric
+{
+public:
+    int64_t value() const { return _adder.value(); }
+
+    void increment_by(int64_t x) { _adder.increment_by(x); }
+    void increment() { _adder.increment(); }
+    void decrement() { _adder.decrement(); }
+
+    void reset() { _adder.reset(); }
+
+protected:
+    counter(const metric_prototype *prototype) : metric(prototype) {}
+
+    virtual ~counter() = default;
+
+private:
+    friend class metric_entity;
+    friend class ref_ptr<counter<Adder>>;
+
+    long_adder_wrapper<Adder> _adder;
+
+    DISALLOW_COPY_AND_ASSIGN(counter);
+};
+
+template <typename Adder = striped_long_adder>
+using counter_ptr = ref_ptr<counter<Adder>>;
+using concurrent_counter_ptr = counter_ptr<concurrent_long_adder>;
+
+template <typename Adder = striped_long_adder>
+using counter_prototype = metric_prototype_with<counter<Adder>>;
+
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index e1b72e2b65..123661c888 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -16,6 +16,10 @@
 // under the License.
 
 #include <dsn/utility/metrics.h>
+#include <dsn/utility/rand.h>
+
+#include <thread>
+#include <vector>
 
 #include <gtest/gtest.h>
 
@@ -82,6 +86,16 @@ METRIC_DEFINE_gauge_double(my_server,
                            dsn::metric_unit::kSeconds,
                            "a server-level gauge of double type for test");
 
+METRIC_DEFINE_counter(my_server,
+                      test_counter,
+                      dsn::metric_unit::kRequests,
+                      "a server-level counter for test");
+
+METRIC_DEFINE_concurrent_counter(my_server,
+                                 test_concurrent_counter,
+                                 dsn::metric_unit::kRequests,
+                                 "a server-level concurrent_counter for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -238,7 +252,6 @@ TEST(metrics_test, recreate_metric)
 
 TEST(metrics_test, gauge_int64)
 {
-
     // Test cases:
     // - create a gauge of int64 type without initial value, then increase
     // - create a gauge of int64 type without initial value, then decrease
@@ -280,7 +293,6 @@ TEST(metrics_test, gauge_int64)
 
 TEST(metrics_test, gauge_double)
 {
-
     // Test cases:
     // - create a gauge of double type without initial value, then increase
     // - create a gauge of double type without initial value, then decrease
@@ -313,11 +325,140 @@ TEST(metrics_test, gauge_double)
         ASSERT_DOUBLE_EQ(my_metric->value(), test.new_value);
 
         auto metrics = my_server_entity->metrics();
-        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_double].get()), my_metric.get());
+        ASSERT_EQ(metrics[&METRIC_test_gauge_double].get(), static_cast<metric *>(my_metric.get()));
 
         ASSERT_EQ(my_metric->prototype(),
                   static_cast<const metric_prototype *>(&METRIC_test_gauge_double));
     }
 }
 
+void execute(int64_t num_threads, std::function<void(int)> runner)
+{
+    std::vector<std::thread> threads;
+    for (int64_t i = 0; i < num_threads; i++) {
+        threads.emplace_back([i, &runner]() { runner(i); });
+    }
+    for (auto &t : threads) {
+        t.join();
+    }
+}
+
+template <typename Adder>
+void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
+                              int64_t base_value,
+                              int64_t num_operations,
+                              int64_t num_threads,
+                              int64_t &result)
+{
+    std::vector<int64_t> deltas;
+    int64_t n = num_operations * num_threads;
+    deltas.reserve(n);
+
+    int64_t expected_value = base_value;
+    for (int64_t i = 0; i < n; ++i) {
+        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
+        if (delta % 3 == 0) {
+            delta = -delta;
+        }
+        expected_value += delta;
+        deltas.push_back(delta);
+    }
+
+    execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->increment_by(deltas[tid * num_operations + i]);
+        }
+    });
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
+                           int64_t base_value,
+                           int64_t num_operations,
+                           int64_t num_threads,
+                           int64_t &result)
+{
+    execute(num_threads, [num_operations, &my_metric](int) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->increment();
+        }
+    });
+
+    int64_t expected_value = base_value + num_operations * num_threads;
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
+                           int64_t base_value,
+                           int64_t num_operations,
+                           int64_t num_threads,
+                           int64_t &result)
+{
+    execute(num_threads, [num_operations, &my_metric](int) mutable {
+        for (int64_t i = 0; i < num_operations; ++i) {
+            my_metric->decrement();
+        }
+    });
+
+    int64_t expected_value = base_value - num_operations * num_threads;
+    ASSERT_EQ(my_metric->value(), expected_value);
+    result = expected_value;
+}
+
+template <typename Adder>
+void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
+{
+    // Test cases:
+    // - test the counter with small-scale computations
+    // - test the counter with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t increments_by;
+        int64_t increments;
+        int64_t decrements;
+    } tests[] = {{"server_9", 100, 1000, 1000}, {"server_10", 1000000, 10000000, 10000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        int64_t value = 0;
+        ASSERT_EQ(my_metric->value(), value);
+        run_counter_increment_by(my_metric, value, test.increments_by, num_threads, value);
+        run_counter_increment(my_metric, value, test.increments, num_threads, value);
+        run_counter_decrement(my_metric, value, test.decrements, num_threads, value);
+
+        my_metric->reset();
+        ASSERT_EQ(my_metric->value(), 0);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
+
+        ASSERT_EQ(my_metric->prototype(), prototype);
+    }
+}
+
+template <typename Adder>
+void run_counter_cases(::dsn::counter_prototype<Adder> *prototype)
+{
+    // Do single-threaded tests
+    run_counter_cases(prototype, 1);
+
+    // Do multi-threaded tests
+    run_counter_cases(prototype, 4);
+}
+
+TEST(metrics_test, counter)
+{
+    // Test both kinds of counter
+    run_counter_cases<striped_long_adder>(&METRIC_test_counter);
+    run_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_counter);
+}
+
 } // namespace dsn

From b3dc8d2d925a615f022672ad9ed6b291b47558cd Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 13 Apr 2022 20:43:23 +0800
Subject: [PATCH 17/21] feat(new_metrics): implement the volatile counter
 (#1083)

---
 include/dsn/utility/metrics.h   |  62 ++++++++++++---
 src/utils/test/metrics_test.cpp | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+), 10 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 7c1f479002..de7be1e831 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -76,10 +76,16 @@
 //   higher performance while consuming more memory if it's updated very frequently.
 // See also include/dsn/utility/long_adder.h for details.
 #define METRIC_DEFINE_counter(entity_type, name, unit, desc, ...)                                  \
-    ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name(                             \
+    dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name(                          \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 #define METRIC_DEFINE_concurrent_counter(entity_type, name, unit, desc, ...)                       \
-    ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name(                          \
+    dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name(                       \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_volatile_counter(entity_type, name, unit, desc, ...)                         \
+    dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name(                           \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_concurrent_volatile_counter(entity_type, name, unit, desc, ...)              \
+    dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name(                        \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
 // The following macros act as forward declarations for entity types and metric prototypes.
@@ -87,9 +93,13 @@
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
 #define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name
 #define METRIC_DECLARE_counter(name)                                                               \
-    extern ::dsn::counter_prototype<::dsn::striped_long_adder> METRIC_##name
+    extern dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name
 #define METRIC_DECLARE_concurrent_counter(name)                                                    \
-    extern ::dsn::counter_prototype<::dsn::concurrent_long_adder> METRIC_##name
+    extern dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name
+#define METRIC_DECLARE_volatile_counter(name)                                                      \
+    extern dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name
+#define METRIC_DECLARE_concurrent_volatile_counter(name)                                           \
+    extern dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name
 
 namespace dsn {
 
@@ -326,11 +336,33 @@ using gauge_prototype = metric_prototype_with<gauge<T>>;
 // A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
 // used to measure the number of tasks in queues, current number of running manual compacts,
 // etc. All counters start out at 0.
-template <typename Adder = striped_long_adder>
+//
+// `IsVolatile` is false by default. Once it's specified as true, the counter will be volatile.
+// The value() function of a volatile counter will reset the counter atomically after its value
+// is fetched. A volatile counter can also be called a "recent" counter.
+//
+// Sometimes "recent" counters are needed, such as the number of recent failed beacons sent from
+// replica server, the count of updating configurations of partitions recently, etc. The "recent"
+// count can be considered to be the accumulated count since it has been fetched last by value().
+//
+// In most cases, a general (i.e. non-volatile) counter is enough, which means it can also work
+// for "recent" counters. For example, in Prometheus, delta() can be used to compute "recent"
+// count for a general counter. Therefore, declare a counter as volatile only when necessary.
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
 class counter : public metric
 {
 public:
-    int64_t value() const { return _adder.value(); }
+    template <bool Volatile = IsVolatile, typename = typename std::enable_if<!Volatile>::type>
+    int64_t value() const
+    {
+        return _adder.value();
+    }
+
+    template <bool Volatile = IsVolatile, typename = typename std::enable_if<Volatile>::type>
+    int64_t value()
+    {
+        return _adder.fetch_and_reset();
+    }
 
     void increment_by(int64_t x) { _adder.increment_by(x); }
     void increment() { _adder.increment(); }
@@ -345,18 +377,28 @@ class counter : public metric
 
 private:
     friend class metric_entity;
-    friend class ref_ptr<counter<Adder>>;
+    friend class ref_ptr<counter<Adder, IsVolatile>>;
 
     long_adder_wrapper<Adder> _adder;
 
     DISALLOW_COPY_AND_ASSIGN(counter);
 };
 
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
+using counter_ptr = ref_ptr<counter<Adder, IsVolatile>>;
+
+template <bool IsVolatile = false>
+using concurrent_counter_ptr = counter_ptr<concurrent_long_adder, IsVolatile>;
+
+template <typename Adder = striped_long_adder, bool IsVolatile = false>
+using counter_prototype = metric_prototype_with<counter<Adder, IsVolatile>>;
+
 template <typename Adder = striped_long_adder>
-using counter_ptr = ref_ptr<counter<Adder>>;
-using concurrent_counter_ptr = counter_ptr<concurrent_long_adder>;
+using volatile_counter_ptr = ref_ptr<counter<Adder, true>>;
+
+using concurrent_volatile_counter_ptr = counter_ptr<concurrent_long_adder, true>;
 
 template <typename Adder = striped_long_adder>
-using counter_prototype = metric_prototype_with<counter<Adder>>;
+using volatile_counter_prototype = metric_prototype_with<counter<Adder, true>>;
 
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 123661c888..cd38ad5116 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -96,6 +96,16 @@ METRIC_DEFINE_concurrent_counter(my_server,
                                  dsn::metric_unit::kRequests,
                                  "a server-level concurrent_counter for test");
 
+METRIC_DEFINE_volatile_counter(my_server,
+                               test_volatile_counter,
+                               dsn::metric_unit::kRequests,
+                               "a server-level volatile_counter for test");
+
+METRIC_DEFINE_concurrent_volatile_counter(my_server,
+                                          test_concurrent_volatile_counter,
+                                          dsn::metric_unit::kRequests,
+                                          "a server-level concurrent_volatile_counter for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -461,4 +471,125 @@ TEST(metrics_test, counter)
     run_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_counter);
 }
 
+template <typename Adder>
+void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_metric,
+                                         int64_t num_operations,
+                                         int64_t num_threads_write,
+                                         int64_t num_threads_read)
+{
+    std::vector<int64_t> deltas;
+    int64_t n = num_operations * num_threads_write;
+    deltas.reserve(n);
+
+    int64_t expected_value = 0;
+    for (int64_t i = 0; i < n; ++i) {
+        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
+        if (delta % 3 == 0) {
+            delta = -delta;
+        }
+        expected_value += delta;
+        deltas.push_back(delta);
+    }
+
+    auto results = new_cacheline_aligned_int64_array(static_cast<uint32_t>(num_threads_read));
+    std::vector<std::atomic_bool> completed(num_threads_write);
+    for (int64_t i = 0; i < num_threads_write; ++i) {
+        completed[i].store(false);
+    }
+
+    ASSERT_EQ(my_metric->value(), 0);
+
+    execute(num_threads_write + num_threads_read,
+            [num_operations, num_threads_write, &my_metric, &deltas, &results, &completed](
+                int tid) mutable {
+                if (tid < num_threads_write) {
+                    for (int64_t i = 0; i < num_operations; ++i) {
+                        my_metric->increment_by(deltas[tid * num_operations + i]);
+                    }
+                    completed[tid].store(true);
+                } else {
+                    bool done = false;
+                    do {
+                        int64_t i = 0;
+                        for (; i < num_threads_write && completed[i].load(); ++i) {
+                        }
+                        if (i >= num_threads_write) {
+                            // All of the increment threads have finished, thus the loop can
+                            // be broken after the last time the value is fetched.
+                            done = true;
+                        }
+
+                        auto value = my_metric->value();
+                        if (value == 0) {
+                            // If zero is fetched, it's likely that recently the counter is
+                            // not updated frequently. Thus yield and try for the next time.
+                            std::this_thread::yield();
+                        } else {
+                            auto r = results.get();
+                            r[tid - num_threads_write]._value += value;
+                        }
+                    } while (!done);
+                }
+            });
+
+    int64_t value = 0;
+    for (int64_t i = 0; i < num_threads_read; ++i) {
+        value += results.get()[i]._value.load();
+    }
+    ASSERT_EQ(value, expected_value);
+    ASSERT_EQ(my_metric->value(), 0);
+}
+
+template <typename Adder>
+void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype,
+                                int64_t num_threads_write,
+                                int64_t num_threads_read)
+{
+    // Test cases:
+    // - test the volatile counter with small-scale computations
+    // - test the volatile counter with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t num_operations;
+    } tests[] = {{"server_11", 5000}, {"server_12", 5000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        run_volatile_counter_write_and_read(
+            my_metric, test.num_operations, num_threads_write, num_threads_read);
+
+        auto metrics = my_server_entity->metrics();
+        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
+
+        ASSERT_EQ(my_metric->prototype(), prototype);
+    }
+}
+
+template <typename Adder>
+void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype)
+{
+    // Write with single thread and read with single thread
+    run_volatile_counter_cases(prototype, 1, 1);
+
+    // Write with multiple threads and read with single thread
+    run_volatile_counter_cases(prototype, 2, 1);
+
+    // Write with single thread and read with multiple threads
+    run_volatile_counter_cases(prototype, 1, 2);
+
+    // Write with multiple threads and read with multiple threads
+    run_volatile_counter_cases(prototype, 4, 2);
+}
+
+TEST(metrics_test, volatile_counter)
+{
+    // Test both kinds of volatile counter
+    run_volatile_counter_cases<striped_long_adder>(&METRIC_test_volatile_counter);
+    run_volatile_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_volatile_counter);
+}
+
 } // namespace dsn

From 447bde75f06aca1b8ad22c5403c152caa2e12a10 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Tue, 19 Apr 2022 21:49:02 +0800
Subject: [PATCH 18/21] feat(new_metrics): make the counter increment
 monotonically (#1095)

---
 include/dsn/utility/metrics.h   |  62 +++++++++++++---
 src/utils/metrics.cpp           |   1 -
 src/utils/test/metrics_test.cpp | 122 +++++++++++++++++++++++---------
 3 files changed, 142 insertions(+), 43 deletions(-)

diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index de7be1e831..22da07b33c 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -24,6 +24,8 @@
 #include <unordered_map>
 #include <utility>
 
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
@@ -287,9 +289,16 @@ class metric : public ref_counter
     DISALLOW_COPY_AND_ASSIGN(metric);
 };
 
-// A gauge is an instantaneous measurement of a discrete value. It represents a single numerical
-// value that can arbitrarily go up and down. It's typically used for measured values like current
-// memory usage, the total capacity and available ratio of a disk, etc.
+// A gauge is a metric that represents a single numerical value that can arbitrarily go up and
+// down. Usually there are 2 scenarios for a gauge.
+//
+// Firstly, a gauge can be used as an instantaneous measurement of a discrete value. Typical
+// usages in this scenario are current memory usage, the total capacity and available ratio of
+// a disk, etc.
+//
+// Secondly, a gauge can be used as a counter that increases and decreases. In this scenario only
+// integral types are supported, and its typical usages are the number of tasks in queues, current
+// number of running manual compacts, etc.
 template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
 class gauge : public metric
 {
@@ -298,6 +307,34 @@ class gauge : public metric
 
     void set(const T &val) { _value.store(val, std::memory_order_relaxed); }
 
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void increment_by(Int x)
+    {
+        _value.fetch_add(x, std::memory_order_relaxed);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void decrement_by(Int x)
+    {
+        increment_by(-x);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void increment()
+    {
+        increment_by(1);
+    }
+
+    template <typename Int = T,
+              typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+    void decrement()
+    {
+        increment_by(-1);
+    }
+
 protected:
     gauge(const metric_prototype *prototype, const T &initial_val)
         : metric(prototype), _value(initial_val)
@@ -333,9 +370,13 @@ using gauge_ptr = ref_ptr<gauge<T>>;
 template <typename T>
 using gauge_prototype = metric_prototype_with<gauge<T>>;
 
-// A counter in essence is a 64-bit integer that can be incremented and decremented. It can be
-// used to measure the number of tasks in queues, current number of running manual compacts,
-// etc. All counters start out at 0.
+// A counter in essence is a 64-bit integer that increases monotonically. It should be noted that
+// the counter does not support decreasing. If decreasing is needed, please consider using the
+// gauge instead.
+//
+// The counter can be typically used to measure the number of processed requests, which in the
+// future can help to compute the QPS. All counters start out at 0, and are non-negative
+// since they are monotonic.
 //
 // `IsVolatile` is false by default. Once it's specified as true, the counter will be volatile.
 // The value() function of a volatile counter will reset the counter atomically after its value
@@ -364,9 +405,14 @@ class counter : public metric
         return _adder.fetch_and_reset();
     }
 
-    void increment_by(int64_t x) { _adder.increment_by(x); }
+    // NOTICE: x MUST be a non-negative integer.
+    void increment_by(int64_t x)
+    {
+        dassert_f(x >= 0, "delta({}) by increment for counter must be a non-negative integer", x);
+        _adder.increment_by(x);
+    }
+
     void increment() { _adder.increment(); }
-    void decrement() { _adder.decrement(); }
 
     void reset() { _adder.reset(); }
 
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 8f96792b81..0ec1415cac 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -18,7 +18,6 @@
 #include <dsn/utility/metrics.h>
 
 #include <dsn/c/api_utilities.h>
-#include <dsn/dist/fmt_logging.h>
 
 namespace dsn {
 
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index cd38ad5116..6ba5b65687 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -294,7 +294,7 @@ TEST(metrics_test, gauge_int64)
         ASSERT_EQ(my_metric->value(), test.new_value);
 
         auto metrics = my_server_entity->metrics();
-        ASSERT_EQ(static_cast<metric *>(metrics[&METRIC_test_gauge_int64].get()), my_metric.get());
+        ASSERT_EQ(metrics[&METRIC_test_gauge_int64].get(), static_cast<metric *>(my_metric.get()));
 
         ASSERT_EQ(my_metric->prototype(),
                   static_cast<const metric_prototype *>(&METRIC_test_gauge_int64));
@@ -353,12 +353,26 @@ void execute(int64_t num_threads, std::function<void(int)> runner)
     }
 }
 
-template <typename Adder>
-void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
-                              int64_t base_value,
-                              int64_t num_operations,
-                              int64_t num_threads,
-                              int64_t &result)
+template <typename MetricPtr>
+void increment_by(std::integral_constant<bool, true>, MetricPtr &my_metric, int64_t x)
+{
+    my_metric->increment_by(x);
+}
+
+template <typename MetricPtr>
+void increment_by(std::integral_constant<bool, false>, MetricPtr &my_metric, int64_t x)
+{
+    // If x is positive, metric will be increased; otherwise, the metric will be decreased.
+    my_metric->decrement_by(-x);
+}
+
+template <bool IsIncrement, typename MetricPtr>
+void run_increment_by(MetricPtr &my_metric,
+                      int64_t base_value,
+                      int64_t num_operations,
+                      int64_t num_threads,
+                      int64_t &result,
+                      bool allow_negative = true)
 {
     std::vector<int64_t> deltas;
     int64_t n = num_operations * num_threads;
@@ -367,7 +381,7 @@ void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
     int64_t expected_value = base_value;
     for (int64_t i = 0; i < n; ++i) {
         auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
-        if (delta % 3 == 0) {
+        if (allow_negative && delta % 3 == 0) {
             delta = -delta;
         }
         expected_value += delta;
@@ -376,19 +390,20 @@ void run_counter_increment_by(::dsn::counter_ptr<Adder> &my_metric,
 
     execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
-            my_metric->increment_by(deltas[tid * num_operations + i]);
+            auto delta = deltas[tid * num_operations + i];
+            increment_by(std::integral_constant<bool, IsIncrement>{}, my_metric, delta);
         }
     });
     ASSERT_EQ(my_metric->value(), expected_value);
     result = expected_value;
 }
 
-template <typename Adder>
-void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
-                           int64_t base_value,
-                           int64_t num_operations,
-                           int64_t num_threads,
-                           int64_t &result)
+template <typename MetricPtr>
+void run_increment(MetricPtr &my_metric,
+                   int64_t base_value,
+                   int64_t num_operations,
+                   int64_t num_threads,
+                   int64_t &result)
 {
     execute(num_threads, [num_operations, &my_metric](int) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
@@ -401,12 +416,12 @@ void run_counter_increment(::dsn::counter_ptr<Adder> &my_metric,
     result = expected_value;
 }
 
-template <typename Adder>
-void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
-                           int64_t base_value,
-                           int64_t num_operations,
-                           int64_t num_threads,
-                           int64_t &result)
+template <typename MetricPtr>
+void run_decrement(MetricPtr &my_metric,
+                   int64_t base_value,
+                   int64_t num_operations,
+                   int64_t num_threads,
+                   int64_t &result)
 {
     execute(num_threads, [num_operations, &my_metric](int) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
@@ -419,8 +434,52 @@ void run_counter_decrement(::dsn::counter_ptr<Adder> &my_metric,
     result = expected_value;
 }
 
+void run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype, int64_t num_threads)
+{
+    // Test cases:
+    // - test the gauge with small-scale computations
+    // - test the gauge with large-scale computations
+    struct test_case
+    {
+        std::string entity_id;
+        int64_t increments_by;
+        int64_t decrements_by;
+        int64_t increments;
+        int64_t decrements;
+    } tests[] = {{"server_13", 100, 100, 1000, 1000},
+                 {"server_14", 1000000, 1000000, 10000000, 10000000}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        auto my_metric = prototype->instantiate(my_server_entity);
+
+        int64_t value = 0;
+        ASSERT_EQ(my_metric->value(), value);
+        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value);
+        run_increment_by<false>(my_metric, value, test.decrements_by, num_threads, value);
+        run_increment(my_metric, value, test.increments, num_threads, value);
+        run_decrement(my_metric, value, test.decrements, num_threads, value);
+
+        // Reset to 0 since this metric could be used again
+        my_metric->set(0);
+        ASSERT_EQ(my_metric->value(), 0);
+    }
+}
+
+void run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype)
+{
+    // Do single-threaded tests
+    run_gauge_increment_cases(prototype, 1);
+
+    // Do multi-threaded tests
+    run_gauge_increment_cases(prototype, 4);
+}
+
+TEST(metrics_test, gauge_increment) { run_gauge_increment_cases(&METRIC_test_gauge_int64); }
+
 template <typename Adder>
-void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
+void run_counter_cases(dsn::counter_prototype<Adder> *prototype, int64_t num_threads)
 {
     // Test cases:
     // - test the counter with small-scale computations
@@ -430,8 +489,7 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
         std::string entity_id;
         int64_t increments_by;
         int64_t increments;
-        int64_t decrements;
-    } tests[] = {{"server_9", 100, 1000, 1000}, {"server_10", 1000000, 10000000, 10000000}};
+    } tests[] = {{"server_15", 100, 1000}, {"server_16", 1000000, 10000000}};
 
     for (const auto &test : tests) {
         auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
@@ -440,9 +498,8 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
 
         int64_t value = 0;
         ASSERT_EQ(my_metric->value(), value);
-        run_counter_increment_by(my_metric, value, test.increments_by, num_threads, value);
-        run_counter_increment(my_metric, value, test.increments, num_threads, value);
-        run_counter_decrement(my_metric, value, test.decrements, num_threads, value);
+        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value, false);
+        run_increment(my_metric, value, test.increments, num_threads, value);
 
         my_metric->reset();
         ASSERT_EQ(my_metric->value(), 0);
@@ -450,12 +507,12 @@ void run_counter_cases(::dsn::counter_prototype<Adder> *prototype, int64_t num_t
         auto metrics = my_server_entity->metrics();
         ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
 
-        ASSERT_EQ(my_metric->prototype(), prototype);
+        ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));
     }
 }
 
 template <typename Adder>
-void run_counter_cases(::dsn::counter_prototype<Adder> *prototype)
+void run_counter_cases(dsn::counter_prototype<Adder> *prototype)
 {
     // Do single-threaded tests
     run_counter_cases(prototype, 1);
@@ -484,9 +541,6 @@ void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_me
     int64_t expected_value = 0;
     for (int64_t i = 0; i < n; ++i) {
         auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));
-        if (delta % 3 == 0) {
-            delta = -delta;
-        }
         expected_value += delta;
         deltas.push_back(delta);
     }
@@ -552,7 +606,7 @@ void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototyp
     {
         std::string entity_id;
         int64_t num_operations;
-    } tests[] = {{"server_11", 5000}, {"server_12", 5000000}};
+    } tests[] = {{"server_17", 5000}, {"server_18", 5000000}};
 
     for (const auto &test : tests) {
         auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
@@ -565,7 +619,7 @@ void run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototyp
         auto metrics = my_server_entity->metrics();
         ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));
 
-        ASSERT_EQ(my_metric->prototype(), prototype);
+        ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));
     }
 }
 

From 9cecf7fa8261b135264cab067fb65e26252d713b Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Wed, 25 May 2022 18:08:17 +0800
Subject: [PATCH 19/21] feat(new_metrics): support to find multiple nth
 elements of a sequence container at a time based on nth_element() of STL
 (#1106)

---
 include/dsn/utility/nth_element.h             | 128 ++++++++
 src/perf_counter/perf_counter_atomic.h        |  18 +-
 src/utils/test/CMakeLists.txt                 |   1 +
 .../test/nth_element_bench/CMakeLists.txt     |  39 +++
 .../nth_element_bench/nth_element_bench.cpp   | 230 ++++++++++++++
 src/utils/test/nth_element_test.cpp           | 288 ++++++++++++++++++
 src/utils/test/nth_element_utils.h            | 180 +++++++++++
 7 files changed, 881 insertions(+), 3 deletions(-)
 create mode 100644 include/dsn/utility/nth_element.h
 create mode 100644 src/utils/test/nth_element_bench/CMakeLists.txt
 create mode 100644 src/utils/test/nth_element_bench/nth_element_bench.cpp
 create mode 100644 src/utils/test/nth_element_test.cpp
 create mode 100644 src/utils/test/nth_element_utils.h

diff --git a/include/dsn/utility/nth_element.h b/include/dsn/utility/nth_element.h
new file mode 100644
index 0000000000..d6fbe85d04
--- /dev/null
+++ b/include/dsn/utility/nth_element.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+
+namespace dsn {
+
+// The finder helps to find multiple nth elements of a sequence container (e.g. std::vector)
+// at a time, based on nth_element() of STL.
+template <typename T, typename Compare = std::less<T>>
+class stl_nth_element_finder
+{
+public:
+    using value_type = T;
+    using container_type = std::vector<value_type>;
+    using size_type = typename container_type::size_type;
+    using nth_container_type = std::vector<size_type>;
+
+    stl_nth_element_finder(const Compare &comp = Compare()) : _nths(), _elements(), _comp(comp) {}
+
+    // Set with specified nth indexes. An nth index is typically an index of the sequence
+    // container (e.g. std::vector). This method allows nth indexes to be updated dynamically.
+    //
+    // There are 2 reasons why both `_nths` and `_elements` are put into the sequence container:
+    //
+    // (1) The users of stl_nth_element_finder, such as the metric of percentile, may pass
+    // duplicate nth indexes to `_nths`. For example, suppose that the sampled window size is
+    // 100, both P99 and P999 will have the same nth element -- namely 99th element. Thus it
+    // will be convenient for users if `nths` can contain duplicate elements.
+    //
+    // The sequence container can contain duplicate elements, even if all elements in the container
+    // are sorted. Therefore, there may be identical indexes in `nths`.
+    //
+    // (2) The sequence container is more cache-friendly. While an nth element is selected, it's
+    // cache-friendly to write it into `_elements`. After all nth elements are collected into
+    // `_elements`, scanning them (`elements()`) is also cache-friendly, even if there are many
+    // nth indexes in `_nths`. In contrast, directly accessing the nth elements in the array
+    // is not cache-friendly, especially when the array is large.
+    //
+    // Notice that the indexes in `nths` list must be ordered. After `operator()` is executed,
+    // the elements returned by `elements()` will be in the order of the sorted nth indexes.
+    void set_nths(const nth_container_type &nths)
+    {
+        _nths = nths;
+        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),
+                  "nth indexes({}) is not sorted",
+                  fmt::join(_nths, " "));
+
+        _elements.assign(_nths.size(), value_type{});
+    }
+
+    // Find the multiple nth elements.
+    //
+    // Typically `begin` is the beginning iterator of the sequence container. `begin` plus each
+    // member of `_nths` will be the real nth element of the sequence container.
+    //
+    // [first, last) is the real range for finding the multiple nth elements.
+    template <typename RandomAccessIterator>
+    void
+    operator()(RandomAccessIterator begin, RandomAccessIterator first, RandomAccessIterator last)
+    {
+        for (size_type i = 0; i < _nths.size();) {
+            auto nth_iter = begin + _nths[i];
+            dassert_f(nth_iter >= first && nth_iter < last, "Invalid iterators for nth_element()");
+            std::nth_element(first, nth_iter, last, _comp);
+            _elements[i] = *nth_iter;
+
+            // Identical nth indexes should be processed. See `set_nths()` for details.
+            for (++i; i < _nths.size() && _nths[i] == _nths[i - 1]; ++i) {
+                _elements[i] = *nth_iter;
+            }
+
+            first = nth_iter + 1;
+        }
+    }
+
+    const container_type &elements() const { return _elements; }
+
+private:
+    nth_container_type _nths;
+    container_type _elements;
+    Compare _comp;
+
+    DISALLOW_COPY_AND_ASSIGN(stl_nth_element_finder);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+class floating_comparator
+{
+public:
+    bool operator()(const T &lhs, const T &rhs) const
+    {
+        return rhs - lhs >= std::numeric_limits<T>::epsilon();
+    }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_stl_nth_element_finder = stl_nth_element_finder<T, floating_comparator<T>>;
+
+} // namespace dsn
diff --git a/src/perf_counter/perf_counter_atomic.h b/src/perf_counter/perf_counter_atomic.h
index b632c06ce1..d7a820fc44 100644
--- a/src/perf_counter/perf_counter_atomic.h
+++ b/src/perf_counter/perf_counter_atomic.h
@@ -210,7 +210,8 @@ class perf_counter_number_percentile_atomic : public perf_counter
                                           const char *section,
                                           const char *name,
                                           dsn_perf_counter_type_t type,
-                                          const char *dsptr)
+                                          const char *dsptr,
+                                          bool use_timer = true)
         : perf_counter(app, section, name, type, dsptr), _tail(0)
     {
         _results[COUNTER_PERCENTILE_50] = 0;
@@ -219,6 +220,10 @@ class perf_counter_number_percentile_atomic : public perf_counter
         _results[COUNTER_PERCENTILE_99] = 0;
         _results[COUNTER_PERCENTILE_999] = 0;
 
+        if (!use_timer) {
+            return;
+        }
+
         _counter_computation_interval_seconds = (int)dsn_config_get_value_uint64(
             "components.pegasus_perf_counter_number_percentile_atomic",
             "counter_computation_interval_seconds",
@@ -227,12 +232,17 @@ class perf_counter_number_percentile_atomic : public perf_counter
             "pegasus_perf_counter_number_percentile_atomic counters");
         _timer.reset(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios));
         _timer->expires_from_now(
-            boost::posix_time::seconds(rand() % _counter_computation_interval_seconds + 1));
+            boost::posix_time::seconds(::rand() % _counter_computation_interval_seconds + 1));
         _timer->async_wait(std::bind(
             &perf_counter_number_percentile_atomic::on_timer, this, _timer, std::placeholders::_1));
     }
 
-    ~perf_counter_number_percentile_atomic(void) { _timer->cancel(); }
+    ~perf_counter_number_percentile_atomic(void)
+    {
+        if (_timer) {
+            _timer->cancel();
+        }
+    }
 
     virtual void increment() { dassert(false, "invalid execution flow"); }
     virtual void decrement() { dassert(false, "invalid execution flow"); }
@@ -290,6 +300,8 @@ class perf_counter_number_percentile_atomic : public perf_counter
     }
 
 private:
+    friend class perf_counter_nth_element_finder;
+
     struct compute_context
     {
         int64_t ask[COUNTER_PERCENTILE_COUNT];
diff --git a/src/utils/test/CMakeLists.txt b/src/utils/test/CMakeLists.txt
index d0d27d1222..0516fa7dcd 100644
--- a/src/utils/test/CMakeLists.txt
+++ b/src/utils/test/CMakeLists.txt
@@ -25,5 +25,6 @@ set(MY_BINPLACES "${CMAKE_CURRENT_SOURCE_DIR}/config-bad-section.ini"
         "${CMAKE_CURRENT_SOURCE_DIR}/run.sh"
         "${CMAKE_CURRENT_SOURCE_DIR}/clear.sh"
         )
+add_subdirectory(nth_element_bench)
 add_definitions(-Wno-dangling-else)
 dsn_add_test()
diff --git a/src/utils/test/nth_element_bench/CMakeLists.txt b/src/utils/test/nth_element_bench/CMakeLists.txt
new file mode 100644
index 0000000000..217d9c4363
--- /dev/null
+++ b/src/utils/test/nth_element_bench/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(MY_PROJ_NAME nth_element_bench)
+project(${MY_PROJ_NAME} C CXX)
+
+# Source files under CURRENT project directory will be automatically included.
+# You can manually set MY_PROJ_SRC to include source files under other directories.
+set(MY_PROJ_SRC "")
+
+# Search mode for source files under CURRENT project directory?
+# "GLOB_RECURSE" for recursive search
+# "GLOB" for non-recursive search
+set(MY_SRC_SEARCH_MODE "GLOB")
+
+set(MY_PROJ_LIBS dsn_runtime dsn_utils)
+
+set(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)
+
+# Extra files that will be installed
+set(MY_BINPLACES "")
+
+dsn_add_executable()
+
+dsn_install_executable()
diff --git a/src/utils/test/nth_element_bench/nth_element_bench.cpp b/src/utils/test/nth_element_bench/nth_element_bench.cpp
new file mode 100644
index 0000000000..c1c5a2382d
--- /dev/null
+++ b/src/utils/test/nth_element_bench/nth_element_bench.cpp
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_layer1.h>
+#include <dsn/utility/nth_element.h>
+#include <dsn/utility/smart_pointers.h>
+#include <dsn/utility/string_conv.h>
+#include <dsn/utility/strings.h>
+
+#include "utils/test/nth_element_utils.h"
+
+void print_usage(const char *cmd)
+{
+    fmt::print("USAGE: {} <num_operations> <array_size> <range_size> [nths]\n", cmd);
+    fmt::print("Run a simple benchmark that executes all sorts of nth_element_finder.\n\n");
+
+    fmt::print("    <num_operations>       the number of operations.\n");
+    fmt::print("    <array_size>           the size of array for each operation.\n");
+    fmt::print("    <range_size>           the size of range for each operation to \n"
+               "                           generate the integers randomly.\n");
+    fmt::print("    [nths]                 the nth list for each operation, separated by \n"
+               "                           comma(,) if more than one element, e.g., \n"
+               "                           \"2,5\" means finding 2nd and 5th elements;\n"
+               "                           if this arg is missing, nth list of \n"
+               "                           perf_counter_number_percentile_atomic will be \n"
+               "                           used, that is, P50, P90, P95, P99 and P999.\n");
+}
+
+template <typename NthElementFinder>
+int64_t run_nth_element(const std::vector<int64_t> &expected_elements,
+                        NthElementFinder &finder,
+                        std::function<void()> exec)
+{
+    auto start = dsn_now_ns();
+    exec();
+    auto end = dsn_now_ns();
+
+    if (finder.elements() != expected_elements) {
+        fmt::print(
+            "actual_elements != expected_elements\nactual_elements = {}\nexpected_elements: {}\n",
+            fmt::join(finder.elements(), " "),
+            fmt::join(expected_elements, " "));
+        ::exit(-1);
+    }
+
+    return static_cast<int64_t>(end - start);
+}
+
+int64_t run_stl_nth_element(const std::vector<int64_t> &array,
+                            const std::vector<int64_t> &expected_elements,
+                            dsn::stl_nth_element_finder<int64_t> &finder)
+{
+    auto start = dsn_now_ns();
+    std::vector<int64_t> container(array.size());
+    std::copy(array.begin(), array.end(), container.begin());
+    auto end = dsn_now_ns();
+
+    return static_cast<int64_t>(end - start) +
+           run_nth_element(expected_elements, finder, [&finder, &container]() {
+               finder(container.begin(), container.begin(), container.end());
+           });
+}
+
+void run_bench(size_t num_operations,
+               size_t array_size,
+               uint64_t range_size,
+               const std::vector<size_t> &nths)
+{
+    auto get_perf_counter_nths = [](size_t num) -> std::vector<size_t> {
+        return {static_cast<size_t>(num * 0.5),
+                static_cast<size_t>(num * 0.9),
+                static_cast<size_t>(num * 0.95),
+                static_cast<size_t>(num * 0.99),
+                static_cast<size_t>(num * 0.999)};
+    };
+
+    dsn::perf_counter_nth_element_finder perf_counter_finder;
+    dsn::stl_nth_element_finder<int64_t> stl_finder;
+
+    std::map<std::string, int64_t> exec_time_map = {{"perf_counter_nth_element", 0},
+                                                    {"stl_nth_element", 0}};
+    for (size_t i = 0; i < num_operations; ++i) {
+        std::vector<size_t> real_nths;
+        if (nths.empty()) {
+            real_nths = get_perf_counter_nths(array_size);
+        } else {
+            real_nths = nths;
+        }
+
+        dsn::integral_nth_element_case_generator<int64_t> generator(
+            array_size, 0, range_size, real_nths);
+
+        std::vector<int64_t> array;
+        std::vector<int64_t> expected_elements;
+        generator(array, expected_elements);
+
+        // Only if `nths` is empty, the comparison between stl_nth_element_finder and
+        // perf_counter_nth_element_finder will be launched.
+        if (nths.empty()) {
+            perf_counter_finder.load_data(array);
+            exec_time_map["perf_counter_nth_element"] +=
+                run_nth_element(expected_elements, perf_counter_finder, [&perf_counter_finder]() {
+                    perf_counter_finder();
+                });
+        }
+
+        stl_finder.set_nths(real_nths);
+        exec_time_map["stl_nth_element"] +=
+            run_stl_nth_element(array, expected_elements, stl_finder);
+    }
+
+    for (const auto &t : exec_time_map) {
+        if (t.second == 0) {
+            continue;
+        }
+
+        std::chrono::nanoseconds nano(t.second);
+        auto duration_s = std::chrono::duration_cast<std::chrono::duration<double>>(nano).count();
+        fmt::print("Running {} operations of {} with each array {} elements took {} seconds.\n",
+                   num_operations,
+                   t.first,
+                   array_size,
+                   duration_s);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    if (argc < 4) {
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    uint64_t num_operations;
+    if (!dsn::buf2uint64(argv[1], num_operations)) {
+        fmt::print(stderr, "Invalid num_operations: {}\n\n", argv[1]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (num_operations <= 0) {
+        fmt::print(stderr, "num_operations should be > 0: {}\n\n", num_operations);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    uint64_t array_size;
+    if (!dsn::buf2uint64(argv[2], array_size)) {
+        fmt::print(stderr, "Invalid array_size: {}\n\n", argv[2]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (array_size <= 0 || array_size > MAX_QUEUE_LENGTH) {
+        fmt::print(
+            stderr, "array_size({}) should be > 0 and <= {}\n\n", array_size, MAX_QUEUE_LENGTH);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    uint64_t range_size;
+    if (!dsn::buf2uint64(argv[3], range_size)) {
+        fmt::print(stderr, "Invalid range_size: {}\n\n", argv[3]);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+    if (range_size <= 0) {
+        fmt::print(stderr, "range_size({}) should be > 0\n\n", range_size);
+
+        print_usage(argv[0]);
+        ::exit(-1);
+    }
+
+    std::vector<size_t> nths;
+    if (argc >= 5) {
+        std::vector<std::string> nth_strs;
+        dsn::utils::split_args(argv[4], nth_strs, ',');
+        for (const auto &s : nth_strs) {
+            size_t nth;
+            if (!dsn::buf2uint64(s, nth)) {
+                fmt::print(stderr, "Invalid nth number: {}\n\n", s);
+
+                print_usage(argv[0]);
+                ::exit(-1);
+            }
+
+            if (nth >= array_size) {
+                fmt::print(stderr, "nth({}) should be < array_size({})\n\n", nth, array_size);
+
+                print_usage(argv[0]);
+                ::exit(-1);
+            }
+
+            nths.push_back(nth);
+        }
+    }
+
+    run_bench(num_operations, array_size, range_size, nths);
+
+    return 0;
+}
diff --git a/src/utils/test/nth_element_test.cpp b/src/utils/test/nth_element_test.cpp
new file mode 100644
index 0000000000..44586ecf26
--- /dev/null
+++ b/src/utils/test/nth_element_test.cpp
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/nth_element.h>
+
+#include <fmt/format.h>
+#include <gtest/gtest.h>
+
+#include "nth_element_utils.h"
+
+namespace dsn {
+
+// Runs `NthElementFinder` over a private copy of `array` and checks the found elements.
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_integral<typename NthElementFinder::value_type>::value>::type>
+void run_integral_cases(const typename NthElementFinder::container_type &array,
+                        const typename NthElementFinder::nth_container_type &nths,
+                        const typename NthElementFinder::container_type &expected_elements)
+{
+    // `array` is const, while the finder is handed mutable iterators: operate on a copy.
+    auto scratch = array;
+    NthElementFinder finder;
+    finder.set_nths(nths);
+    finder(scratch.begin(), scratch.begin(), scratch.end());
+    ASSERT_EQ(finder.elements(), expected_elements);
+}
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_integral<typename NthElementFinder::value_type>::value>::type>
+void run_basic_int64_cases()
+{
+    // Test cases:
+    // - both the array and the nth list are empty
+    // - the array has only one element, and the nth list is empty
+    // - the array has only one element, and the nth list has only one element
+    // - the array has only one element, and the nth list has duplicate elements
+    // - the array has only 2 identical elements, and the nth list has only one element
+    // - the array has only 2 identical elements, and the nth list has both elements
+    // - the array has only 2 identical elements, and the nth list has duplicate elements
+    // - the array has only 2 ordered elements, and the nth list has only one element
+    // - the array has only 2 ordered elements, and the nth list has both elements
+    // - the array has only 2 ordered elements, and the nth list has duplicate elements
+    // - the array has only 2 unordered elements, and the nth list has only one element
+    // - the array has only 2 unordered elements, and the nth list has both elements
+    // - the array has only 2 unordered elements, and the nth list has duplicate elements
+    // - the array contains identical elements, and the nth list has only one element
+    // - the array contains identical elements, and the nth list has all elements
+    // - the array contains identical elements, and the nth list has duplicate elements
+    // - all elements in the array are identical, and the nth list has 2 elements
+    // - all elements in the array are identical, and the nth list has all elements
+    // - all elements in the array are identical, and the nth list has duplicate elements
+    // - each element in the array is different from others, and the nth list has 3 elements
+    // - each element in the array is different from others, and the nth list has all elements
+    // - each element in the array is different from others, and the nth list has duplicate elements
+    struct test_case
+    {
+        typename NthElementFinder::container_type array;
+        typename NthElementFinder::nth_container_type nths;
+        typename NthElementFinder::container_type expected_elements;
+    } tests[] = {{{}, {}, {}},
+                 {{1}, {}, {}},
+                 {{1}, {0}, {1}},
+                 {{1}, {0, 0}, {1, 1}},
+                 {{1, 1}, {1}, {1}},
+                 {{1, 1}, {0, 1}, {1, 1}},
+                 {{1, 1}, {1, 1}, {1, 1}},
+                 {{1, 2}, {1}, {2}},
+                 {{1, 2}, {0, 1}, {1, 2}},
+                 {{1, 2}, {1, 1}, {2, 2}},
+                 {{2, 1}, {1}, {2}},
+                 {{2, 1}, {0, 1}, {1, 2}},
+                 {{2, 1}, {0, 0}, {1, 1}},
+                 {{2, 1, 2, 3, 2}, {2}, {2}},
+                 {{2, 1, 2, 3, 2}, {0, 1, 2, 3, 4}, {1, 2, 2, 2, 3}},
+                 {{2, 1, 2, 3, 2}, {0, 0, 2, 2, 3, 3}, {1, 1, 2, 2, 2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {2, 3}, {2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {0, 1, 2, 3, 4, 5}, {2, 2, 2, 2, 2, 2}},
+                 {{2, 2, 2, 2, 2, 2}, {1, 1, 2, 2, 5, 5}, {2, 2, 2, 2, 2, 2}},
+                 {{5, 6, 2, 8, 1, 7}, {3, 4, 5}, {6, 7, 8}},
+                 {{5, 6, 2, 8, 1, 7}, {0, 1, 2, 3, 4, 5}, {1, 2, 5, 6, 7, 8}},
+                 {{5, 6, 2, 8, 1, 7}, {0, 0, 2, 2, 5, 5}, {1, 1, 5, 5, 8, 8}}};
+
+    for (const auto &test : tests) {
+        run_integral_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);
+    }
+}
+
+TEST(nth_element_test, basic_int64) { run_basic_int64_cases<stl_nth_element_finder<int64_t>>(); }
+
+template <typename NthElementFinder>
+void run_generated_int64_cases()
+{
+    // Test cases:
+    // - generate empty array with empty nth list
+    // - generate an array of only one element with the nth list of only one element
+    // - generate an array of 2 elements with the nth list of 2 elements
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 100
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000
+    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000
+    struct test_case
+    {
+        typename NthElementFinder::size_type array_size;
+        int64_t initial_value;
+        uint64_t range_size;
+        typename NthElementFinder::nth_container_type nths;
+    } tests[] = {{0, 0, 2, {}},
+                 {1, 0, 2, {0}},
+                 {2, 0, 2, {0, 1}},
+                 {5000, 0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10000, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};
+
+    for (const auto &test : tests) {
+        integral_nth_element_case_generator<int64_t> generator(
+            test.array_size, test.initial_value, test.range_size, test.nths);
+
+        integral_nth_element_case_generator<int64_t>::container_type array;
+        integral_nth_element_case_generator<int64_t>::container_type expected_elements;
+        generator(array, expected_elements);
+
+        run_integral_cases<NthElementFinder>(array, test.nths, expected_elements);
+    }
+}
+
+TEST(nth_element_test, generated_int64)
+{
+    run_generated_int64_cases<stl_nth_element_finder<int64_t>>();
+}
+
+// Runs `NthElementFinder` over a copy of `array`; elements are checked by ASSERT_DOUBLE_EQ.
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>
+void run_floating_cases(const typename NthElementFinder::container_type &array,
+                        const typename NthElementFinder::nth_container_type &nths,
+                        const typename NthElementFinder::container_type &expected_elements)
+{
+    auto scratch = array;
+    NthElementFinder finder;
+    finder.set_nths(nths);
+    finder(scratch.begin(), scratch.begin(), scratch.end());
+    const auto &actual = finder.elements();
+    ASSERT_EQ(actual.size(), expected_elements.size());
+    for (typename NthElementFinder::size_type idx = 0; idx < actual.size(); ++idx) {
+        ASSERT_DOUBLE_EQ(actual[idx], expected_elements[idx]);
+    }
+}
+
+template <typename NthElementFinder,
+          typename = typename std::enable_if<
+              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>
+void run_basic_double_cases()
+{
+    // Test cases:
+    // - both the array and the nth list are empty
+    // - the array has only one element, and the nth list is empty
+    // - the array has only one element, and the nth list has only one element
+    // - the array has only one element, and the nth list has duplicate elements
+    // - the array has only 2 identical elements, and the nth list has only one element
+    // - the array has only 2 identical elements, and the nth list has both elements
+    // - the array has only 2 identical elements, and the nth list has duplicate elements
+    // - the array has only 2 ordered elements, and the nth list has only one element
+    // - the array has only 2 ordered elements, and the nth list has both elements
+    // - the array has only 2 ordered elements, and the nth list has duplicate elements
+    // - the array has only 2 unordered elements, and the nth list has only one element
+    // - the array has only 2 unordered elements, and the nth list has both elements
+    // - the array has only 2 unordered elements, and the nth list has duplicate elements
+    // - the array contains identical elements, and the nth list has only one element
+    // - the array contains identical elements, and the nth list has all elements
+    // - the array contains identical elements, and the nth list has duplicate elements
+    // - all elements in the array are identical, and the nth list has 2 elements
+    // - all elements in the array are identical, and the nth list has all elements
+    // - all elements in the array are identical, and the nth list has duplicate elements
+    // - each element in the array is different from others, and the nth list has 3 elements
+    // - the same array, and the nth list has all elements, or has duplicate elements (2 cases)
+    struct test_case
+    {
+        typename NthElementFinder::container_type array;
+        typename NthElementFinder::nth_container_type nths;
+        typename NthElementFinder::container_type expected_elements;
+    } tests[] = {
+        {{}, {}, {}},
+        {{1.23}, {}, {}},
+        {{1.23}, {0}, {1.23}},
+        {{1.23}, {0, 0}, {1.23, 1.23}},
+        {{1.23, 1.23}, {1}, {1.23}},
+        {{1.23, 1.23}, {0, 1}, {1.23, 1.23}},
+        {{1.23, 1.23}, {1, 1}, {1.23, 1.23}},
+        {{1.23, 2.34}, {1}, {2.34}},
+        {{1.23, 2.34}, {0, 1}, {1.23, 2.34}},
+        {{1.23, 2.34}, {1, 1}, {2.34, 2.34}},
+        {{2.34, 1.23}, {1}, {2.34}},
+        {{2.34, 1.23}, {0, 1}, {1.23, 2.34}},
+        {{2.34, 1.23}, {0, 0}, {1.23, 1.23}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {2}, {2.34}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 1, 2, 3, 4}, {1.23, 2.34, 2.34, 2.34, 3.56}},
+        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 0, 2, 2, 3, 3}, {1.23, 1.23, 2.34, 2.34, 2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34}, {2, 3}, {2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},
+         {0, 1, 2, 3, 4, 5},
+         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},
+        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},
+         {1, 1, 2, 2, 5, 5},
+         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89}, {3, 4, 5}, {6.78, 7.89, 8.90}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},
+         {0, 1, 2, 3, 4, 5},
+         {1.23, 2.34, 5.67, 6.78, 7.89, 8.90}},
+        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},
+         {0, 0, 2, 2, 5, 5},
+         {1.23, 1.23, 5.67, 5.67, 8.90, 8.90}}};
+
+    for (const auto &test : tests) {
+        run_floating_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);
+    }
+}
+
+TEST(nth_element_test, basic_double) { run_basic_double_cases<stl_nth_element_finder<double>>(); }
+
+template <typename NthElementFinder>
+void run_generated_double_cases()
+{
+    // Test cases:
+    // - generate empty array with empty nth list
+    // - generate an array of only one element with the nth list of only one element
+    // - generate an array of 2 elements with the nth list of 2 elements
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 100
+    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000
+    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000
+    struct test_case
+    {
+        typename NthElementFinder::size_type array_size;
+        double initial_value;
+        uint64_t range_size;
+        typename NthElementFinder::nth_container_type nths;
+    } tests[] = {{0, 0.0, 2, {}},
+                 {1, 0.0, 2, {0}},
+                 {2, 0.0, 2, {0, 1}},
+                 {5000, 0.0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10000, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},
+                 {5000, 0.0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};
+
+    for (const auto &test : tests) {
+        floating_nth_element_case_generator<double> generator(
+            test.array_size, test.initial_value, test.range_size, test.nths);
+
+        floating_nth_element_case_generator<double>::container_type array;
+        floating_nth_element_case_generator<double>::container_type expected_elements;
+        generator(array, expected_elements);
+
+        run_floating_cases<NthElementFinder>(array, test.nths, expected_elements);
+    }
+}
+
+TEST(nth_element_test, generated_double)
+{
+    run_generated_double_cases<stl_nth_element_finder<double>>();
+}
+
+} // namespace dsn
diff --git a/src/utils/test/nth_element_utils.h b/src/utils/test/nth_element_utils.h
new file mode 100644
index 0000000000..7b138b2cfb
--- /dev/null
+++ b/src/utils/test/nth_element_utils.h
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+#include <dsn/utility/process_utils.h>
+#include <dsn/utility/rand.h>
+
+#include "perf_counter/perf_counter_atomic.h"
+
+namespace dsn {
+
+// The generator is used to produce the test cases randomly for unit tests and benchmarks
+// of nth elements.
+template <typename T,
+          typename Rand,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class nth_element_case_generator
+{
+public:
+    using value_type = T;
+    using container_type = typename std::vector<value_type>;
+    using size_type = typename container_type::size_type;
+    using nth_container_type = typename std::vector<size_type>;
+
+    nth_element_case_generator(size_type array_size,
+                               value_type initial_value,
+                               uint64_t range_size,
+                               const nth_container_type &nths)
+        : _array_size(array_size),
+          _initial_value(initial_value),
+          _range_size(range_size),
+          _nths(nths),
+          _rand(Rand())
+    {
+        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),
+                  "nth indexes({}) is not sorted",
+                  fmt::join(_nths, " "));
+
+        for (const auto &nth : _nths) {
+            dassert_f(
+                nth >= 0 && nth < _array_size, "nth should be in the range [0, {})", _array_size);
+        }
+    }
+
+    ~nth_element_case_generator() = default;
+
+    // Generate an out-of-order `array` sized `_array_size`, and put nth elements of sorted
+    // `array` to `elements` in the order of `_nths` which must be sorted.
+    //
+    // The process has 2 stages:
+    // (1) Generate a sorted `array` from _initial_value. Always generate next element by current
+    // element plus _rand(_range_size). Once the index of an element belongs to nth indexes, it
+    // will be appended to `elements`.
+    // (2) After the sorted `array` is generated, it will be shuffled to be out-of-order.
+    void operator()(container_type &array, container_type &elements)
+    {
+        array.clear();
+        elements.clear();
+
+        auto value = _initial_value;
+        for (size_type i = 0, j = 0; i < _array_size; ++i) {
+            array.push_back(value);
+            for (; j < _nths.size() && _nths[j] == i; ++j) {
+                elements.push_back(value);
+            }
+
+            auto delta = _rand(_range_size);
+            value += delta;
+        }
+        std::random_shuffle(array.begin(), array.end());
+    }
+
+private:
+    const size_type _array_size;
+    const value_type _initial_value;
+    const uint64_t _range_size;
+    const nth_container_type _nths;
+    const Rand _rand;
+
+    DISALLOW_COPY_AND_ASSIGN(nth_element_case_generator);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+class integral_rand_generator
+{
+public:
+    T operator()(const uint64_t &upper) const { return static_cast<T>(rand::next_u64(upper)); }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+using integral_nth_element_case_generator =
+    nth_element_case_generator<T, integral_rand_generator<T>>;
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+class floating_rand_generator
+{
+public:
+    T operator()(const uint64_t &upper) const
+    {
+        return static_cast<T>(rand::next_u64(upper)) +
+               static_cast<T>(rand::next_u64(upper)) / static_cast<T>(upper);
+    }
+};
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_nth_element_case_generator =
+    nth_element_case_generator<T, floating_rand_generator<T>>;
+
+// Finder class based on perf_counter in comparison with other finders for multiple nth elements.
+class perf_counter_nth_element_finder
+{
+public:
+    using container_type = typename std::vector<int64_t>;
+    using size_type = typename container_type::size_type;
+
+    perf_counter_nth_element_finder()
+        : _perf_counter("benchmark",
+                        "perf_counter_number_percentile_atomic",
+                        "nth_element",
+                        COUNTER_TYPE_NUMBER_PERCENTILES,
+                        "nth_element implementation by perf_counter_number_percentile_atomic",
+                        false),
+          _elements(COUNTER_PERCENTILE_COUNT, int64_t())
+    {
+    }
+
+    void load_data(const container_type &array)
+    {
+        _perf_counter._tail.store(0, std::memory_order_relaxed);
+        for (const auto &e : array) {
+            _perf_counter.set(e);
+        }
+    }
+
+    void operator()()
+    {
+        _perf_counter.calc(
+            boost::make_shared<dsn::perf_counter_number_percentile_atomic::compute_context>());
+        std::copy(_perf_counter._results,
+                  _perf_counter._results + COUNTER_PERCENTILE_COUNT,
+                  _elements.begin());
+    }
+
+    const container_type &elements() const { return _elements; }
+
+private:
+    dsn::perf_counter_number_percentile_atomic _perf_counter;
+    container_type _elements;
+
+    DISALLOW_COPY_AND_ASSIGN(perf_counter_nth_element_finder);
+};
+
+} // namespace dsn

From 030bfa0b4e383e0f07626e4b6e9def8a532786c5 Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Fri, 17 Jun 2022 20:37:59 +0800
Subject: [PATCH 20/21] feat(new_metrics): implement the percentile (#1112)

---
 include/dsn/utility/alloc.h       |  84 +++++++++
 include/dsn/utility/metrics.h     | 300 +++++++++++++++++++++++++++++-
 include/dsn/utility/ports.h       |   3 +
 src/utils/alloc.cpp               |  54 ++++++
 src/utils/latency_tracer.cpp      |   1 +
 src/utils/metrics.cpp             |  50 ++++-
 src/utils/shared_io_service.cpp   |  60 ++++++
 src/utils/shared_io_service.h     |  33 +---
 src/utils/test/metrics_test.cpp   | 262 +++++++++++++++++++++++++-
 src/utils/test/percentile_utils.h |  88 +++++++++
 10 files changed, 903 insertions(+), 32 deletions(-)
 create mode 100644 include/dsn/utility/alloc.h
 create mode 100644 src/utils/alloc.cpp
 create mode 100644 src/utils/shared_io_service.cpp
 create mode 100644 src/utils/test/percentile_utils.h

diff --git a/include/dsn/utility/alloc.h b/include/dsn/utility/alloc.h
new file mode 100644
index 0000000000..6a5d63e7ce
--- /dev/null
+++ b/include/dsn/utility/alloc.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <new>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/ports.h>
+
+namespace dsn {
+
+#ifdef CACHELINE_SIZE
+
+extern void *cacheline_aligned_alloc(size_t size);
+
+extern void cacheline_aligned_free(void *mem_block);
+
+template <typename T>
+using cacheline_aligned_ptr = typename std::unique_ptr<T, std::function<void(void *)>>;
+
+template <typename T>
+cacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len)
+{
+    void *buffer = cacheline_aligned_alloc(sizeof(T) * len);
+    if (dsn_unlikely(buffer == nullptr)) {
+        return cacheline_aligned_ptr<T>(nullptr, [](void *) {});
+    }
+
+    T *array = new (buffer) T[len];
+
+#ifndef NDEBUG
+    if (sizeof(T) <= CACHELINE_SIZE && (sizeof(T) & (sizeof(T) - 1)) == 0) {
+        for (size_t i = 0; i < len; ++i) {
+            T *elem = &(array[i]);
+            dassert_f((reinterpret_cast<const uintptr_t>(elem) & (sizeof(T) - 1)) == 0,
+                      "unaligned array element for cache line: array={}, length={}, index={}, "
+                      "elem={}, elem_size={}, mask={}, cacheline_size={}",
+                      fmt::ptr(array),
+                      len,
+                      i,
+                      fmt::ptr(elem),
+                      sizeof(T),
+                      sizeof(T) - 1,
+                      CACHELINE_SIZE);
+        }
+    }
+#endif
+
+    return cacheline_aligned_ptr<T>(array, cacheline_aligned_free);
+}
+
+template <typename T>
+cacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len, const T &val)
+{
+    auto array = cacheline_aligned_alloc_array<T>(len);
+    if (array) {
+        std::fill(array.get(), array.get() + len, val);
+    }
+
+    return array;
+}
+
+#endif
+
+} // namespace dsn
diff --git a/include/dsn/utility/metrics.h b/include/dsn/utility/metrics.h
index 22da07b33c..0951233931 100644
--- a/include/dsn/utility/metrics.h
+++ b/include/dsn/utility/metrics.h
@@ -17,19 +17,29 @@
 
 #pragma once
 
+#include <algorithm>
 #include <atomic>
+#include <bitset>
+#include <functional>
+#include <memory>
 #include <mutex>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
+#include <vector>
+
+#include <boost/asio/deadline_timer.hpp>
 
 #include <dsn/c/api_utilities.h>
 #include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/alloc.h>
 #include <dsn/utility/autoref_ptr.h>
 #include <dsn/utility/casts.h>
 #include <dsn/utility/enum_helper.h>
 #include <dsn/utility/long_adder.h>
+#include <dsn/utility/nth_element.h>
 #include <dsn/utility/ports.h>
 #include <dsn/utility/singleton.h>
 #include <dsn/utility/string_view.h>
@@ -90,6 +100,14 @@
     dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name(                        \
         {#entity_type, #name, unit, desc, ##__VA_ARGS__})
 
+// The percentile supports both integral and floating types.
+#define METRIC_DEFINE_percentile_int64(entity_type, name, unit, desc, ...)                         \
+    dsn::percentile_prototype<int64_t> METRIC_##name(                                              \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+#define METRIC_DEFINE_percentile_double(entity_type, name, unit, desc, ...)                        \
+    dsn::floating_percentile_prototype<double> METRIC_##name(                                      \
+        {#entity_type, #name, unit, desc, ##__VA_ARGS__})
+
 // The following macros act as forward declarations for entity types and metric prototypes.
 #define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name
 #define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name
@@ -102,6 +120,10 @@
     extern dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name
 #define METRIC_DECLARE_concurrent_volatile_counter(name)                                           \
     extern dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name
+#define METRIC_DECLARE_percentile_int64(name)                                                      \
+    extern dsn::percentile_prototype<int64_t> METRIC_##name
+#define METRIC_DECLARE_percentile_double(name)                                                     \
+    extern dsn::floating_percentile_prototype<double> METRIC_##name
 
 namespace dsn {
 
@@ -393,13 +415,23 @@ template <typename Adder = striped_long_adder, bool IsVolatile = false>
 class counter : public metric
 {
 public:
-    template <bool Volatile = IsVolatile, typename = typename std::enable_if<!Volatile>::type>
+    // Which overload of value() is available is decided by the class template parameter
+    // `IsVolatile`, so that callers do not have to write a template argument each time
+    // value() is called.
+    //
+    // However, enable_if can only disable a member function through a template parameter of
+    // that function itself; depending on the class template parameter alone will lead to
+    // compilation error. Therefore a function template parameter `Volatile` with the same
+    // meaning (defaulting to `IsVolatile`) is introduced, and enable_if checks both of them.
+    template <bool Volatile = IsVolatile,
+              typename = typename std::enable_if<!Volatile && !IsVolatile>::type>
     int64_t value() const
     {
         return _adder.value();
     }
 
-    template <bool Volatile = IsVolatile, typename = typename std::enable_if<Volatile>::type>
+    template <bool Volatile = IsVolatile,
+              typename = typename std::enable_if<Volatile && IsVolatile>::type>
     int64_t value()
     {
         return _adder.fetch_and_reset();
@@ -447,4 +479,268 @@ using concurrent_volatile_counter_ptr = counter_ptr<concurrent_long_adder, true>
 template <typename Adder = striped_long_adder>
 using volatile_counter_prototype = metric_prototype_with<counter<Adder, true>>;
 
+// All supported kinds of kth percentiles. User can configure required kth percentiles for
+// each percentile. Only configured kth percentiles will be computed. This can reduce CPU
+// consumption.
+enum class kth_percentile_type : size_t
+{
+    P50,
+    P90,
+    P95,
+    P99,
+    P999,
+    COUNT,
+    INVALID
+};
+
+// Support to load from configuration files for percentiles.
+ENUM_BEGIN(kth_percentile_type, kth_percentile_type::INVALID)
+ENUM_REG(kth_percentile_type::P50)
+ENUM_REG(kth_percentile_type::P90)
+ENUM_REG(kth_percentile_type::P95)
+ENUM_REG(kth_percentile_type::P99)
+ENUM_REG(kth_percentile_type::P999)
+ENUM_END(kth_percentile_type)
+
+const std::vector<double> kKthDecimals = {0.5, 0.9, 0.95, 0.99, 0.999};
+
+inline size_t kth_percentile_to_nth_index(size_t size, size_t kth_index)
+{
+    auto decimal = kKthDecimals[kth_index];
+    // Since the kth percentile is the value that is greater than k percent of the data values after
+    // ranking them (https://people.richland.edu/james/ictcm/2001/descriptive/helpposition.html),
+    // compute the nth index by size * decimal rather than size * decimal - 1.
+    return static_cast<size_t>(size * decimal);
+}
+
+inline size_t kth_percentile_to_nth_index(size_t size, kth_percentile_type type)
+{
+    return kth_percentile_to_nth_index(size, static_cast<size_t>(type));
+}
+
+inline std::set<kth_percentile_type> get_all_kth_percentile_types() // inline: lives in a header
+{
+    std::set<kth_percentile_type> all_types;
+    for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+        all_types.insert(static_cast<kth_percentile_type>(i));
+    }
+    return all_types;
+}
+const std::set<kth_percentile_type> kAllKthPercentileTypes = get_all_kth_percentile_types();
+
+// `percentile_timer` is a timer class that encapsulates the details of how each percentile is
+// computed periodically.
+//
+// To be instantiated, it requires `interval_ms` at which a percentile is computed and `exec`
+// which is used to compute percentile.
+//
+// To prevent all percentiles from being computed at the same time, which would lead to very
+// high load, the first computation of each percentile is delayed by a random interval.
+class percentile_timer
+{
+public:
+    using exec_fn = std::function<void()>;
+
+    percentile_timer(uint64_t interval_ms, exec_fn exec);
+    ~percentile_timer() = default;
+
+    // Get the initial delay that is randomly generated by `generate_initial_delay_ms()`.
+    uint64_t get_initial_delay_ms() const { return _initial_delay_ms; }
+
+private:
+    // Generate a random initial delay so that percentiles are not all computed at the
+    // same time.
+    static uint64_t generate_initial_delay_ms(uint64_t interval_ms);
+
+    void on_timer(const boost::system::error_code &ec);
+
+    const uint64_t _initial_delay_ms; // delay before the first execution of `_exec`
+    const uint64_t _interval_ms;      // delay between subsequent executions of `_exec`
+    const exec_fn _exec;              // invoked each time the timer expires without error
+    std::unique_ptr<boost::asio::deadline_timer> _timer;
+};
+
+// The percentile is a metric type that samples observations. The size of samples has an upper
+// bound. Once the maximum size is reached, the earliest observations will be overwritten.
+//
+// On the other hand, kth percentiles, such as P50, P90, P95, P99, P999, will be calculated
+// periodically over all samples. The kth percentiles which are calculated are configurable
+// provided that they are of valid kth_percentile_type (i.e. in kAllKthPercentileTypes).
+//
+// The most common usage of percentile is latency, such as server-level and replica-level
+// latencies. For example, if P99 latency is 10 ms, it means the latencies of 99% of requests
+// are less than 10 ms.
+//
+// The percentile is implemented by the finder for nth elements. Each kth percentile is firstly
+// converted to nth index; then, find the element corresponding to the nth index.
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+class percentile : public metric
+{
+public:
+    using value_type = T;
+    using size_type = typename NthElementFinder::size_type;
+
+    void set(const value_type &val)
+    {
+        const auto index = _tail.fetch_add(1, std::memory_order_relaxed);
+        _samples.get()[index & (_sample_size - 1)] = val;
+    }
+
+    // If `type` is not configured, it will return false with zero value stored in `val`;
+    // otherwise, it will always return true with the value corresponding to `type`.
+    bool get(kth_percentile_type type, value_type &val) const
+    {
+        const auto index = static_cast<size_t>(type);
+        dcheck_lt(index, static_cast<size_t>(kth_percentile_type::COUNT));
+
+        val = _full_nth_elements[index].load(std::memory_order_relaxed);
+        return _kth_percentile_bitset.test(index);
+    }
+
+    bool timer_enabled() const { return !!_timer; }
+
+    uint64_t get_initial_delay_ms() const
+    {
+        return timer_enabled() ? _timer->get_initial_delay_ms() : 0;
+    }
+
+    static const size_type kDefaultSampleSize = 4096;
+
+protected:
+    // interval_ms is the interval between the computations for percentiles. Its unit is
+    // milliseconds. It's suggested that interval_ms should be near the period between pulls
+    // from or pushes to the monitoring system.
+    percentile(const metric_prototype *prototype,
+               uint64_t interval_ms = 10000,
+               const std::set<kth_percentile_type> &kth_percentiles = kAllKthPercentileTypes,
+               size_type sample_size = kDefaultSampleSize)
+        : metric(prototype),
+          _sample_size(sample_size),
+          _last_real_sample_size(0),
+          _samples(cacheline_aligned_alloc_array<value_type>(sample_size, value_type{})),
+          _tail(0),
+          _kth_percentile_bitset(),
+          _full_nth_elements(static_cast<size_t>(kth_percentile_type::COUNT)),
+          _nth_element_finder(),
+          _timer()
+    {
+        dassert(_sample_size > 0 && (_sample_size & (_sample_size - 1)) == 0,
+                "sample_sizes should be > 0 and power of 2");
+
+        dassert(_samples, "_samples should be valid pointer");
+
+        for (const auto &kth : kth_percentiles) {
+            _kth_percentile_bitset.set(static_cast<size_t>(kth));
+        }
+
+        for (size_type i = 0; i < _full_nth_elements.size(); ++i) {
+            _full_nth_elements[i].store(value_type{}, std::memory_order_relaxed);
+        }
+
+#ifdef DSN_MOCK_TEST
+        if (interval_ms == 0) {
+            // Timer is disabled.
+            return;
+        }
+#else
+        dcheck_gt(interval_ms, 0);
+#endif
+
+        _timer.reset(new percentile_timer(
+            interval_ms,
+            std::bind(&percentile<value_type, NthElementFinder>::find_nth_elements, this)));
+    }
+
+    virtual ~percentile() = default;
+
+private:
+    using nth_container_type = typename NthElementFinder::nth_container_type;
+
+    friend class metric_entity;
+    friend class ref_ptr<percentile<value_type, NthElementFinder>>;
+
+    void find_nth_elements()
+    {
+        size_type real_sample_size = std::min(static_cast<size_type>(_tail.load()), _sample_size);
+        if (real_sample_size == 0) {
+            // No need to find since there has not been any sample yet.
+            return;
+        }
+
+        // If the size of samples changes, the nth indexes should be updated.
+        if (real_sample_size != _last_real_sample_size) {
+            set_real_nths(real_sample_size);
+            _last_real_sample_size = real_sample_size;
+        }
+
+        // Find nth elements over a local copy of the samples collected so far.
+        std::vector<T> array(real_sample_size);
+        std::copy(_samples.get(), _samples.get() + real_sample_size, array.begin());
+        _nth_element_finder(array.begin(), array.begin(), array.end());
+
+        // Store nth elements; `next` advances only for configured kth percentiles.
+        const auto &elements = _nth_element_finder.elements();
+        for (size_t i = 0, next = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+            if (!_kth_percentile_bitset.test(i)) {
+                continue;
+            }
+            _full_nth_elements[i].store(elements[next++], std::memory_order_relaxed);
+        }
+    }
+
+    void set_real_nths(size_type real_sample_size)
+    {
+        nth_container_type nths;
+        for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {
+            if (!_kth_percentile_bitset.test(i)) {
+                continue;
+            }
+
+            auto size = static_cast<size_t>(real_sample_size);
+            auto nth = static_cast<size_type>(kth_percentile_to_nth_index(size, i));
+            nths.push_back(nth);
+        }
+
+        _nth_element_finder.set_nths(nths);
+    }
+
+    const size_type _sample_size;
+    size_type _last_real_sample_size;
+    cacheline_aligned_ptr<value_type> _samples;
+    std::atomic<uint64_t> _tail; // use unsigned int to avoid running out of bound
+    std::bitset<static_cast<size_t>(kth_percentile_type::COUNT)> _kth_percentile_bitset;
+    std::vector<std::atomic<value_type>> _full_nth_elements;
+    NthElementFinder _nth_element_finder;
+
+    std::unique_ptr<percentile_timer> _timer;
+};
+
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+using percentile_ptr = ref_ptr<percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+using percentile_prototype = metric_prototype_with<percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile = percentile<T, NthElementFinder>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_ptr = ref_ptr<floating_percentile<T, NthElementFinder>>;
+
+template <typename T,
+          typename NthElementFinder = floating_stl_nth_element_finder<T>,
+          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_prototype =
+    metric_prototype_with<floating_percentile<T, NthElementFinder>>;
+
 } // namespace dsn
diff --git a/include/dsn/utility/ports.h b/include/dsn/utility/ports.h
index 6e2b182755..568fbf231b 100644
--- a/include/dsn/utility/ports.h
+++ b/include/dsn/utility/ports.h
@@ -102,6 +102,9 @@
 
 // This is a NOP if CACHELINE_SIZE is not defined.
 #ifdef CACHELINE_SIZE
+static_assert((CACHELINE_SIZE & (CACHELINE_SIZE - 1)) == 0 &&
+                  (CACHELINE_SIZE & (sizeof(void *) - 1)) == 0,
+              "CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *)");
 #define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
 #else
 #define CACHELINE_ALIGNED
diff --git a/src/utils/alloc.cpp b/src/utils/alloc.cpp
new file mode 100644
index 0000000000..fbf641a7e2
--- /dev/null
+++ b/src/utils/alloc.cpp
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <dsn/utility/alloc.h>
+
+#include <cstdlib>
+
+#include <dsn/utility/safe_strerror_posix.h>
+
+namespace dsn {
+
+#ifdef CACHELINE_SIZE
+
+/* extern */ void *cacheline_aligned_alloc(size_t size)
+{
+    if (dsn_unlikely(size == 0)) {
+        return nullptr;
+    }
+
+    void *buffer = nullptr;
+    // CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *); both have been
+    // checked statically at compile time where CACHELINE_SIZE is defined as a macro.
+    int err = posix_memalign(&buffer, CACHELINE_SIZE, size);
+
+    // Generally there are 2 possible errors for posix_memalign as below:
+    // [EINVAL]
+    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).
+    // [ENOMEM]
+    //     There is insufficient memory available with the requested alignment.
+    // Thus making an assertion here is enough.
+    dassert_f(err == 0, "error calling posix_memalign: {}", utils::safe_strerror(err).c_str());
+
+    return buffer;
+}
+
+/* extern */ void cacheline_aligned_free(void *mem_block) { free(mem_block); }
+
+#endif
+
+} // namespace dsn
diff --git a/src/utils/latency_tracer.cpp b/src/utils/latency_tracer.cpp
index 863846da3a..e32d257ae9 100644
--- a/src/utils/latency_tracer.cpp
+++ b/src/utils/latency_tracer.cpp
@@ -19,6 +19,7 @@
 #include <dsn/perf_counter/perf_counters.h>
 #include <dsn/service_api_c.h>
 #include <dsn/dist/fmt_logging.h>
+#include <dsn/utility/config_api.h>
 #include <dsn/utility/flags.h>
 
 #include <utility>
diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp
index 0ec1415cac..656e49363f 100644
--- a/src/utils/metrics.cpp
+++ b/src/utils/metrics.cpp
@@ -18,6 +18,9 @@
 #include <dsn/utility/metrics.h>
 
 #include <dsn/c/api_utilities.h>
+#include <dsn/utility/rand.h>
+
+#include "shared_io_service.h"
 
 namespace dsn {
 
@@ -64,7 +67,15 @@ metric_entity_prototype::metric_entity_prototype(const char *name) : _name(name)
 
 metric_entity_prototype::~metric_entity_prototype() {}
 
-metric_registry::metric_registry() {}
+metric_registry::metric_registry()
+{
+    // We should ensure that metric_registry is destructed before shared_io_service is destructed.
+    // If shared_io_service were destructed before metric_registry, the boost::asio::io_service
+    // needed by metrics in metric_registry, such as percentile_timer, would be released first.
+    // This would lead to a heap-use-after-free error, since percentiles in metric_registry would
+    // still be running while the resources they need have already been released.
+    tools::shared_io_service::instance();
+}
 
 metric_registry::~metric_registry() {}
 
@@ -100,4 +111,41 @@ metric_prototype::~metric_prototype() {}
 
 metric::metric(const metric_prototype *prototype) : _prototype(prototype) {}
 
+uint64_t percentile_timer::generate_initial_delay_ms(uint64_t interval_ms)
+{
+    dcheck_gt(interval_ms, 0);
+
+    if (interval_ms < 1000) {
+        return rand::next_u64() % interval_ms + 50;
+    }
+
+    uint64_t interval_seconds = interval_ms / 1000;
+    return (rand::next_u64() % interval_seconds + 1) * 1000 + rand::next_u64() % 1000;
+}
+
+percentile_timer::percentile_timer(uint64_t interval_ms, exec_fn exec)
+    : _initial_delay_ms(generate_initial_delay_ms(interval_ms)),
+      _interval_ms(interval_ms),
+      _exec(exec),
+      _timer(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios))
+{
+    _timer->expires_from_now(boost::posix_time::milliseconds(_initial_delay_ms));
+    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));
+}
+
+void percentile_timer::on_timer(const boost::system::error_code &ec)
+{
+    if (dsn_unlikely(!!ec)) {
+        dassert_f(ec == boost::system::errc::operation_canceled,
+                  "failed to exec on_timer with an error that cannot be handled: {}",
+                  ec.message());
+        return;
+    }
+
+    _exec();
+
+    _timer->expires_from_now(boost::posix_time::milliseconds(_interval_ms));
+    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));
+}
+
 } // namespace dsn
diff --git a/src/utils/shared_io_service.cpp b/src/utils/shared_io_service.cpp
new file mode 100644
index 0000000000..d4a82d95b6
--- /dev/null
+++ b/src/utils/shared_io_service.cpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "shared_io_service.h"
+
+#include <dsn/utility/flags.h>
+
+namespace dsn {
+namespace tools {
+
+const uint32_t kMinTimerServiceWorkerCount = 3;
+DSN_DEFINE_uint32("core",
+                  timer_service_worker_count,
+                  kMinTimerServiceWorkerCount,
+                  "the number of threads for timer service");
+DSN_DEFINE_validator(timer_service_worker_count, [](uint32_t worker_count) -> bool {
+    if (worker_count < kMinTimerServiceWorkerCount) {
+        derror("timer_service_worker_count should be at least 3, where one thread is used to "
+               "collect all metrics from registery for monitoring systems, and another two threads "
+               "are used to compute percentiles.");
+        return false;
+    }
+    return true;
+});
+
+shared_io_service::shared_io_service()
+{
+    _workers.reserve(FLAGS_timer_service_worker_count);
+    for (uint32_t i = 0; i < FLAGS_timer_service_worker_count; ++i) {
+        _workers.emplace_back([this]() {
+            boost::asio::io_service::work work(ios);
+            ios.run();
+        });
+    }
+}
+
+shared_io_service::~shared_io_service()
+{
+    ios.stop();
+    for (auto &worker : _workers) {
+        worker.join();
+    }
+}
+
+} // namespace tools
+} // namespace dsn
diff --git a/src/utils/shared_io_service.h b/src/utils/shared_io_service.h
index a0e5082c80..df6db1326f 100644
--- a/src/utils/shared_io_service.h
+++ b/src/utils/shared_io_service.h
@@ -36,10 +36,10 @@
 #pragma once
 
 #include <thread>
-#include <memory>
 #include <vector>
+
 #include <boost/asio.hpp>
-#include <dsn/utility/config_api.h>
+
 #include <dsn/utility/singleton.h>
 
 namespace dsn {
@@ -54,32 +54,13 @@ class shared_io_service : public utils::singleton<shared_io_service>
     boost::asio::io_service ios;
 
 private:
-    shared_io_service()
-    {
-        _io_service_worker_count =
-            (int)dsn_config_get_value_uint64("core",
-                                             "timer_service_worker_count",
-                                             2,
-                                             "thread number for timer service for core itself");
-        for (int i = 0; i < _io_service_worker_count; i++) {
-            _workers.push_back(std::shared_ptr<std::thread>(new std::thread([this]() {
-                boost::asio::io_service::work work(ios);
-                ios.run();
-            })));
-        }
-    }
-    ~shared_io_service()
-    {
-        ios.stop();
-        for (auto worker : _workers) {
-            worker->join();
-        }
-    }
+    friend class utils::singleton<shared_io_service>;
 
-    int _io_service_worker_count;
-    std::vector<std::shared_ptr<std::thread>> _workers;
+    shared_io_service();
+    ~shared_io_service();
 
-    friend class utils::singleton<shared_io_service>;
+    std::vector<std::thread> _workers;
 };
+
 } // namespace tools
 } // namespace dsn
diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp
index 6ba5b65687..5af3117328 100644
--- a/src/utils/test/metrics_test.cpp
+++ b/src/utils/test/metrics_test.cpp
@@ -18,11 +18,14 @@
 #include <dsn/utility/metrics.h>
 #include <dsn/utility/rand.h>
 
+#include <chrono>
 #include <thread>
 #include <vector>
 
 #include <gtest/gtest.h>
 
+#include "percentile_utils.h"
+
 namespace dsn {
 
 class my_gauge : public metric
@@ -106,6 +109,16 @@ METRIC_DEFINE_concurrent_volatile_counter(my_server,
                                           dsn::metric_unit::kRequests,
                                           "a server-level concurrent_volatile_counter for test");
 
+METRIC_DEFINE_percentile_int64(my_server,
+                               test_percentile_int64,
+                               dsn::metric_unit::kNanoSeconds,
+                               "a server-level percentile of int64 type for test");
+
+METRIC_DEFINE_percentile_double(my_server,
+                                test_percentile_double,
+                                dsn::metric_unit::kNanoSeconds,
+                                "a server-level percentile of double type for test");
+
 namespace dsn {
 
 TEST(metrics_test, create_entity)
@@ -345,7 +358,7 @@ TEST(metrics_test, gauge_double)
 void execute(int64_t num_threads, std::function<void(int)> runner)
 {
     std::vector<std::thread> threads;
-    for (int64_t i = 0; i < num_threads; i++) {
+    for (int64_t i = 0; i < num_threads; ++i) {
         threads.emplace_back([i, &runner]() { runner(i); });
     }
     for (auto &t : threads) {
@@ -388,7 +401,7 @@ void run_increment_by(MetricPtr &my_metric,
         deltas.push_back(delta);
     }
 
-    execute(num_threads, [num_operations, &my_metric, &deltas](int tid) mutable {
+    execute(num_threads, [num_operations, &my_metric, &deltas](int64_t tid) mutable {
         for (int64_t i = 0; i < num_operations; ++i) {
             auto delta = deltas[tid * num_operations + i];
             increment_by(std::integral_constant<bool, IsIncrement>{}, my_metric, delta);
@@ -555,7 +568,7 @@ void run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_me
 
     execute(num_threads_write + num_threads_read,
             [num_operations, num_threads_write, &my_metric, &deltas, &results, &completed](
-                int tid) mutable {
+                int64_t tid) mutable {
                 if (tid < num_threads_write) {
                     for (int64_t i = 0; i < num_operations; ++i) {
                         my_metric->increment_by(deltas[tid * num_operations + i]);
@@ -646,4 +659,247 @@ TEST(metrics_test, volatile_counter)
     run_volatile_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_volatile_counter);
 }
 
+template <typename T, typename Prototype, typename Checker>
+void run_percentile(const metric_entity_ptr &my_entity,
+                    const Prototype &prototype,
+                    const std::vector<T> &data,
+                    size_t num_preload,
+                    uint64_t interval_ms,
+                    uint64_t exec_ms,
+                    const std::set<kth_percentile_type> &kth_percentiles,
+                    size_t sample_size,
+                    size_t num_threads,
+                    const std::vector<T> &expected_elements,
+                    Checker checker)
+{
+    dassert_f(num_threads > 0, "Invalid num_threads({})", num_threads);
+    dassert_f(data.size() <= sample_size && data.size() % num_threads == 0,
+              "Invalid arguments, data_size={}, sample_size={}, num_threads={}",
+              data.size(),
+              sample_size,
+              num_threads);
+
+    auto my_metric = prototype.instantiate(my_entity, interval_ms, kth_percentiles, sample_size);
+
+    // Preload zero in current thread.
+    for (size_t i = 0; i < num_preload; ++i) {
+        my_metric->set(0);
+    }
+
+    // Load other data in each spawned thread evenly.
+    const size_t num_operations = data.size() / num_threads;
+    execute(static_cast<int64_t>(num_threads),
+            [num_operations, &my_metric, &data](int64_t tid) mutable {
+                for (size_t i = 0; i < num_operations; ++i) {
+                    my_metric->set(data[static_cast<size_t>(tid) * num_operations + i]);
+                }
+            });
+
+    // Wait a while in order to finish computing all percentiles.
+    std::this_thread::sleep_for(
+        std::chrono::milliseconds(my_metric->get_initial_delay_ms() + interval_ms + exec_ms));
+
+    // Check if actual elements of kth percentiles are equal to the expected ones.
+    std::vector<T> actual_elements;
+    for (const auto &kth : kAllKthPercentileTypes) {
+        T value;
+        if (kth_percentiles.find(kth) == kth_percentiles.end()) {
+            ASSERT_FALSE(my_metric->get(kth, value));
+            checker(value, 0);
+        } else {
+            ASSERT_TRUE(my_metric->get(kth, value));
+            actual_elements.push_back(value);
+        }
+    }
+    checker(actual_elements, expected_elements);
+
+    // Check if this percentile is included in the entity.
+    auto metrics = my_entity->metrics();
+    ASSERT_EQ(metrics[&prototype].get(), static_cast<metric *>(my_metric.get()));
+
+    // Check if the prototype is referenced by this percentile.
+    ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(&prototype));
+}
+
+template <typename T, typename Prototype, typename CaseGenerator, typename Checker>
+void run_percentile_cases(const Prototype &prototype)
+{
+    using value_type = T;
+    const auto p50 = kth_percentile_type::P50;
+    const auto p90 = kth_percentile_type::P90;
+    const auto p99 = kth_percentile_type::P99;
+
+    // Test cases:
+    // - input no sample with no kth percentile
+    // - input 1 sample with no kth percentile
+    // - input 1 sample with 1 kth percentile
+    // - input 1 sample with 2 kth percentiles
+    // - input 1 sample with all kth percentiles
+    // - input 1 sample with 1 kth percentile, capacity of 2
+    // - input 1 sample with 2 kth percentiles, capacity of 2
+    // - input 1 sample with all kth percentiles, capacity of 2
+    // - input 2 samples with 1 kth percentile
+    // - input 2 samples with 2 kth percentiles
+    // - input 2 samples with all kth percentiles
+    // - input 10 samples with 1 kth percentile, capacity of 16
+    // - input 10 samples with 2 kth percentiles, capacity of 16
+    // - input 10 samples with all kth percentiles, capacity of 16
+    // - input 10 samples with 1 kth percentile by 2 threads, capacity of 16
+    // - input 10 samples with 2 kth percentiles by 2 threads, capacity of 16
+    // - input 10 samples with all kth percentiles by 2 threads, capacity of 16
+    // - input 16 samples with 1 kth percentile
+    // - input 16 samples with 2 kth percentiles
+    // - input 16 samples with all kth percentiles
+    // - input 16 samples with 1 kth percentile by 2 threads
+    // - input 16 samples with 2 kth percentiles by 2 threads
+    // - input 16 samples with all kth percentiles by 2 threads
+    // - preload 5 samples and input 16 samples with 1 kth percentile by 2 threads
+    // - preload 5 samples and input 16 samples with 2 kth percentiles by 2 threads
+    // - preload 5 samples and input 16 samples with all kth percentiles by 2 threads
+    // - input 2000 samples with 1 kth percentile, capacity of 4096
+    // - input 2000 samples with 2 kth percentiles, capacity of 4096
+    // - input 2000 samples with all kth percentiles, capacity of 4096
+    // - input 2000 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - input 2000 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - input 2000 samples with all kth percentiles by 4 threads, capacity of 4096
+    // - input 4096 samples with 1 kth percentile, capacity of 4096
+    // - input 4096 samples with 2 kth percentiles, capacity of 4096
+    // - input 4096 samples with all kth percentiles, capacity of 4096
+    // - input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - input 4096 samples with all kth percentiles by 4 threads, capacity of 4096
+    // - preload 5 input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096
+    // - preload 5 input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096
+    // - preload 5 input 4096 samples with all kth percentiles by 4 threads, capacity of 4096
+    struct test_case
+    {
+        std::string entity_id;
+        size_t data_size;
+        value_type initial_value;
+        uint64_t range_size;
+        size_t num_preload;
+        uint64_t interval_ms;
+        uint64_t exec_ms;
+        const std::set<kth_percentile_type> kth_percentiles;
+        size_t sample_size;
+        size_t num_threads;
+    } tests[] = {{"server_19", 0, 0, 2, 0, 50, 10, {}, 1, 1},
+                 {"server_20", 1, 0, 2, 0, 50, 10, {}, 1, 1},
+                 {"server_21", 1, 0, 2, 0, 50, 10, {p90}, 1, 1},
+                 {"server_22", 1, 0, 2, 0, 50, 10, {p50, p99}, 1, 1},
+                 {"server_23", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 1, 1},
+                 {"server_24", 1, 0, 2, 0, 50, 10, {p90}, 2, 1},
+                 {"server_25", 1, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},
+                 {"server_26", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},
+                 {"server_27", 2, 0, 2, 0, 50, 10, {p90}, 2, 1},
+                 {"server_28", 2, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},
+                 {"server_29", 2, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},
+                 {"server_30", 10, 0, 2, 0, 50, 10, {p90}, 16, 1},
+                 {"server_31", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},
+                 {"server_32", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},
+                 {"server_33", 10, 0, 2, 0, 50, 10, {p90}, 16, 2},
+                 {"server_34", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},
+                 {"server_35", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_36", 16, 0, 2, 0, 50, 10, {p90}, 16, 1},
+                 {"server_37", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},
+                 {"server_38", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},
+                 {"server_39", 16, 0, 2, 0, 50, 10, {p90}, 16, 2},
+                 {"server_40", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},
+                 {"server_41", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_42", 16, 0, 2, 5, 50, 10, {p90}, 16, 2},
+                 {"server_43", 16, 0, 2, 5, 50, 10, {p50, p99}, 16, 2},
+                 {"server_44", 16, 0, 2, 5, 50, 10, kAllKthPercentileTypes, 16, 2},
+                 {"server_45", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 1},
+                 {"server_46", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},
+                 {"server_47", 2000, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},
+                 {"server_48", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 4},
+                 {"server_49", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_50", 2000, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 4},
+                 {"server_51", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 1},
+                 {"server_52", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},
+                 {"server_53", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},
+                 {"server_54", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 4},
+                 {"server_55", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_56", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 4},
+                 {"server_57", 4096, 0, 5, 5, 50, 10, {p90}, 4096, 4},
+                 {"server_58", 4096, 0, 5, 5, 50, 10, {p50, p99}, 4096, 4},
+                 {"server_59", 4096, 0, 5, 5, 50, 10, kAllKthPercentileTypes, 4096, 4}};
+
+    for (const auto &test : tests) {
+        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);
+
+        CaseGenerator generator(
+            test.data_size, test.initial_value, test.range_size, test.kth_percentiles);
+
+        std::vector<value_type> data;
+        std::vector<value_type> expected_elements;
+        generator(data, expected_elements);
+
+        run_percentile<value_type, Prototype, Checker>(my_server_entity,
+                                                       prototype,
+                                                       data,
+                                                       test.num_preload,
+                                                       test.interval_ms,
+                                                       test.exec_ms,
+                                                       test.kth_percentiles,
+                                                       test.sample_size,
+                                                       test.num_threads,
+                                                       expected_elements,
+                                                       Checker());
+    }
+}
+
+template <typename T>
+class integral_checker
+{
+public:
+    void operator()(const T &actual_element, const T &expected_element) const
+    {
+        ASSERT_EQ(actual_element, expected_element);
+    }
+
+    void operator()(const std::vector<T> &actual_elements,
+                    const std::vector<T> &expected_elements) const
+    {
+        ASSERT_EQ(actual_elements, expected_elements);
+    }
+};
+
+TEST(metrics_test, percentile_int64)
+{
+    using value_type = int64_t;
+    run_percentile_cases<value_type,
+                         percentile_prototype<value_type>,
+                         integral_percentile_case_generator<value_type>,
+                         integral_checker<value_type>>(METRIC_test_percentile_int64);
+}
+
+template <typename T>
+class floating_checker
+{
+public:
+    void operator()(const T &actual_element, const T &expected_element) const
+    {
+        ASSERT_DOUBLE_EQ(actual_element, expected_element);
+    }
+
+    void operator()(const std::vector<T> &actual_elements,
+                    const std::vector<T> &expected_elements) const
+    {
+        ASSERT_EQ(actual_elements.size(), expected_elements.size());
+        for (size_t i = 0; i < expected_elements.size(); ++i) {
+            ASSERT_DOUBLE_EQ(actual_elements[i], expected_elements[i]);
+        }
+    }
+};
+
+TEST(metrics_test, percentile_double)
+{
+    using value_type = double;
+    run_percentile_cases<value_type,
+                         floating_percentile_prototype<value_type>,
+                         floating_percentile_case_generator<value_type>,
+                         floating_checker<value_type>>(METRIC_test_percentile_double);
+}
+
 } // namespace dsn
diff --git a/src/utils/test/percentile_utils.h b/src/utils/test/percentile_utils.h
new file mode 100644
index 0000000000..b764f5e0da
--- /dev/null
+++ b/src/utils/test/percentile_utils.h
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <type_traits>
+#include <vector>
+
+#include <dsn/c/api_utilities.h>
+#include <dsn/utility/metrics.h>
+#include <dsn/dist/fmt_logging.h>
+
+#include "nth_element_utils.h"
+
+namespace dsn {
+
+// Randomly produces test cases for the unit tests and benchmarks of percentile. Internally each
+// kth percentile is translated into an nth index, then nth_element_case_generator is invoked to
+// generate the data together with the corresponding nth elements.
+template <typename NthElementCaseGenerator,
+          typename = typename std::enable_if<
+              std::is_arithmetic<typename NthElementCaseGenerator::value_type>::value>::type>
+class percentile_case_generator
+{
+public:
+    using value_type = typename NthElementCaseGenerator::value_type;
+    using container_type = typename NthElementCaseGenerator::container_type;
+    using size_type = typename NthElementCaseGenerator::size_type;
+    using nth_container_type = typename NthElementCaseGenerator::nth_container_type;
+
+    percentile_case_generator(size_type data_size,
+                              value_type initial_value,
+                              uint64_t range_size,
+                              const std::set<kth_percentile_type> &kth_percentiles)
+        : _nth_element_gen(nullptr)
+    {
+        // Map each kth percentile to the nth index it selects among `data_size` elements.
+        const auto size = static_cast<size_t>(data_size);
+        nth_container_type nths;
+        nths.reserve(kth_percentiles.size());
+        for (const auto &kth : kth_percentiles) {
+            nths.push_back(static_cast<size_type>(kth_percentile_to_nth_index(size, kth)));
+        }
+
+        _nth_element_gen.reset(
+            new NthElementCaseGenerator(data_size, initial_value, range_size, nths));
+    }
+
+    ~percentile_case_generator() = default;
+
+    // Delegates to the wrapped nth_element_case_generator: fills `data` (out-of-order, sized
+    // `data_size`) and `elements` (the nth elements). See nth_element_case_generator for details.
+    void operator()(container_type &data, container_type &elements)
+    {
+        (*_nth_element_gen)(data, elements);
+    }
+
+private:
+    std::unique_ptr<NthElementCaseGenerator> _nth_element_gen;
+
+    DISALLOW_COPY_AND_ASSIGN(percentile_case_generator);
+};
+
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+using integral_percentile_case_generator =
+    percentile_case_generator<integral_nth_element_case_generator<T>>; // integral T only
+
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+using floating_percentile_case_generator =
+    percentile_case_generator<floating_nth_element_case_generator<T>>; // floating-point T only
+
+} // namespace dsn

From 5d6dccc489b3c9dbdc7d911f5dd3570bcd17ed6a Mon Sep 17 00:00:00 2001
From: Dan Wang <empiredan@126.com>
Date: Mon, 20 Jun 2022 12:37:39 +0800
Subject: [PATCH 21/21] feat(new_metrics): put added files into file list of
 apache license

---
 .licenserc.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/.licenserc.yaml b/.licenserc.yaml
index e9952b4d88..24e3b24426 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -94,6 +94,11 @@ header:
     - 'include/dsn/utility/rand.h'
     - 'include/dsn/utility/math.h'
     - 'include/dsn/utility/defer.h'
+    - 'include/dsn/utility/alloc.h'
+    - 'include/dsn/utility/casts.h'
+    - 'include/dsn/utility/long_adder.h'
+    - 'include/dsn/utility/metrics.h'
+    - 'include/dsn/utility/nth_element.h'
     - 'src/aio/aio_task.cpp'
     - 'src/aio/test/main.cpp'
     - 'src/meta/test/meta_http_service_test.cpp'
@@ -209,6 +214,17 @@ header:
     - 'src/utils/rand.cpp'
     - 'src/utils/throttling_controller.cpp'
     - 'src/utils/output_utils.cpp'
+    - 'src/utils/alloc.cpp'
+    - 'src/utils/long_adder.cpp'
+    - 'src/utils/long_adder_bench/long_adder_bench.cpp'
+    - 'src/utils/metrics.cpp'
+    - 'src/utils/shared_io_service.cpp'
+    - 'src/utils/test/long_adder_test.cpp'
+    - 'src/utils/test/metrics_test.cpp'
+    - 'src/utils/test/nth_element_bench/nth_element_bench.cpp'
+    - 'src/utils/test/nth_element_test.cpp'
+    - 'src/utils/test/nth_element_utils.h'
+    - 'src/utils/test/percentile_utils.h'
     - 'src/common/partition_split.thrift'
     - 'src/common/common.cpp'
     - 'src/common/consensus.thrift'