diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index edc8d66686..e9e34c5835 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -72,6 +72,7 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/geo.c ${CMAKE_SOURCE_DIR}/src/lazyfree.c ${CMAKE_SOURCE_DIR}/src/module.c + ${CMAKE_SOURCE_DIR}/src/lrulfu.c ${CMAKE_SOURCE_DIR}/src/evict.c ${CMAKE_SOURCE_DIR}/src/expire.c ${CMAKE_SOURCE_DIR}/src/geohash.c diff --git a/src/Makefile b/src/Makefile index 1ce9281678..aba37b8a0b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -423,7 +423,7 @@ ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_cluster.o trace/trace_server.o trace/trace_rdb.o trace/trace_aof.o -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o 
vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o lrulfu.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ) ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o diff --git a/src/cluster.c b/src/cluster.c index a188bef335..b918da4a51 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -198,7 +198,7 @@ void dumpCommand(client *c) { /* RESTORE key ttl serialized-value [REPLACE] [ABSTTL] [IDLETIME seconds] [FREQ frequency] */ void restoreCommand(client *c) { - long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1; + long long ttl, lfu_freq = -1, lru_idle = 
-1; uint16_t rdbver = 0; rio payload; int j, type, replace = 0, absttl = 0; @@ -217,7 +217,6 @@ void restoreCommand(client *c) { addReplyError(c, "Invalid IDLETIME value, must be >= 0"); return; } - lru_clock = LRU_CLOCK(); j++; /* Consume additional arg. */ } else if (!strcasecmp(c->argv[j]->ptr, "freq") && additional >= 1 && lru_idle == -1) { if (getLongLongFromObjectOrReply(c, c->argv[j + 1], &lfu_freq, NULL) != C_OK) return; @@ -305,7 +304,7 @@ void restoreCommand(client *c) { rewriteClientCommandArgument(c, c->argc, shared.absttl); } } - objectSetLRUOrLFU(obj, lfu_freq, lru_idle, lru_clock, 1000); + objectSetLRUOrLFU(obj, lfu_freq, lru_idle); signalModifiedKey(c, c->db, key); notifyKeyspaceEvent(NOTIFY_GENERIC, "restore", key, c->db->id); addReply(c, shared.ok); diff --git a/src/db.c b/src/db.c index f398191d2d..c1e656edcb 100644 --- a/src/db.c +++ b/src/db.c @@ -49,14 +49,6 @@ static int objectIsExpired(robj *val); static void dbSetValue(serverDb *db, robj *key, robj **valref, int overwrite, void **oldref); static robj *dbFindWithDictIndex(serverDb *db, sds key, int dict_index); -/* Update LFU when an object is accessed. - * Firstly, decrement the counter if the decrement time is reached. - * Then logarithmically increment the counter, and update the access time. */ -void updateLFU(robj *val) { - unsigned long counter = LFUDecrAndReturn(val); - counter = LFULogIncr(counter); - val->lru = (LFUGetTimeInMinutes() << 8) | counter; -} /* Lookup a key for read or write operations, or return NULL if the key is not * found in the specified DB. This function implements the functionality of @@ -118,11 +110,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) { /* Shared objects can't be stored in the database. 
*/ serverAssert(val->refcount != OBJ_SHARED_REFCOUNT); - if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - updateLFU(val); - } else { - val->lru = LRU_CLOCK(); - } + val->lru = lrulfu_touch(val->lru); } if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE))) server.stat_keyspace_hits++; diff --git a/src/debug.c b/src/debug.c index 56dbdfbb20..68b43f6099 100644 --- a/src/debug.c +++ b/src/debug.c @@ -688,8 +688,11 @@ void debugCommand(client *c) { s = sdscatprintf(s, "Value at:%p refcount:%d encoding:%s", (void *)val, val->refcount, strenc); if (!fast) s = sdscatprintf(s, " serializedlength:%zu", rdbSavedObjectLen(val, c->argv[2], c->db->id)); /* Either lru or lfu field could work correctly which depends on server.maxmemory_policy. */ - s = sdscatprintf(s, " lru:%d lru_seconds_idle:%llu", val->lru, estimateObjectIdleTime(val) / 1000); - s = sdscatprintf(s, " lfu_freq:%lu lfu_access_time_minutes:%u", LFUDecrAndReturn(val), val->lru >> 8); + if (lrulfu_isUsingLFU()) { + s = sdscatprintf(s, " lfu_freq:%u lfu_access_time_minutes:%u", objectGetLFUFrequency(val), val->lru >> 8); + } else { + s = sdscatprintf(s, " lru:%d lru_seconds_idle:%u", val->lru, lru_getIdleSecs(val->lru)); + } s = sdscatprintf(s, "%s", extra); addReplyStatusLength(c, s, sdslen(s)); sdsfree(s); diff --git a/src/evict.c b/src/evict.c index 6af136166e..ce602b2567 100644 --- a/src/evict.c +++ b/src/evict.c @@ -67,38 +67,6 @@ static struct evictionPoolEntry *EvictionPoolLRU; * Implementation of eviction, aging and LRU * --------------------------------------------------------------------------*/ -/* Return the LRU clock, based on the clock resolution. This is a time - * in a reduced-bits format that can be used to set and check the - * object->lru field of serverObject structures. */ -unsigned int getLRUClock(void) { - return (mstime() / LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; -} - -/* This function is used to obtain the current LRU clock. 
- * If the current resolution is lower than the frequency we refresh the - * LRU clock (as it should be in production servers) we return the - * precomputed value, otherwise we need to resort to a system call. */ -unsigned int LRU_CLOCK(void) { - unsigned int lruclock; - if (1000 / server.hz <= LRU_CLOCK_RESOLUTION) { - lruclock = server.lruclock; - } else { - lruclock = getLRUClock(); - } - return lruclock; -} - -/* Given an object returns the min number of milliseconds the object was never - * requested, using an approximated LRU algorithm. */ -unsigned long long estimateObjectIdleTime(robj *o) { - unsigned long long lruclock = LRU_CLOCK(); - if (lruclock >= o->lru) { - return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; - } else { - return (lruclock + (LRU_CLOCK_MAX - o->lru)) * LRU_CLOCK_RESOLUTION; - } -} - /* LRU approximation algorithm * * The server uses an approximation of the LRU algorithm that runs in constant @@ -158,17 +126,8 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn /* Calculate the idle time according to the policy. This is called * idle just because the code initially handled LRU, but is in fact * just a score where a higher score means better candidate. */ - if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { - idle = estimateObjectIdleTime(o); - } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - /* When we use an LRU policy, we sort the keys by idle time - * so that we expire keys starting from greater idle time. - * However when the policy is an LFU one, we have a frequency - * estimation, and we want to evict keys with lower frequency - * first. So inside the pool we put objects using the inverted - * frequency subtracting the actual frequency to the maximum - * frequency of 255. 
*/ - idle = 255 - LFUDecrAndReturn(o); + if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU | MAXMEMORY_FLAG_LFU)) { + idle = objectGetIdleness(o); } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { /* In this case the sooner the expire the better. */ idle = ULLONG_MAX - objectGetExpire(o); @@ -230,88 +189,6 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn return count; } -/* ---------------------------------------------------------------------------- - * LFU (Least Frequently Used) implementation. - - * We have 24 total bits of space in each object in order to implement - * an LFU (Least Frequently Used) eviction policy, since we re-use the - * LRU field for this purpose. - * - * We split the 24 bits into two fields: - * - * 16 bits 8 bits - * +------------------+--------+ - * + Last access time | LOG_C | - * +------------------+--------+ - * - * LOG_C is a logarithmic counter that provides an indication of the access - * frequency. However this field must also be decremented otherwise what used - * to be a frequently accessed key in the past, will remain ranked like that - * forever, while we want the algorithm to adapt to access pattern changes. - * - * So the remaining 16 bits are used in order to store the "access time", - * a reduced-precision Unix time (we take 16 bits of the time converted - * in minutes since we don't care about wrapping around) where the LOG_C - * counter decays every minute by default (depends on lfu-decay-time). - * - * New keys don't start at zero, in order to have the ability to collect - * some accesses before being trashed away, so they start at LFU_INIT_VAL. - * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL - * when incrementing the key, so that keys starting at LFU_INIT_VAL - * (or having a smaller value) have a very high chance of being incremented - * on access. (The chance depends on counter and lfu-log-factor.) 
- * - * During decrement, the value of the logarithmic counter is decremented by - * one when lfu-decay-time minutes elapsed. - * --------------------------------------------------------------------------*/ - -/* Return the current time in minutes, just taking the least significant - * 16 bits. The returned time is suitable to be stored as LDT (last access - * time) for the LFU implementation. */ -unsigned long LFUGetTimeInMinutes(void) { - return (server.unixtime / 60) & 65535; -} - -/* Given an object ldt (last access time), compute the minimum number of minutes - * that elapsed since the last access. Handle overflow (ldt greater than - * the current 16 bits minutes time) considering the time as wrapping - * exactly once. */ -unsigned long LFUTimeElapsed(unsigned long ldt) { - unsigned long now = LFUGetTimeInMinutes(); - if (now >= ldt) return now - ldt; - return 65535 - ldt + now; -} - -/* Logarithmically increment a counter. The greater is the current counter value - * the less likely is that it gets really incremented. Saturate it at 255. */ -uint8_t LFULogIncr(uint8_t counter) { - if (counter == 255) return 255; - double r = (double)rand() / RAND_MAX; - double baseval = counter - LFU_INIT_VAL; - if (baseval < 0) baseval = 0; - double p = 1.0 / (baseval * server.lfu_log_factor + 1); - if (r < p) counter++; - return counter; -} - -/* If the object's ldt (last access time) is reached, decrement the LFU counter but - * do not update LFU fields of the object, we update the access time - * and counter in an explicit way when the object is really accessed. - * And we will decrement the counter according to the times of - * elapsed time than server.lfu_decay_time. - * Return the object frequency counter. - * - * This function is used in order to scan the dataset for the best object - * to fit: as we check for the candidate, we incrementally decrement the - * counter of the scanned objects if needed. 
*/ -unsigned long LFUDecrAndReturn(robj *o) { - unsigned long ldt = o->lru >> 8; - unsigned long counter = o->lru & 255; - unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; - if (num_periods) counter = (num_periods > counter) ? 0 : counter - num_periods; - return counter; -} - /* We don't want to count AOF buffers and replicas output buffers as * used memory: the eviction should use mostly data size, because * it can cause feedback-loop when we push DELs into them, putting diff --git a/src/lrulfu.c b/src/lrulfu.c new file mode 100644 index 0000000000..2e5fa4944c --- /dev/null +++ b/src/lrulfu.c @@ -0,0 +1,170 @@ +#include "lrulfu.h" +#include "server.h" + +#define LRULFU_MASK ((1 << LRULFU_BITS) - 1) /* Mask for LRU/LFU value */ + +/**************** LRU ****************/ +/* LRU uses a 24 bit timestamp of the last access time (in seconds) + * The LRU value needs to be "touched" within 194 days, or the value will wrap, + * and the last access time will appear to be recent. + */ + +/* The LRU_CLOCK_RESOLUTION is used to support an older ruby program which tests + * the LRU behavior. This should be set to 1 if building Valkey to support this + * ruby test. Otherwise, the default of 1000 is expected. */ +#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */ + + +// Current time in seconds (24 least significant bits). Designed to roll over. 
+static uint32_t LRUGetClockTime(void) { +#if LRU_CLOCK_RESOLUTION == 1000 + return (uint32_t)(server.unixtime & LRULFU_MASK); +#else + return (uint32_t)((server.mstime / LRU_CLOCK_RESOLUTION) & LRULFU_MASK); +#endif +} + + +uint32_t lru_import(uint32_t idle_secs) { + uint32_t now = LRUGetClockTime(); +#if LRU_CLOCK_RESOLUTION != 1000 + idle_secs = (uint32_t)((long)idle_secs * 1000 / LRU_CLOCK_RESOLUTION); +#endif + idle_secs = idle_secs & LRULFU_MASK; + // Underflow is ok/expected + return (now - idle_secs) & LRULFU_MASK; +} + + +uint32_t lru_getIdleSecs(uint32_t lru) { + // Underflow is ok/expected + uint32_t seconds = (LRUGetClockTime() - lru) & LRULFU_MASK; +#if LRU_CLOCK_RESOLUTION != 1000 + seconds = (uint32_t)((long)seconds * LRU_CLOCK_RESOLUTION / 1000); +#endif + return seconds; +} + + +/**************** LFU ****************/ +/* ---------------------------------------------------------------------------- + * LFU (Least Frequently Used) implementation. + * + * We split the 24 bits into two fields: + * + * 16 bits 8 bits + * +-----------------------+--------+ + * + Last access (minutes) | LOG_C | + * +-----------------------+--------+ + * + * LOG_C is a logarithmic counter that provides an indication of the access + * frequency. However this field must also be decremented otherwise what used + * to be a frequently accessed key in the past, will remain ranked like that + * forever, while we want the algorithm to adapt to access pattern changes. + * + * So the remaining 16 bits are used in order to store the "access time", + * a reduced-precision Unix time (we take 16 bits of the time converted + * in minutes since we don't care about wrapping around) where the LOG_C + * counter decays every minute by default (depends on lfu-decay-time). + * + * New keys don't start at zero, in order to have the ability to collect + * some accesses before being trashed away, so they start at LFU_INIT_VAL. 
+ * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL + * when incrementing the key, so that keys starting at LFU_INIT_VAL + * (or having a smaller value) have a very high chance of being incremented + * on access. (The chance depends on counter and lfu-log-factor.) + * + * During decrement, the value of the logarithmic counter is decremented by + * one when lfu-decay-time minutes elapsed. + * --------------------------------------------------------------------------*/ + +#define LFU_INIT_VAL 5 + + +// Current time in minutes (16 least significant bits). Designed to roll over. +static uint16_t LFUGetTimeInMinutes(void) { + return (uint16_t)(server.unixtime / 60); +} + + +uint32_t lfu_import(uint8_t freq) { + return ((uint32_t)LFUGetTimeInMinutes() << 8) | freq; +} + + +/* Update an LFU to consider decay, but doesn't add a "touch" */ +static uint32_t LFUDecay(uint32_t lfu) { + uint16_t now = LFUGetTimeInMinutes(); + uint16_t prev_time = (uint16_t)(lfu >> 8); + uint8_t freq = (uint8_t)lfu; + uint16_t elapsed = now - prev_time; // Wrap-around expected/valid + uint16_t num_periods = server.lfu_decay_time ? elapsed / server.lfu_decay_time : 0; + freq = (num_periods > freq) ? 0 : freq - num_periods; + return ((uint32_t)now << 8) | freq; +} + + +/* Increment the freq counter with logarithmic probability. + * Values closer to 0 are more likely to increment. + * Values closer to 255 are logarithmically less likely to increment. 
*/ +static uint8_t LFULogIncr(uint8_t freq) { + if (freq == 255) return freq; + double r = (double)rand() / RAND_MAX; + double baseval = (int)freq - LFU_INIT_VAL; + if (baseval < 0) baseval = 0; + double p = 1.0 / (baseval * server.lfu_log_factor + 1); + if (r < p) freq++; + return freq; +} + + +uint32_t lfu_touch(uint32_t lfu) { + lfu = LFUDecay(lfu); + uint8_t freq = (uint8_t)lfu; + freq = LFULogIncr(freq); + return (lfu & ~(uint32_t)UINT8_MAX) | freq; +} + + +uint32_t lfu_getFrequency(uint32_t lfu, uint8_t *freq) { + lfu = LFUDecay(lfu); + *freq = (uint8_t)lfu; + return lfu; +} + + +/**************** Generic API ****************/ + +bool lrulfu_isUsingLFU(void) { + return (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) != 0; +} + + +uint32_t lrulfu_init(void) { + if (lrulfu_isUsingLFU()) { + return lfu_import(LFU_INIT_VAL); + } else { + return lru_import(0); + } +} + + +uint32_t lrulfu_getIdleness(uint32_t lrulfu, uint32_t *idleness) { + if (lrulfu_isUsingLFU()) { + uint8_t freq; + lrulfu = lfu_getFrequency(lrulfu, &freq); + *idleness = UINT8_MAX - freq; + } else { + *idleness = lru_getIdleSecs(lrulfu); + } + return lrulfu; +} + + +uint32_t lrulfu_touch(uint32_t lrulfu) { + if (lrulfu_isUsingLFU()) { + return lfu_touch(lrulfu); + } else { + return lru_import(0); + } +} diff --git a/src/lrulfu.h b/src/lrulfu.h new file mode 100644 index 0000000000..fc9e42077e --- /dev/null +++ b/src/lrulfu.h @@ -0,0 +1,71 @@ +#ifndef __LRULFU_H__ +#define __LRULFU_H__ + +#include <stdint.h> +#include <stdbool.h> + +/* LRU (Least Recently Used) and LFU (Least Frequently Used) numeric logic. + * + * Implementation of a 24 bit value which may either be an LRU or LFU value as indicated by the + * server's maxmemory_policy. + * + * LRU - the value consists of a 24-bit time in seconds. This value will roll over after 194 days. + * (If a value is not touched for 194 days, it will appear as recent.) + * + * LFU - maintains an approximate logarithmic value indicating the frequency of access.
The first + * 16 bits maintain the last evaluation time in minutes. The remaining 8 bits maintain an + * approximate frequency of use. The time value will roll over after 45 days. If a value + * is not evaluated in this time, it may not show as decayed after this time. + * + * Returned values are guaranteed to fit in an unsigned 24-bit region. They can safely be packed + * like: + * struct { + * uint32_t lru : LRULFU_BITS; + * } + */ + +#define LRULFU_BITS 24 + +/**************** LRU ****************/ + +/* Import a given LRU idleness to the current time. */ +uint32_t lru_import(uint32_t idle_secs); + +/* Get the current idle secs from the given LRU value. */ +uint32_t lru_getIdleSecs(uint32_t lru); + +/**************** LFU ****************/ + +/* Import a given LFU frequency to the current time. */ +uint32_t lfu_import(uint8_t freq); + +/* Update/Touch an LFU value, decays the old value and adds a "touch". */ +uint32_t lfu_touch(uint32_t lfu); + +/* Return the LFU frequency, without adding a touch. + * An updated LFU is returned which maintains the decay on the LFU. */ +uint32_t lfu_getFrequency(uint32_t lfu, uint8_t *freq); + + +/**************** Generic API ****************/ +/* These API functions can be used interchangeably between LRU and LFU, depending on the setting of + * server.maxmemory_policy. It is preferred to use these functions rather than directly accessing + * the LRU/LFU API functions if the use case permits. Note that if the server's policy is changed, + * LRU <-> LFU, evaluations will be incorrect until values have had time to be touched/updated.*/ + +/* Is the server using LFU policy? */ +bool lrulfu_isUsingLFU(void); + +/* Provide an initial value for LRU or LFU */ +uint32_t lrulfu_init(void); + +/* Return a relative indication of idleness, used for comparison between LRU or LFU values. + * A greater number indicates a greater degree of idleness. + * + * Returns an updated LRU/LFU value, maintaining the data, without a "touch".
*/ +uint32_t lrulfu_getIdleness(uint32_t lrulfu, uint32_t *idleness); + +/* Add a touch to the LRU or LFU value, returning the updated LRU/LFU. */ +uint32_t lrulfu_touch(uint32_t lrulfu); + +#endif diff --git a/src/module.c b/src/module.c index 3aab4639b1..5152cfd7f7 100644 --- a/src/module.c +++ b/src/module.c @@ -13737,7 +13737,7 @@ size_t moduleCount(void) { * returns VALKEYMODULE_OK if the LRU was updated, VALKEYMODULE_ERR otherwise. */ int VM_SetLRU(ValkeyModuleKey *key, mstime_t lru_idle) { if (!key->value) return VALKEYMODULE_ERR; - if (objectSetLRUOrLFU(key->value, -1, lru_idle, lru_idle >= 0 ? LRU_CLOCK() : 0, 1)) return VALKEYMODULE_OK; + if (objectSetLRUOrLFU(key->value, -1, lru_idle >= 0 ? lru_idle / 1000 : -1)) return VALKEYMODULE_OK; /* lru_idle is in ms, objectSetLRUOrLFU() takes seconds; keep negative input as "nothing set" */ return VALKEYMODULE_ERR; } @@ -13749,7 +13749,7 @@ int VM_GetLRU(ValkeyModuleKey *key, mstime_t *lru_idle) { *lru_idle = -1; if (!key->value) return VALKEYMODULE_ERR; if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) return VALKEYMODULE_OK; - *lru_idle = estimateObjectIdleTime(key->value); + *lru_idle = (mstime_t)objectGetLRUIdleSecs(key->value) * 1000; /* widen before scaling: uint32_t seconds * 1000 would wrap in 32-bit arithmetic */ return VALKEYMODULE_OK; } @@ -13760,7 +13760,7 @@ int VM_GetLRU(ValkeyModuleKey *key, mstime_t *lru_idle) { * returns VALKEYMODULE_OK if the LFU was updated, VALKEYMODULE_ERR otherwise.
*/ int VM_SetLFU(ValkeyModuleKey *key, long long lfu_freq) { if (!key->value) return VALKEYMODULE_ERR; - if (objectSetLRUOrLFU(key->value, lfu_freq, -1, 0, 1)) return VALKEYMODULE_OK; + if (objectSetLRUOrLFU(key->value, lfu_freq, -1)) return VALKEYMODULE_OK; return VALKEYMODULE_ERR; } @@ -13770,7 +13770,7 @@ int VM_SetLFU(ValkeyModuleKey *key, long long lfu_freq) { int VM_GetLFU(ValkeyModuleKey *key, long long *lfu_freq) { *lfu_freq = -1; if (!key->value) return VALKEYMODULE_ERR; - if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) *lfu_freq = LFUDecrAndReturn(key->value); + if (lrulfu_isUsingLFU()) *lfu_freq = objectGetLFUFrequency(key->value); return VALKEYMODULE_OK; } diff --git a/src/object.c b/src/object.c index 41a7bd50cd..19d9fd047f 100644 --- a/src/object.c +++ b/src/object.c @@ -110,14 +110,7 @@ robj *createObject(int type, void *ptr) { void initObjectLRUOrLFU(robj *o) { if (o->refcount == OBJ_SHARED_REFCOUNT) return; - /* Set the LRU to the current lruclock (minutes resolution), or - * alternatively the LFU counter. */ - if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - o->lru = (LFUGetTimeInMinutes() << 8) | LFU_INIT_VAL; - } else { - o->lru = LRU_CLOCK(); - } - return; + o->lru = lrulfu_init(); } /* Set a special refcount in the object to make it "shared": @@ -1582,32 +1575,39 @@ sds getMemoryDoctorReport(void) { return s; } +/* Return the LFU frequency for an object. */ +uint8_t objectGetLFUFrequency(robj *o) { + uint8_t freq; + o->lru = lfu_getFrequency(o->lru, &freq); + return freq; +} + +/* Return the LRU idle time for an object. */ +uint32_t objectGetLRUIdleSecs(robj *o) { + return lru_getIdleSecs(o->lru); +} + +/* Return an indication of idleness. Larger numbers are more idle. */ +uint32_t objectGetIdleness(robj *o) { + uint32_t idleness; + o->lru = lrulfu_getIdleness(o->lru, &idleness); + return idleness; +} + /* Set the object LRU/LFU depending on server.maxmemory_policy. * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU. 
- * The lru_idle and lru_clock args are only relevant if policy - * is MAXMEMORY_FLAG_LRU. + * The lru_idle_secs arg (idle time in seconds) is only relevant if policy + * is not MAXMEMORY_FLAG_LFU. * Either or both of them may be <0, in that case, nothing is set. */ -int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier) { - if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { +int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle_secs) { + if (lrulfu_isUsingLFU()) { if (lfu_freq >= 0) { serverAssert(lfu_freq <= 255); - val->lru = (LFUGetTimeInMinutes() << 8) | lfu_freq; + val->lru = lfu_import((uint8_t)lfu_freq); return 1; } - } else if (lru_idle >= 0) { - /* Provided LRU idle time is in seconds. Scale - * according to the LRU clock resolution this - * instance was compiled with (normally 1000 ms, so the - * below statement will expand to lru_idle*1000/1000. */ - lru_idle = lru_idle * lru_multiplier / LRU_CLOCK_RESOLUTION; - long lru_abs = lru_clock - lru_idle; /* Absolute access time. */ - /* If the LRU field underflows (since lru_clock is a wrapping clock), - * we need to make it positive again. This will be handled by the unwrapping - * code in estimateObjectIdleTime. I.e. imagine a day when lru_clock - * wrap arounds (happens once in some 6 months), and becomes a low - * value, like 10, an lru_idle of 1000 should be near LRU_CLOCK_MAX.
*/ - if (lru_abs < 0) lru_abs += LRU_CLOCK_MAX; - val->lru = lru_abs; + } else if (lru_idle_secs >= 0) { + val->lru = lru_import(lru_idle_secs); return 1; } return 0; @@ -1660,7 +1660,7 @@ void objectCommand(client *c) { "switching between policies at runtime LRU and LFU data will take some time to adjust."); return; } - addReplyLongLong(c, estimateObjectIdleTime(o) / 1000); + addReplyLongLong(c, lru_getIdleSecs(o->lru)); } else if (!strcasecmp(c->argv[1]->ptr, "freq") && c->argc == 3) { if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return; if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) { @@ -1669,11 +1669,7 @@ void objectCommand(client *c) { "when switching between policies at runtime LRU and LFU data will take some time to adjust."); return; } - /* LFUDecrAndReturn should be called - * in case of the key has not been accessed for a long time, - * because we update the access time only - * when the key is read or overwritten. */ - addReplyLongLong(c, LFUDecrAndReturn(o)); + addReplyLongLong(c, objectGetLFUFrequency(o)); } else { addReplySubcommandSyntaxError(c); } diff --git a/src/rdb.c b/src/rdb.c index 6d0f8af615..a99e5930ed 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1184,22 +1184,20 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, in /* Save the LRU info. */ if (savelru) { - uint64_t idletime = estimateObjectIdleTime(val); - idletime /= 1000; /* Using seconds is enough and requires less space.*/ + uint64_t idletime = objectGetLRUIdleSecs(val); if (rdbSaveType(rdb, RDB_OPCODE_IDLE) == -1) return -1; if (rdbSaveLen(rdb, idletime) == -1) return -1; } /* Save the LFU info. */ if (savelfu) { - uint8_t buf[1]; - buf[0] = LFUDecrAndReturn(val); + uint8_t freq = objectGetLFUFrequency(val); /* We can encode this in exactly two bytes: the opcode and an 8 * bit counter, since the frequency is logarithmic with a 0-255 range. 
* Note that we do not store the halving time because to reset it * a single time when loading does not affect the frequency much. */ if (rdbSaveType(rdb, RDB_OPCODE_FREQ) == -1) return -1; - if (rdbWriteRaw(rdb, buf, 1) == -1) return -1; + if (rdbWriteRaw(rdb, &freq, 1) == -1) return -1; } /* Save type, key, value */ @@ -3119,7 +3117,6 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin /* Key-specific attributes, set by opcodes before the key type. */ long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime(); - long long lru_clock = LRU_CLOCK(); while (1) { sds key; @@ -3464,7 +3461,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin } /* Set usage information (for eviction). */ - objectSetLRUOrLFU(val, lfu_freq, lru_idle, lru_clock, 1000); + objectSetLRUOrLFU(val, lfu_freq, lru_idle); /* call key space notification on key loaded for modules only */ moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id); diff --git a/src/server.c b/src/server.c index 44687cc11f..bb155d5f79 100644 --- a/src/server.c +++ b/src/server.c @@ -1513,19 +1513,6 @@ long long serverCron(struct aeEventLoop *eventLoop, long long id, void *clientDa server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1); } - /* We have just LRU_BITS bits per object for LRU information. - * So we use an (eventually wrapping) LRU clock. - * - * Note that even if the counter wraps it's not a big problem, - * everything will still work but some object will appear younger - * to the server. However for this to happen a given object should never be - * touched for all the time needed to the counter to wrap, which is - * not likely. - * - * Note that you can change the resolution altering the - * LRU_CLOCK_RESOLUTION define. 
*/ - server.lruclock = getLRUClock(); - cronUpdateMemoryStats(); /* We received a SIGTERM or SIGINT, shutting down here in a safe way, as it is @@ -2287,7 +2274,6 @@ void initServerConfig(void) { server.latency_tracking_info_percentiles[1] = 99.0; /* p99 */ server.latency_tracking_info_percentiles[2] = 99.9; /* p999 */ - server.lruclock = getLRUClock(); resetServerSaveParams(); appendServerSaveParams(60 * 60, 1); /* save after 1 hour and 1 change */ @@ -5935,7 +5921,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) { "hz:%i\r\n", server.hz, "configured_hz:%i\r\n", server.hz, "clients_hz:%i\r\n", server.clients_hz, - "lru_clock:%u\r\n", server.lruclock, + "lru_clock:%u\r\n", (unsigned int)(server.unixtime & ((1 << LRULFU_BITS) - 1)), "executable:%s\r\n", server.executable ? server.executable : "", "config_file:%s\r\n", server.configfile ? server.configfile : "", "io_threads_active:%i\r\n", server.active_io_threads_num > 1, diff --git a/src/server.h b/src/server.h index 1ae64e0b0d..fb02649e34 100644 --- a/src/server.h +++ b/src/server.h @@ -82,6 +82,7 @@ #include "vset.h" #include "trace/trace.h" #include "entry.h" +#include "lrulfu.h" #ifdef USE_LTTNG #define valkey_fork() do_fork() @@ -783,10 +784,6 @@ typedef struct ValkeyModuleType moduleType; #define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */ #define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */ -#define LRU_BITS 24 -#define LRU_CLOCK_MAX ((1 << LRU_BITS) - 1) /* Max value of obj->lru */ -#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */ - #define OBJ_REFCOUNT_BITS 30 #define OBJ_SHARED_REFCOUNT ((1 << OBJ_REFCOUNT_BITS) - 1) /* Global object never destroyed. */ #define OBJ_STATIC_REFCOUNT ((1 << OBJ_REFCOUNT_BITS) - 2) /* Object allocated in the stack.
*/ @@ -794,9 +791,7 @@ typedef struct ValkeyModuleType moduleType; struct serverObject { unsigned type : 4; unsigned encoding : 4; - unsigned lru : LRU_BITS; /* LRU time (relative to global lru_clock) or - * LFU data (least significant 8 bits frequency - * and most significant 16 bits access time). */ + unsigned lru : LRULFU_BITS; unsigned hasexpire : 1; unsigned hasembkey : 1; unsigned refcount : OBJ_REFCOUNT_BITS; @@ -1681,7 +1676,6 @@ struct valkeyServer { _Atomic AeIoState io_poll_state; /* Indicates the state of the IO polling. */ int io_ae_fired_events; /* Number of poll events received by the IO thread. */ rax *errors; /* Errors table */ - unsigned int lruclock; /* Clock for LRU eviction */ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */ int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */ @@ -3055,7 +3049,6 @@ char *strEncoding(int encoding); int compareStringObjects(const robj *a, const robj *b); int collateStringObjects(const robj *a, const robj *b); int equalStringObjects(robj *a, robj *b); -unsigned long long estimateObjectIdleTime(robj *o); void trimStringObjectIfNeeded(robj *o, int trim_small_values); #define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR) @@ -3065,6 +3058,9 @@ robj *objectSetKeyAndExpire(robj *val, sds key, long long expire); robj *objectSetExpire(robj *val, long long expire); sds objectGetKey(const robj *val); long long objectGetExpire(const robj *val); +uint8_t objectGetLFUFrequency(robj *o); +uint32_t objectGetLRUIdleSecs(robj *o); +uint32_t objectGetIdleness(robj *o); /* Synchronous I/O with timeout */ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout); @@ -3378,8 +3374,6 @@ void exitExecutionUnit(void); void resetServerStats(void); void monitorActiveDefrag(void); void defragWhileBlocked(void); -unsigned int 
getLRUClock(void); -unsigned int LRU_CLOCK(void); const char *evictPolicyToString(void); struct serverMemOverhead *getMemoryOverheadData(void); void freeMemoryOverheadData(struct serverMemOverhead *mh); @@ -3588,7 +3582,7 @@ robj *lookupKeyReadWithFlags(serverDb *db, robj *key, int flags); robj *lookupKeyWriteWithFlags(serverDb *db, robj *key, int flags); robj *objectCommandLookup(client *c, robj *key); robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply); -int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier); +int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle_secs); #define LOOKUP_NONE 0 #define LOOKUP_NOTOUCH (1 << 0) /* Don't update LRU. */ #define LOOKUP_NONOTIFY (1 << 1) /* Don't trigger keyspace event on key misses. */ @@ -3748,10 +3742,6 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms); /* evict.c -- maxmemory handling and LRU eviction. */ void evictionPoolAlloc(void); -#define LFU_INIT_VAL 5 -unsigned long LFUGetTimeInMinutes(void); -uint8_t LFULogIncr(uint8_t value); -unsigned long LFUDecrAndReturn(robj *o); #define EVICT_OK 0 #define EVICT_RUNNING 1 #define EVICT_FAIL 2