Skip to content

Commit 236f698

Browse files
committed
Refactor of LFU/LRU code for modularity
Signed-off-by: Jim Brunner <brunnerj@amazon.com>
1 parent 2da21d9 commit 236f698

File tree

12 files changed

+294
-217
lines changed

12 files changed

+294
-217
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ ENGINE_NAME=valkey
423423
SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
424424
ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
425425
ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_cluster.o trace/trace_server.o trace/trace_rdb.o trace/trace_aof.o
426-
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o
426+
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o cluster_migrateslots.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o lrulfu.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut9.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o
427427
ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ)
428428
ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
429429
ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o

src/cluster.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ void dumpCommand(client *c) {
198198

199199
/* RESTORE key ttl serialized-value [REPLACE] [ABSTTL] [IDLETIME seconds] [FREQ frequency] */
200200
void restoreCommand(client *c) {
201-
long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
201+
long long ttl, lfu_freq = -1, lru_idle = -1;
202202
uint16_t rdbver = 0;
203203
rio payload;
204204
int j, type, replace = 0, absttl = 0;
@@ -217,7 +217,6 @@ void restoreCommand(client *c) {
217217
addReplyError(c, "Invalid IDLETIME value, must be >= 0");
218218
return;
219219
}
220-
lru_clock = LRU_CLOCK();
221220
j++; /* Consume additional arg. */
222221
} else if (!strcasecmp(c->argv[j]->ptr, "freq") && additional >= 1 && lru_idle == -1) {
223222
if (getLongLongFromObjectOrReply(c, c->argv[j + 1], &lfu_freq, NULL) != C_OK) return;
@@ -305,7 +304,7 @@ void restoreCommand(client *c) {
305304
rewriteClientCommandArgument(c, c->argc, shared.absttl);
306305
}
307306
}
308-
objectSetLRUOrLFU(obj, lfu_freq, lru_idle, lru_clock, 1000);
307+
objectSetLRUOrLFU(obj, lfu_freq, lru_idle);
309308
signalModifiedKey(c, c->db, key);
310309
notifyKeyspaceEvent(NOTIFY_GENERIC, "restore", key, c->db->id);
311310
addReply(c, shared.ok);

src/db.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,6 @@ static int objectIsExpired(robj *val);
4949
static void dbSetValue(serverDb *db, robj *key, robj **valref, int overwrite, void **oldref);
5050
static robj *dbFindWithDictIndex(serverDb *db, sds key, int dict_index);
5151

52-
/* Update LFU when an object is accessed.
53-
* Firstly, decrement the counter if the decrement time is reached.
54-
* Then logarithmically increment the counter, and update the access time. */
55-
void updateLFU(robj *val) {
56-
unsigned long counter = LFUDecrAndReturn(val);
57-
counter = LFULogIncr(counter);
58-
val->lru = (LFUGetTimeInMinutes() << 8) | counter;
59-
}
6052

6153
/* Lookup a key for read or write operations, or return NULL if the key is not
6254
* found in the specified DB. This function implements the functionality of
@@ -118,11 +110,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) {
118110
if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) {
119111
/* Shared objects can't be stored in the database. */
120112
serverAssert(val->refcount != OBJ_SHARED_REFCOUNT);
121-
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
122-
updateLFU(val);
123-
} else {
124-
val->lru = LRU_CLOCK();
125-
}
113+
val->lru = lrulfu_touch(val->lru);
126114
}
127115

128116
if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE))) server.stat_keyspace_hits++;

src/debug.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,11 @@ void debugCommand(client *c) {
688688
s = sdscatprintf(s, "Value at:%p refcount:%d encoding:%s", (void *)val, val->refcount, strenc);
689689
if (!fast) s = sdscatprintf(s, " serializedlength:%zu", rdbSavedObjectLen(val, c->argv[2], c->db->id));
690690
/* Either lru or lfu field could work correctly which depends on server.maxmemory_policy. */
691-
s = sdscatprintf(s, " lru:%d lru_seconds_idle:%llu", val->lru, estimateObjectIdleTime(val) / 1000);
692-
s = sdscatprintf(s, " lfu_freq:%lu lfu_access_time_minutes:%u", LFUDecrAndReturn(val), val->lru >> 8);
691+
if (lrulfu_isUsingLFU()) {
692+
s = sdscatprintf(s, " lfu_freq:%u lfu_access_time_minutes:%u", objectGetLFUFrequency(val), val->lru >> 8);
693+
} else {
694+
s = sdscatprintf(s, " lru:%d lru_seconds_idle:%u", val->lru, lru_getIdleSecs(val->lru));
695+
}
693696
s = sdscatprintf(s, "%s", extra);
694697
addReplyStatusLength(c, s, sdslen(s));
695698
sdsfree(s);

src/evict.c

Lines changed: 2 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -67,38 +67,6 @@ static struct evictionPoolEntry *EvictionPoolLRU;
6767
* Implementation of eviction, aging and LRU
6868
* --------------------------------------------------------------------------*/
6969

70-
/* Return the LRU clock, based on the clock resolution. This is a time
71-
* in a reduced-bits format that can be used to set and check the
72-
* object->lru field of serverObject structures. */
73-
unsigned int getLRUClock(void) {
74-
return (mstime() / LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
75-
}
76-
77-
/* This function is used to obtain the current LRU clock.
78-
* If the current resolution is lower than the frequency we refresh the
79-
* LRU clock (as it should be in production servers) we return the
80-
* precomputed value, otherwise we need to resort to a system call. */
81-
unsigned int LRU_CLOCK(void) {
82-
unsigned int lruclock;
83-
if (1000 / server.hz <= LRU_CLOCK_RESOLUTION) {
84-
lruclock = server.lruclock;
85-
} else {
86-
lruclock = getLRUClock();
87-
}
88-
return lruclock;
89-
}
90-
91-
/* Given an object returns the min number of milliseconds the object was never
92-
* requested, using an approximated LRU algorithm. */
93-
unsigned long long estimateObjectIdleTime(robj *o) {
94-
unsigned long long lruclock = LRU_CLOCK();
95-
if (lruclock >= o->lru) {
96-
return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
97-
} else {
98-
return (lruclock + (LRU_CLOCK_MAX - o->lru)) * LRU_CLOCK_RESOLUTION;
99-
}
100-
}
101-
10270
/* LRU approximation algorithm
10371
*
10472
* The server uses an approximation of the LRU algorithm that runs in constant
@@ -158,17 +126,8 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn
158126
/* Calculate the idle time according to the policy. This is called
159127
* idle just because the code initially handled LRU, but is in fact
160128
* just a score where a higher score means better candidate. */
161-
if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
162-
idle = estimateObjectIdleTime(o);
163-
} else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
164-
/* When we use an LRU policy, we sort the keys by idle time
165-
* so that we expire keys starting from greater idle time.
166-
* However when the policy is an LFU one, we have a frequency
167-
* estimation, and we want to evict keys with lower frequency
168-
* first. So inside the pool we put objects using the inverted
169-
* frequency subtracting the actual frequency to the maximum
170-
* frequency of 255. */
171-
idle = 255 - LFUDecrAndReturn(o);
129+
if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU | MAXMEMORY_FLAG_LFU)) {
130+
idle = objectGetIdleness(o);
172131
} else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
173132
/* In this case the sooner the expire the better. */
174133
idle = ULLONG_MAX - objectGetExpire(o);
@@ -230,88 +189,6 @@ int evictionPoolPopulate(serverDb *db, kvstore *samplekvs, struct evictionPoolEn
230189
return count;
231190
}
232191

233-
/* ----------------------------------------------------------------------------
234-
* LFU (Least Frequently Used) implementation.
235-
236-
* We have 24 total bits of space in each object in order to implement
237-
* an LFU (Least Frequently Used) eviction policy, since we re-use the
238-
* LRU field for this purpose.
239-
*
240-
* We split the 24 bits into two fields:
241-
*
242-
* 16 bits 8 bits
243-
* +------------------+--------+
244-
* + Last access time | LOG_C |
245-
* +------------------+--------+
246-
*
247-
* LOG_C is a logarithmic counter that provides an indication of the access
248-
* frequency. However this field must also be decremented otherwise what used
249-
* to be a frequently accessed key in the past, will remain ranked like that
250-
* forever, while we want the algorithm to adapt to access pattern changes.
251-
*
252-
* So the remaining 16 bits are used in order to store the "access time",
253-
* a reduced-precision Unix time (we take 16 bits of the time converted
254-
* in minutes since we don't care about wrapping around) where the LOG_C
255-
* counter decays every minute by default (depends on lfu-decay-time).
256-
*
257-
* New keys don't start at zero, in order to have the ability to collect
258-
* some accesses before being trashed away, so they start at LFU_INIT_VAL.
259-
* The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
260-
* when incrementing the key, so that keys starting at LFU_INIT_VAL
261-
* (or having a smaller value) have a very high chance of being incremented
262-
* on access. (The chance depends on counter and lfu-log-factor.)
263-
*
264-
* During decrement, the value of the logarithmic counter is decremented by
265-
* one when lfu-decay-time minutes elapsed.
266-
* --------------------------------------------------------------------------*/
267-
268-
/* Return the current time in minutes, just taking the least significant
269-
* 16 bits. The returned time is suitable to be stored as LDT (last access
270-
* time) for the LFU implementation. */
271-
unsigned long LFUGetTimeInMinutes(void) {
272-
return (server.unixtime / 60) & 65535;
273-
}
274-
275-
/* Given an object ldt (last access time), compute the minimum number of minutes
276-
* that elapsed since the last access. Handle overflow (ldt greater than
277-
* the current 16 bits minutes time) considering the time as wrapping
278-
* exactly once. */
279-
unsigned long LFUTimeElapsed(unsigned long ldt) {
280-
unsigned long now = LFUGetTimeInMinutes();
281-
if (now >= ldt) return now - ldt;
282-
return 65535 - ldt + now;
283-
}
284-
285-
/* Logarithmically increment a counter. The greater is the current counter value
286-
* the less likely is that it gets really incremented. Saturate it at 255. */
287-
uint8_t LFULogIncr(uint8_t counter) {
288-
if (counter == 255) return 255;
289-
double r = (double)rand() / RAND_MAX;
290-
double baseval = counter - LFU_INIT_VAL;
291-
if (baseval < 0) baseval = 0;
292-
double p = 1.0 / (baseval * server.lfu_log_factor + 1);
293-
if (r < p) counter++;
294-
return counter;
295-
}
296-
297-
/* If the object's ldt (last access time) is reached, decrement the LFU counter but
298-
* do not update LFU fields of the object, we update the access time
299-
* and counter in an explicit way when the object is really accessed.
300-
* And we will decrement the counter according to the times of
301-
* elapsed time than server.lfu_decay_time.
302-
* Return the object frequency counter.
303-
*
304-
* This function is used in order to scan the dataset for the best object
305-
* to fit: as we check for the candidate, we incrementally decrement the
306-
* counter of the scanned objects if needed. */
307-
unsigned long LFUDecrAndReturn(robj *o) {
308-
unsigned long ldt = o->lru >> 8;
309-
unsigned long counter = o->lru & 255;
310-
unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
311-
if (num_periods) counter = (num_periods > counter) ? 0 : counter - num_periods;
312-
return counter;
313-
}
314-
315192
/* We don't want to count AOF buffers and replicas output buffers as
316193
* used memory: the eviction should use mostly data size, because
317194
* it can cause feedback-loop when we push DELs into them, putting

src/lrulfu.c

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#include "lrulfu.h"
2+
#include "server.h"
3+
4+
#define LRULFU_MASK ((1 << LRULFU_BITS) - 1) /* Mask for LRU/LFU value */
5+
6+
/**************** LRU ****************/
7+
/* LRU uses a 24-bit timestamp of the last access time (in seconds).
8+
* The LRU value needs to be "touched" within 194 days, or the value will wrap,
9+
* and the last access time will appear to be recent.
10+
*/
11+
12+
/* The LRU_CLOCK_RESOLUTION is used to support an older ruby program which tests
13+
* the LRU behavior. This should be set to 1 if building Valkey to support this
14+
* ruby test. Otherwise, the default of 1000 is expected. */
15+
#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
16+
17+
18+
// Current time in seconds (24 least significant bits). Designed to roll over.
19+
static uint32_t LRUGetClockTime(void) {
20+
#if LRU_CLOCK_RESOLUTION == 1000
21+
return (uint32_t)(server.unixtime & LRULFU_MASK);
22+
#else
23+
return (uint32_t)((server.mstime / LRU_CLOCK_RESOLUTION) & LRULFU_MASK);
24+
#endif
25+
}
26+
27+
28+
/* Build an LRU value for something that was last accessed `idle_secs`
 * seconds ago.
 *
 * The idle time is converted to clock ticks and subtracted from the current
 * LRU clock using modular arithmetic. Idle times that do not fit in the
 * LRU field saturate at the largest representable idle time instead of
 * wrapping: a wrapped value would make a very old key look recently used.
 *
 * Returns the LRU value, already reduced with LRULFU_MASK. */
uint32_t lru_import(uint32_t idle_secs) {
    uint32_t now = LRUGetClockTime();
    /* 64-bit intermediate: `long` is only 32 bits on ILP32/LLP64 targets and
     * idle_secs * 1000 would overflow there. */
    uint64_t idle_ticks = idle_secs;
#if LRU_CLOCK_RESOLUTION != 1000
    idle_ticks = idle_ticks * 1000 / LRU_CLOCK_RESOLUTION;
#endif
    if (idle_ticks > LRULFU_MASK) idle_ticks = LRULFU_MASK;
    /* Underflow is ok/expected: the clock is designed to roll over. */
    return (now - (uint32_t)idle_ticks) & LRULFU_MASK;
}
37+
38+
39+
uint32_t lru_getIdleSecs(uint32_t lru) {
40+
// Underflow is ok/expected
41+
uint32_t seconds = (LRUGetClockTime() - lru) & LRULFU_MASK;
42+
#if LRU_CLOCK_RESOLUTION != 1000
43+
seconds = (uint32_t)((long)seconds * LRU_CLOCK_RESOLUTION / 1000);
44+
#endif
45+
return seconds;
46+
}
47+
48+
49+
/**************** LFU ****************/
50+
/* ----------------------------------------------------------------------------
51+
* LFU (Least Frequently Used) implementation.
52+
*
53+
* We split the 24 bits into two fields:
54+
*
55+
* 16 bits 8 bits
56+
* +-----------------------+--------+
57+
* + Last access (minutes) | LOG_C |
58+
* +-----------------------+--------+
59+
*
60+
* LOG_C is a logarithmic counter that provides an indication of the access
61+
* frequency. However this field must also be decremented otherwise what used
62+
* to be a frequently accessed key in the past, will remain ranked like that
63+
* forever, while we want the algorithm to adapt to access pattern changes.
64+
*
65+
* So the remaining 16 bits are used in order to store the "access time",
66+
* a reduced-precision Unix time (we take 16 bits of the time converted
67+
* in minutes since we don't care about wrapping around) where the LOG_C
68+
* counter decays every minute by default (depends on lfu-decay-time).
69+
*
70+
* New keys don't start at zero, in order to have the ability to collect
71+
* some accesses before being trashed away, so they start at LFU_INIT_VAL.
72+
* The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
73+
* when incrementing the key, so that keys starting at LFU_INIT_VAL
74+
* (or having a smaller value) have a very high chance of being incremented
75+
* on access. (The chance depends on counter and lfu-log-factor.)
76+
*
77+
* During decrement, the value of the logarithmic counter is decremented by
78+
* one when lfu-decay-time minutes elapsed.
79+
* --------------------------------------------------------------------------*/
80+
81+
#define LFU_INIT_VAL 5
82+
83+
84+
// Current time in minutes (16 least significant bits). Designed to roll over.
85+
static uint16_t LFUGetTimeInMinutes(void) {
86+
return (uint16_t)(server.unixtime / 60);
87+
}
88+
89+
90+
/* Build an LFU value carrying the given frequency counter and stamped with
 * the current time in minutes. Layout: minutes in bits 8..23, freq in
 * bits 0..7. */
uint32_t lfu_import(uint8_t freq) {
    uint32_t minutes = LFUGetTimeInMinutes();
    return (minutes << 8) | freq;
}
93+
94+
95+
/* Update an LFU to consider decay, but doesn't add a "touch" */
96+
static uint32_t LFUDecay(uint32_t lfu) {
97+
uint16_t now = LFUGetTimeInMinutes();
98+
uint16_t prev_time = (uint16_t)(lfu >> 8);
99+
uint8_t freq = (uint8_t)lfu;
100+
uint16_t elapsed = now - prev_time; // Wrap-around expected/valid
101+
uint16_t num_periods = server.lfu_decay_time ? elapsed / server.lfu_decay_time : 0;
102+
freq = (num_periods > freq) ? 0 : freq - num_periods;
103+
return ((uint32_t)now << 8) | freq;
104+
}
105+
106+
107+
/* Increment the freq counter with logarithmic probability.
108+
* Values closer to 0 are more likely to increment.
109+
* Values closer to 255 are logarithmically less likely to increment. */
110+
static uint8_t LFULogIncr(uint8_t freq) {
111+
if (freq == 255) return freq;
112+
double r = (double)rand() / RAND_MAX;
113+
double baseval = (int)freq - LFU_INIT_VAL;
114+
if (baseval < 0) baseval = 0;
115+
double p = 1.0 / (baseval * server.lfu_log_factor + 1);
116+
if (r < p) freq++;
117+
return freq;
118+
}
119+
120+
121+
uint32_t lfu_touch(uint32_t lfu) {
122+
lfu = LFUDecay(lfu);
123+
uint8_t freq = (uint8_t)lfu;
124+
freq = LFULogIncr(freq);
125+
return (lfu & ~(uint32_t)UINT8_MAX) | freq;
126+
}
127+
128+
129+
/* Report the frequency counter of an LFU value after decay is applied.
 *
 * The counter (low 8 bits of the decayed value) is written to *freq.
 * Returns the decayed LFU value as produced by LFUDecay(). */
uint32_t lfu_getFrequency(uint32_t lfu, uint8_t *freq) {
    const uint32_t decayed = LFUDecay(lfu);
    *freq = (uint8_t)(decayed & UINT8_MAX);
    return decayed;
}
134+
135+
136+
/**************** Generic API ****************/
137+
138+
bool lrulfu_isUsingLFU(void) {
139+
return (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) != 0;
140+
}
141+
142+
143+
uint32_t lrulfu_init(void) {
144+
if (lrulfu_isUsingLFU()) {
145+
return lfu_import(LFU_INIT_VAL);
146+
} else {
147+
return lru_import(0);
148+
}
149+
}
150+
151+
152+
/* Compute an "idleness" score for an LRU/LFU value; higher means more idle.
 *
 * LRU: *idleness is the idle time in seconds and the value is returned
 *      unchanged.
 * LFU: *idleness is UINT8_MAX minus the frequency reported by
 *      lfu_getFrequency(), and the value returned by that call is passed
 *      back to the caller. */
uint32_t lrulfu_getIdleness(uint32_t lrulfu, uint32_t *idleness) {
    if (!lrulfu_isUsingLFU()) {
        *idleness = lru_getIdleSecs(lrulfu);
        return lrulfu;
    }

    uint8_t frequency;
    uint32_t updated = lfu_getFrequency(lrulfu, &frequency);
    *idleness = UINT8_MAX - frequency;
    return updated;
}
162+
163+
164+
/* Record an access and return the new LRU/LFU value.
 *
 * LRU: the previous value is ignored and a fresh stamp with an idle time of
 *      0 is returned.
 * LFU: the value is passed through lfu_touch(). */
uint32_t lrulfu_touch(uint32_t lrulfu) {
    if (!lrulfu_isUsingLFU()) return lru_import(0);
    return lfu_touch(lrulfu);
}

0 commit comments

Comments
 (0)