Decode sets encoded as listpack (RDB type 20) and streams encoded as listpacks v3 (RDB type 21).

sripathikrishnan · Nanciico · Mar 27, 2023 · Mar 27, 2023 · Mar 28, 2023 · Mar 28, 2023
commit dee6d474a9d48a97008aab475c4eba61e4ea38b9
diff --git a/rdbtools/parser.py b/rdbtools/parser.py
@@ -58,6 +58,8 @@
 REDIS_RDB_TYPE_ZSET_LISTPACK = 17
 REDIS_RDB_TYPE_LIST_QUICKLIST_2 = 18
 REDIS_RDB_TYPE_STREAM_LISTPACKS_2 = 19
+REDIS_RDB_TYPE_SET_LISTPACK = 20
+REDIS_RDB_TYPE_STREAM_LISTPACKS_3 = 21
 
 REDIS_RDB_ENC_INT8 = 0
 REDIS_RDB_ENC_INT16 = 1
@@ -74,7 +76,7 @@
 DATA_TYPE_MAPPING = {
     0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash", 5 : "sortedset", 6 : "module", 7: "module",
     9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash", 14 : "list", 15 : "stream", 16 : "hash", 
-    17: "sortedset", 18: "list", 19: "stream"}
+    17: "sortedset", 18: "list", 19: "stream", 20: "set", 21: "stream"}
 
 class RdbCallback(object):
     """
@@ -593,14 +595,18 @@ def read_object(self, f, enc_type) :
             raise Exception('read_object', 'Unable to read Redis Modules RDB objects (key %s)' % self._key)
         elif enc_type == REDIS_RDB_TYPE_MODULE_2:
             self.read_module(f)
-        elif enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS or enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_2:
+        elif enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS or \
+             enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_2 or \
+             enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_3:
             self.read_stream(f, enc_type)
         elif enc_type == REDIS_RDB_TYPE_HASH_LISTPACK:
             self.read_hash_from_listpack(f)
         elif enc_type == REDIS_RDB_TYPE_ZSET_LISTPACK:
             self.read_zset_from_listpack(f)
         elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST_2:
             self.read_list_from_quicklist2(f)
+        elif enc_type == REDIS_RDB_TYPE_SET_LISTPACK:
+            self.read_set_from_listpack(f)
         else:
             raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
 
@@ -668,14 +674,18 @@ def skip_object(self, f, enc_type):
             raise Exception('skip_object', 'Unable to skip Redis Modules RDB objects (key %s)' % self._key)
         elif enc_type == REDIS_RDB_TYPE_MODULE_2:
             self.skip_module(f)
-        elif enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS or enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_2:
+        elif enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS or \
+             enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_2 or \
+             enc_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_3:
             self.skip_stream(f, enc_type)
         elif enc_type == REDIS_RDB_TYPE_HASH_LISTPACK:
             skip_strings = 1
         elif enc_type == REDIS_RDB_TYPE_ZSET_LISTPACK:
             skip_strings = 1
         elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST_2:
             self.skip_list_from_quicklist2(f)
+        elif enc_type == REDIS_RDB_TYPE_SET_LISTPACK:
+            skip_strings = 1
         else:
             raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
         for x in range(0, skip_strings):
@@ -918,6 +928,8 @@ def skip_stream(self, f, rdb_type):
             for _c in range(consumers):
                 self.skip_string(f)
                 f.read(8)
+                if rdb_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_3:
+                    f.read(8)
                 pending = self.read_length(f)
                 f.read(pending*16)
 
@@ -958,13 +970,17 @@ def read_stream(self, f, rdb_type):
             for _c in range(consumers):
                 cname = self.read_string(f)
                 seen_time = read_milliseconds_time(f)
+                active_time = seen_time
+                if rdb_type == REDIS_RDB_TYPE_STREAM_LISTPACKS_3:
+                    active_time = read_milliseconds_time(f)
                 pending = self.read_length(f)
                 consumer_pending_entries = []
-                for _pel in range( pending):
+                for _pel in range(pending):
                     eid = f.read(16)
                     consumer_pending_entries.append({'id': eid})
                 consumers_data.append({'name': cname,
                                        'seen_time': seen_time,
+                                       'active_time': active_time,
                                        'pending': consumer_pending_entries})
             cgroups_data.append({'name': cgname,
                                  'last_entry_id': last_cg_entry_id,
@@ -988,7 +1004,7 @@ def read_hash_from_listpack(self, f):
             self._callback.hset(self._key, field, value)
         listpack_end = read_unsigned_char(buff)
         if listpack_end != 255:
-            raise Exception('read_hash_from_ziplist', "Invalid zip list end - %d for key %s" % (listpack_end, self._key))
+            raise Exception('read_hash_from_listpack', "Invalid listpack end - %d for key %s" % (listpack_end, self._key))
         self._callback.end_hash(self._key)
 
     def read_listpack_entry(self, f):
@@ -1004,8 +1020,7 @@ def read_listpack_entry(self, f):
             value = f.read(length)
         elif (encoding_type >> 5) == 6:
             bytes.append(read_unsigned_char(f))
-            value = c_int16(((bytes[0] & 0x1F) << 11) | (bytes[1]) << 3).value
-            value >>= 3
+            value = (c_int16(((bytes[0] & 0x1F) << 11) | (bytes[1]) << 3).value) >> 3
         elif (encoding_type >> 4) == 14:
             length = ((encoding_type & 0xF) << 8) | read_unsigned_char(f)
             value = f.read(length)
@@ -1029,7 +1044,6 @@ def read_listpack_entry(self, f):
             length >>= 7
             if length == 0:
                 break
-
         return value
 
     def read_zset_from_listpack(self, f):
@@ -1049,7 +1063,7 @@ def read_zset_from_listpack(self, f):
             self._callback.zadd(self._key, score, member)
         listpack_end = read_unsigned_char(buff)
         if listpack_end != 255 :
-            raise Exception('read_hash_from_ziplist', "Invalid zip list end - %d for key %s" % (listpack_end, self._key))
+            raise Exception('read_zset_from_listpack', "Invalid listpack end - %d for key %s" % (listpack_end, self._key))
         self._callback.end_sorted_set(self._key)
 
     def read_list_from_quicklist2(self, f):
@@ -1069,6 +1083,20 @@ def read_list_from_quicklist2(self, f):
                 raise Exception('read_quicklist2', "Invalid listpack end - %d for key %s" % (listpack_end, self._key))
         self._callback.end_list(self._key, info={'encoding': 'quicklist2', 'quicklist2': count, 'sizeof_value': total_size})
 
+    def read_set_from_listpack(self, f):
+        raw_string = self.read_string(f)
+        buff = BytesIO(raw_string)
+        lpbytes = read_unsigned_int(buff)
+        num_entries = read_unsigned_short(buff)
+        self._callback.start_set(self._key, num_entries, self._expiry, info={'encoding':'listpack', 'idle':self._idle,'freq':self._freq})
+        for x in range(0, num_entries) :
+            member = self.read_listpack_entry(buff)
+            self._callback.sadd(self._key, member)
+        listpack_end = read_unsigned_char(buff)
+        if listpack_end != 255 :
+            raise Exception('read_set_from_listpack', "Invalid listpack end - %d for key %s" % (listpack_end, self._key))
+        self._callback.end_set(self._key)
+
 
     charset = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
 
@@ -1092,7 +1120,7 @@ def verify_magic_string(self, magic_string) :
 
     def verify_version(self, version_str) :
         version = int(version_str)
-        if version < 1 or version > 10:
+        if version < 1 or version > 11:
             raise Exception('verify_version', 'Invalid RDB version number %d' % version)
         self._rdb_version = version
 

diff --git a/tests/create_test_rdb.py b/tests/create_test_rdb.py
@@ -36,7 +36,8 @@ def create_test_rdbs(path_to_redis_dump, dump_folder):
 #                listpack_multibyte_encodings_13_bit_signed_integer,
 #                listpack_multibyte_encodings_small_strings,
 #                listpack_multibyte_encodings_large_strings,
-                listpack_multibyte_encodings_integer,
+#                listpack_multibyte_encodings_integer,
+                set_as_listpack,
 #                streams,
             )
     for t in tests:
@@ -166,7 +167,7 @@ def regular_set():
 def sorted_set_as_ziplist():
     dict = {'8b6ba6718a786daefa69438148361901': 1, 'cb7a24bb7528f934b841b34c3a73e0c7': 2.37, '523af537946b79c4f8369ed39ba78605': 3.423}
     r.zadd("sorted_set_as_ziplist", dict)
-    
+
 def regular_sorted_set():
     num_entries = 500
     dict = {}
@@ -222,6 +223,14 @@ def listpack_multibyte_encodings_integer():
     r.rpush("listpack_multibyte_encodings_integer", -0x7ffefffefffefffe)
     r.rpush("listpack_multibyte_encodings_integer", 0x7ffefffefffefffe)
 
+def set_as_listpack():
+    r.sadd("set_as_listpack", "abc")
+    r.sadd("set_as_listpack", "abcdefg")
+    r.sadd("set_as_listpack", "abcdefghijklmn")
+    r.sadd("set_as_listpack", -3)
+    r.sadd("set_as_listpack", 50)
+    r.sadd("set_as_listpack", -70)
+
 def streams():
     stream1 = {'temp_f': 87.2, 'pressure': 29.69, 'humidity': 46}
     stream2 = {'temp_f': 83.1, 'pressure': 29.21, 'humidity': 46.5}
@@ -250,4 +259,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/tests/dumps/set_as_listpack.rdb b/tests/dumps/set_as_listpack.rdb
diff --git a/tests/parser_tests.py b/tests/parser_tests.py
@@ -208,6 +208,15 @@ def test_listpack_multibyte_encodings_integer(self):
         for num in expected_numbers :
             self.assert_(num in r.databases[0][b"listpack_multibyte_encodings_integer"], "Cannot find %d" % num)
 
+    def test_set_as_listpack(self):
+        r = load_rdb('set_as_listpack.rdb')
+        self.assertEquals(r.lengths[0][b"set_as_listpack"], 6)
+        expected_numbers = [-3, 50, -70]
+        for member in (b"abc", b"abcdefg", b"abcdefghijklmn") :
+            self.assert_(member in r.databases[0][b"set_as_listpack"], msg=('%s missing' % member))
+        for num in expected_numbers :
+            self.assert_(num in r.databases[0][b"set_as_listpack"], "Cannot find %d" % num)
+
     def test_streams(self):
         r = load_rdb('streams.rdb')
         self.assertEquals(r.lengths[0][b"streams"], 3)