Skip to content

Commit 4865b63

Browse files
create a map for leaves
1 parent b85a34e commit 4865b63

File tree

10 files changed

+296
-29
lines changed

10 files changed

+296
-29
lines changed

contrib/format.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -e
44

5-
for file in $(find src/ include/ -name *.h)
5+
for file in $(find src/ include/ -name *.[h,c])
66
do
77
clang-format -i $file
88
done

src/flat_file.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct utreexo_forest_file {
5858

5959
/* Things we need to keep through different sessions, they are persisted at the
6060
* beginning of a file
61-
* */
61+
*/
6262
struct utreexo_forest_file_header {
6363
uint64_t magic;
6464
struct utreexo_forest_page_header *wrt_page; // Which page are we on
@@ -69,26 +69,24 @@ struct utreexo_forest_file_header {
6969
} __attribute__((__packed__));
7070

7171
/* The size of a page minus its header */
72-
const inline uint64_t utreexo_page_data_size() {
72+
static inline uint64_t utreexo_page_data_size() {
7373
return NODES_PER_PAGE * sizeof(utreexo_forest_node);
7474
}
7575

7676
/* The size of a whole page */
77-
const inline uint64_t utreexo_page_size() {
77+
static inline uint64_t utreexo_page_size() {
7878
return utreexo_page_data_size() + sizeof(struct utreexo_forest_page_header);
7979
}
8080

8181
/* A pointer to the page's data (excludes the header) */
82-
const inline utreexo_forest_node *utreexo_page_data(char *data, size_t n) {
82+
static inline utreexo_forest_node *utreexo_page_data(char *data, size_t n) {
8383
return (utreexo_forest_node *)(data + (utreexo_page_size() * n) +
8484
sizeof(struct utreexo_forest_page_header));
8585
}
8686

8787
/* A pointer to the start of the page (header included) */
88-
const inline struct utreexo_forest_page_header *utreexo_page(char *data,
89-
size_t n) {
90-
return (struct utreexo_forest_page_header *)(data +
91-
(utreexo_page_size() * n));
88+
static inline void *utreexo_page(char *data, size_t n) {
89+
return (void *)(data + (utreexo_page_size() * n));
9290
}
9391

9492
/* Close the file, and free the memory */

src/flat_file_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ static inline void utreexo_forest_file_init(struct utreexo_forest_file **file,
4040

4141
struct utreexo_forest_file *pfile =
4242
(struct utreexo_forest_file *)malloc(sizeof(struct utreexo_forest_file));
43+
4344
if (pfile == NULL) {
4445
perror("malloc");
4546
exit(1);

src/forest_test.c

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "flat_file.h"
77
#include "forest_node.h"
8+
#include "leaf_map.h"
89
#include "map_forest_impl.h"
910
#include "parent_hash.h"
1011
#include "test_utils.h"
@@ -15,8 +16,16 @@ static inline struct utreexo_forest get_test_forest(const char *filename) {
1516

1617
utreexo_forest_file_init(&file, &heap, filename);
1718
uint8_t *roots = ((uint8_t *)heap) + sizeof(uint64_t);
19+
20+
char map_name[100];
21+
sprintf(map_name, "map_%s", filename);
22+
23+
utreexo_leaf_map map;
24+
utreexo_leaf_map_new(&map, map_name, O_CREAT | O_RDWR);
25+
1826
struct utreexo_forest p = {
1927
.data = file,
28+
.leaf_map = map,
2029
.roots = (utreexo_forest_node **)(roots),
2130
.nLeaf = heap,
2231
};
@@ -145,12 +154,17 @@ void test_add_single() {
145154
TEST_BEGIN("add_single");
146155
utreexo_node_hash leaf = {.hash = {0}};
147156
hash_from_u8(leaf.hash, 0);
157+
148158
void *heap = NULL;
149159
struct utreexo_forest_file *file = NULL;
150160
utreexo_forest_file_init(&file, &heap, "add_single.bin");
151161

162+
utreexo_leaf_map leaf_map;
163+
utreexo_leaf_map_new(&leaf_map, "leaves_single.bin", O_CREAT | O_RDWR);
164+
152165
struct utreexo_forest p = {
153166
.data = file,
167+
.leaf_map = leaf_map,
154168
.roots = (utreexo_forest_node **)(((uint8_t *)heap) + sizeof(uint64_t)),
155169
.nLeaf = heap,
156170
};
@@ -197,6 +211,32 @@ void test_add_many() {
197211
TEST_END;
198212
}
199213

214+
/* Adds eight leaves to a fresh forest, looks the first one up through the
 * forest's leaf map, deletes the node the map returned, and checks the hash of
 * roots[3] against a hard-coded expected value.
 */
void test_delete_with_map() {
215+
TEST_BEGIN("delete with map");
216+
const unsigned char expected_root[] = {
217+
0x72, 0x6f, 0xdd, 0x3b, 0x43, 0x2c, 0xc5, 0x9e, 0x68, 0x48, 0x7d,
218+
0x12, 0x6e, 0x70, 0xf0, 0xdb, 0x74, 0xa2, 0x36, 0x26, 0x7f, 0x8d,
219+
0xae, 0xae, 0x30, 0xb3, 0x18, 0x39, 0xa4, 0xe7, 0xeb, 0xed};
220+
struct utreexo_forest p = get_test_forest("delete_with_map.bin");
221+
uint8_t values[] = {0, 1, 2, 3, 4, 5, 6, 7};
222+
/* One leaf per entry of `values`, each derived with hash_from_u8 */
for (int i = 0; i < 8; i++) {
223+
utreexo_node_hash leaf = {.hash = {0}};
224+
hash_from_u8(leaf.hash, values[i]);
225+
utreexo_forest_add(&p, leaf);
226+
}
227+
228+
/* Rebuild the hash of the first leaf so it can be used as the lookup key */
utreexo_node_hash leaf = {.hash = {0}};
229+
hash_from_u8(leaf.hash, values[0]);
230+
231+
/* NOTE(review): pnode is handed to delete_single without a NULL check;
 * presumably the lookup is expected to always succeed here -- confirm. */
utreexo_forest_node *pnode = NULL;
232+
utreexo_leaf_map_get(&p.leaf_map, &pnode, leaf);
233+
delete_single(&p, pnode);
234+
235+
ASSERT_ARRAY_EQ(expected_root, p.roots[3]->hash.hash, 32);
236+
237+
TEST_END;
238+
}
239+
200240
/* Tests from https://github.com/mit-dci/rustreexo */
201241
void test_from_test_cases(void) {
202242
TEST_BEGIN("rustreexo test suite");
@@ -267,9 +307,10 @@ void test_delete_some() {
267307
utreexo_forest_add(&p, leaf);
268308
}
269309

270-
delete_single(&p, 0);
271-
delete_single(&p, 2);
272-
delete_single(&p, 9);
310+
delete_single_pos(&p, 0);
311+
delete_single_pos(&p, 2);
312+
delete_single_pos(&p, 9);
313+
273314
ASSERT_ARRAY_EQ(p.roots[3]->hash.hash, expected_root, 32);
274315
TEST_END;
275316
}
@@ -390,7 +431,7 @@ void test_deletion_cases() {
390431
}
391432

392433
for (size_t i = 0; i < test_case->n_target_values; ++i) {
393-
delete_single(&p, test_case->target_values[i]);
434+
delete_single_pos(&p, test_case->target_values[i]);
394435
}
395436

396437
size_t n_mached = 0;
@@ -420,5 +461,7 @@ int main() {
420461
test_grab_node();
421462
test_delete_some();
422463
test_deletion_cases();
464+
test_delete_with_map();
465+
423466
return 0;
424467
}

src/leaf_map.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* COPYRIGHT (C) 2023 Davidson Souza. All Rights Reserved.
3+
*
4+
* In utreexo, we hold all UTXOs as leaves inside some trees. While we still
5+
* have trees, they are not sorted by leaf hashes and leaves can move upwards as
6+
* nodes get deleted, which makes it hard to track individual leaves inside
7+
* the accumulator. To solve that, we hold a map from leaf_hash -> *leaf, where
8+
* *leaf is a pointer to that leaf inside the accumulator.
9+
*
10+
* When nodes are added and removed, it's only a pointer operation, and no data
11+
* gets moved around; therefore, it's fair to keep pointers and dereference
12+
* them to get a (undeleted) node.
13+
*
14+
* This is a simple disk-based universal hashing hash map, we allocate a
15+
* gigantic file at the beginning (64GB) but use a sparse file, where we
16+
* "pretend" we have 64GB, but the OS doesn't allocate that until we actually
17+
* use the space. This file starts with zero bytes and grows as we go.
18+
*/
19+
#ifndef LEAF_MAP_H
20+
#define LEAF_MAP_H
21+
22+
#include "forest_node.h"
23+
24+
typedef struct {
25+
int fd;
26+
} utreexo_leaf_map;
27+
28+
/* Creates a new leaf_map. This function doesn't allocate any memory, since
29+
* utreexo_leaf_map isn't particularly big. Filename is the file we'll store
30+
* stuff in and flags are the flags for that file on our FS
31+
*/
32+
static inline void utreexo_leaf_map_new(utreexo_leaf_map *map,
33+
const char *filename,
34+
const unsigned int flags);
35+
36+
/* Gets a node's reference from the map. You should pass a pointer to a pointer
37+
* to a utreexo_forest_node. That's because you'll end-up with a
38+
* utreexo_forest_node*, the actual thing is inside the mmap-ed file, taking it
39+
* by value would create a copy, which isn't what you want.
40+
*/
41+
static inline void utreexo_leaf_map_get(utreexo_leaf_map *map,
42+
utreexo_forest_node **node,
43+
utreexo_leaf_hash leaf);
44+
45+
/* Sets a key to a given pointer */
46+
static inline void utreexo_leaf_map_set(utreexo_leaf_map *map,
47+
utreexo_forest_node *node,
48+
utreexo_leaf_hash hash);
49+
/* Delete a leaf from the map */
50+
static inline void utreexo_leaf_delete(utreexo_leaf_map *map,
51+
utreexo_node_hash hash);
52+
53+
#endif // LEAF_MAP_H

src/leaf_map_impl.h

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#include <assert.h>
2+
#include <fcntl.h>
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
#include <string.h>
6+
#include <unistd.h>
7+
8+
#include "forest_node.h"
9+
#include "leaf_map.h"
10+
11+
/* Hashes a 36-byte probe key down to 32 bits.
 *
 * Starts from 5381 and, for every byte, multiplies the accumulator by 33
 * (written as a shift plus an add) and adds the byte. Only the low 32 bits of
 * the accumulator are returned.
 *
 * value: the 36 bytes to hash; they are only read.
 */
static inline unsigned int hash(const unsigned char value[36]) {
  unsigned long acc = 5381;

  for (size_t i = 0; i < 36; ++i) {
    acc = ((acc << 5) + acc) + value[i];
  }

  return (unsigned int)(acc & 0xffffffff);
}
20+
21+
/* Opens the file backing a leaf map and stores its descriptor in `map`.
 *
 * map:      filled in with the opened descriptor.
 * filename: path handed to open(2).
 * flags:    open(2) flags; the file is created with mode 0666 when they ask
 *           for creation.
 *
 * A file shorter than 0x100 bytes is extended to that length by writing one
 * zero byte at offset 0xff. Longer files are left alone, because that byte may
 * already be part of a stored slot, and files opened read-only are never
 * written to. Any failing system call is reported with perror() and ends the
 * process, like the open() failure does.
 */
static inline void utreexo_leaf_map_new(utreexo_leaf_map *map,
                                        const char *filename,
                                        const unsigned int flags) {
  const off_t min_size = 0x100;

  int fd = open(filename, (int)flags, 0666);
  if (fd == -1) {
    perror("open");
    exit(EXIT_FAILURE);
  }

  const off_t size = lseek(fd, 0, SEEK_END);
  if (size == (off_t)-1) {
    perror("lseek");
    exit(EXIT_FAILURE);
  }

  const int writable = ((int)flags & O_ACCMODE) != O_RDONLY;
  if (writable && size < min_size) {
    const char end = 0;

    if (lseek(fd, min_size - 1, SEEK_SET) == (off_t)-1) {
      perror("lseek");
      exit(EXIT_FAILURE);
    }
    if (write(fd, &end, sizeof(end)) != (ssize_t)sizeof(end)) {
      perror("write");
      exit(EXIT_FAILURE);
    }
  }

  *map = (utreexo_leaf_map){
      .fd = fd,
  };
}
39+
40+
static inline void utreexo_leaf_map_get(utreexo_leaf_map *map,
41+
utreexo_forest_node **node,
42+
utreexo_leaf_hash leaf) {
43+
utreexo_forest_node *pnode = NULL;
44+
unsigned char key[36] = {0};
45+
46+
memmove(key, leaf.hash, 32);
47+
48+
do {
49+
unsigned long position = hash(key) * sizeof(void *);
50+
lseek(map->fd, position, SEEK_SET);
51+
read(map->fd, &pnode, sizeof(utreexo_forest_node *));
52+
++(*(unsigned int *)&key[32]);
53+
} while (pnode != NULL && memcmp(pnode->hash.hash, leaf.hash, 32) != 0);
54+
*node = pnode;
55+
}
56+
57+
static inline void utreexo_leaf_map_set(utreexo_leaf_map *map,
58+
utreexo_forest_node *node,
59+
utreexo_leaf_hash leaf) {
60+
utreexo_forest_node *pnode = NULL;
61+
unsigned char key[36] = {0};
62+
unsigned long position = 0;
63+
64+
memmove(key, leaf.hash, 32);
65+
66+
do {
67+
position = hash(key) * sizeof(utreexo_forest_node **);
68+
lseek(map->fd, position, SEEK_SET);
69+
read(map->fd, pnode, sizeof(utreexo_forest_node **));
70+
++(*(unsigned int *)&key[32]);
71+
} while (pnode != NULL);
72+
73+
assert(pnode == NULL);
74+
75+
lseek(map->fd, position, SEEK_SET);
76+
write(map->fd, &node, sizeof(utreexo_forest_node *));
77+
}
78+
79+
static inline void utreexo_leaf_delete(utreexo_leaf_map *map,
80+
utreexo_node_hash leaf) {
81+
utreexo_forest_node *pnode = NULL;
82+
unsigned char key[36] = {0};
83+
unsigned long position = 0;
84+
85+
memmove(key, leaf.hash, 32);
86+
87+
do {
88+
position = hash(key) * sizeof(void *);
89+
lseek(map->fd, position, SEEK_SET);
90+
read(map->fd, pnode, sizeof(utreexo_forest_node **));
91+
++(*(unsigned int *)&key[32]);
92+
} while (pnode != NULL && memcmp(pnode->hash.hash, leaf.hash, 32) != 0);
93+
94+
pnode = NULL;
95+
lseek(map->fd, position, SEEK_SET);
96+
write(map->fd, pnode, sizeof(utreexo_forest_node **));
97+
}

0 commit comments

Comments
 (0)