Skip to content

Commit 9a81484

Browse files
authored
ZAP: Reduce leaf array and free chunks fragmentation
Previous implementation of zap_leaf_array_free() put chunks on the free list in reverse order. Also zap_leaf_transfer_entry() and zap_entry_remove() were freeing name and value arrays in reverse order. Together this created a mess in the free list, making following allocations much more fragmented than necessary. This patch re-implements zap_leaf_array_free() to keep existing chunks order, and implements non-destructive zap_leaf_array_copy() to be used in zap_leaf_transfer_entry() to allow properly ordered freeing name and value arrays there and in zap_entry_remove(). With this change test of some writes and deletes shows percent of non-contiguous chunks in DDT reducing from 61% and 47% to 0% and 17% for arrays and frees respectively. Sure some explicit sorting could do even better, especially for ZAPs with variable-size arrays, but it would also cost much more, while this should be very cheap. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes openzfs#16766
1 parent d02257c commit 9a81484

File tree

1 file changed

+62
-44
lines changed

1 file changed

+62
-44
lines changed

module/zfs/zap_leaf.c

Lines changed: 62 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -248,20 +248,63 @@ zap_leaf_array_create(zap_leaf_t *l, const char *buf,
248248
return (chunk_head);
249249
}
250250

/*
 * Non-destructively copy an array chunk chain from leaf l to leaf nl.
 *
 * Unlike the transfer helper this replaces, the source chain in l is left
 * intact; the caller is expected to free it afterwards (in an order that
 * keeps the free list unfragmented).
 *
 * chunk is the head of the source chain (CHAIN_END for an empty chain).
 * Returns the head chunk index of the newly allocated chain in nl
 * (CHAIN_END if the source chain was empty).
 */
static uint16_t
zap_leaf_array_copy(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	/*
	 * nchunkp always points at the link to patch with the next new
	 * chunk index: first the local head, then each copied chunk's
	 * la_next. This stitches the copy together in forward order.
	 */
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);

		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		ASSERT3U(la->la_type, ==, ZAP_CHUNK_ARRAY);

		*nla = *la; /* structure assignment */

		/* Advance along the source chain before nla->la_next is
		 * overwritten via nchunkp on the next iteration. */
		chunk = la->la_next;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	/* Terminate the copy (also sets new_chunk for an empty source). */
	*nchunkp = CHAIN_END;
	return (new_chunk);
}
/*
 * Free array. Unlike trivial loop of zap_leaf_chunk_free() this does
 * not reverse order of chunks in the free list, reducing fragmentation.
 *
 * The whole chain is spliced onto the head of the leaf's free list in
 * its original order: the previous free-list head is saved, each chunk
 * is appended via a tail pointer, and the saved head is reattached at
 * the end.
 */
static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t chunk)
{
	struct zap_leaf_header *hdr = &zap_leaf_phys(l)->l_hdr;
	/* tailp tracks the link to receive the next freed chunk. */
	uint16_t *tailp = &hdr->lh_freelist;
	uint16_t oldfree = *tailp;

	while (chunk != CHAIN_END) {
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		zap_leaf_chunk_t *c = &ZAP_LEAF_CHUNK(l, chunk);
		ASSERT3U(c->l_array.la_type, ==, ZAP_CHUNK_ARRAY);

		/* Link this chunk into the free list, then read la_next
		 * before the union is repurposed as a free chunk below. */
		*tailp = chunk;
		chunk = c->l_array.la_next;

		c->l_free.lf_type = ZAP_CHUNK_FREE;
		memset(c->l_free.lf_pad, 0, sizeof (c->l_free.lf_pad));
		tailp = &c->l_free.lf_next;

		ASSERT3U(hdr->lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
		hdr->lh_nfree++;
	}

	/* Reattach the pre-existing free list after the freed chain. */
	*tailp = oldfree;
}
266309

267310
/* array_len and buf_len are in integers, not bytes */
@@ -515,7 +558,7 @@ zap_entry_update(zap_entry_handle_t *zeh,
515558
if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
516559
return (SET_ERROR(EAGAIN));
517560

518-
zap_leaf_array_free(l, &le->le_value_chunk);
561+
zap_leaf_array_free(l, le->le_value_chunk);
519562
le->le_value_chunk =
520563
zap_leaf_array_create(l, buf, integer_size, num_integers);
521564
le->le_value_numints = num_integers;
@@ -534,10 +577,11 @@ zap_entry_remove(zap_entry_handle_t *zeh)
534577
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
535578
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
536579

537-
zap_leaf_array_free(l, &le->le_name_chunk);
538-
zap_leaf_array_free(l, &le->le_value_chunk);
539-
540580
*zeh->zeh_chunkp = le->le_next;
581+
582+
/* Free in opposite order to reduce fragmentation. */
583+
zap_leaf_array_free(l, le->le_value_chunk);
584+
zap_leaf_array_free(l, le->le_name_chunk);
541585
zap_leaf_chunk_free(l, entry_chunk);
542586

543587
zap_leaf_phys(l)->l_hdr.lh_nentries--;
@@ -701,34 +745,6 @@ zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
701745
return (chunkp);
702746
}
703747

704-
static uint16_t
705-
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
706-
{
707-
uint16_t new_chunk;
708-
uint16_t *nchunkp = &new_chunk;
709-
710-
while (chunk != CHAIN_END) {
711-
uint16_t nchunk = zap_leaf_chunk_alloc(nl);
712-
struct zap_leaf_array *nla =
713-
&ZAP_LEAF_CHUNK(nl, nchunk).l_array;
714-
struct zap_leaf_array *la =
715-
&ZAP_LEAF_CHUNK(l, chunk).l_array;
716-
uint_t nextchunk = la->la_next;
717-
718-
ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
719-
ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
720-
721-
*nla = *la; /* structure assignment */
722-
723-
zap_leaf_chunk_free(l, chunk);
724-
chunk = nextchunk;
725-
*nchunkp = nchunk;
726-
nchunkp = &nla->la_next;
727-
}
728-
*nchunkp = CHAIN_END;
729-
return (new_chunk);
730-
}
731-
732748
static void
733749
zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
734750
{
@@ -741,10 +757,12 @@ zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
741757

742758
(void) zap_leaf_rehash_entry(nl, nle, chunk);
743759

744-
nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
745-
nle->le_value_chunk =
746-
zap_leaf_transfer_array(l, le->le_value_chunk, nl);
760+
nle->le_name_chunk = zap_leaf_array_copy(l, le->le_name_chunk, nl);
761+
nle->le_value_chunk = zap_leaf_array_copy(l, le->le_value_chunk, nl);
747762

763+
/* Free in opposite order to reduce fragmentation. */
764+
zap_leaf_array_free(l, le->le_value_chunk);
765+
zap_leaf_array_free(l, le->le_name_chunk);
748766
zap_leaf_chunk_free(l, entry);
749767

750768
zap_leaf_phys(l)->l_hdr.lh_nentries--;

0 commit comments

Comments
 (0)