Skip to content

Commit 6ab0171

Browse files
committed
gossipd: save gossip store writes, try them again (and fsync) if we get a read issue.
This is a last resort, but what else are we supposed to do when we wrote something and it didn't appear? In particular, ZFS doesn't just "fix itself": ``` remaining_fd=200001b0c9761dff0000000001009411e26cd56d68aabc285ee1c8ee43d59be6f939b0ce353d80213918680a7438356b9c5ea6bb001a6 bb37a4dea93776f4abc8cd371525b4d1605a74b89d7cb1bfc8865ddf22288c7ea08b9d98b34155b4aed159eb81732957e6bf79b996752bf2a9995aae ad1d65e7889e826ea0ba42f7746c176fe12f2fe6c04af1a74b4f0a262d20efd57133eb32693c789eb3f09caf4f4c6ecd2f734b3b36e751ffcc2748c5 8feabce4173c4ce6098a2c5397aabf1be5442cb67b5030be11ebd8b9841838dae127fe30000000000000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 000000000000000000000000000000000000000000000000000000000002000000a218b9d93000000001005000000000000c060 ``` Note the record appended on the end *after all the zeroes*. Changelog-Changed: gossipd: add gossip_store recovery for filesystems which do not synchronize read and write (e.g. ZFS on Linux), by disabling mmap reads and rewriting the last records. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
1 parent e1cbc77 commit 6ab0171

File tree

3 files changed

+76
-48
lines changed

3 files changed

+76
-48
lines changed

gossipd/gossip_store.c

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -44,29 +44,11 @@ static void gossip_store_destroy(struct gossip_store *gs)
4444
close(gs->fd);
4545
}
4646

47-
#if HAVE_PWRITEV
48-
/* One fewer syscall for the win! */
49-
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
50-
off_t offset)
47+
static bool append_msg(int fd, const u8 *msg, u32 timestamp, u64 *len,
48+
const u8 ***msgs)
5149
{
52-
return pwritev(fd, iov, iovcnt, offset);
53-
}
54-
#else /* Hello MacOS! */
55-
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
56-
off_t offset)
57-
{
58-
if (lseek(fd, offset, SEEK_SET) != offset)
59-
return -1;
60-
return writev(fd, iov, iovcnt);
61-
}
62-
#endif /* !HAVE_PWRITEV */
63-
64-
static bool append_msg(int fd, const u8 *msg, u32 timestamp, u64 *len)
65-
{
66-
struct gossip_hdr hdr;
50+
struct gossip_hdr *hdr;
6751
u32 msglen;
68-
struct iovec iov[2];
69-
const u8 complete_byte = (GOSSIP_STORE_COMPLETED_BIT >> 8);
7052

7153
/* Don't ever overwrite the version header! */
7254
assert(*len);
@@ -76,25 +58,25 @@ static bool append_msg(int fd, const u8 *msg, u32 timestamp, u64 *len)
7658
msglen = tal_count(msg);
7759
/* All messages begin with a 16-bit type */
7860
assert(msglen >= 2);
79-
hdr.len = cpu_to_be16(msglen);
80-
hdr.flags = 0;
81-
hdr.crc = cpu_to_be32(crc32c(timestamp, msg, msglen));
82-
hdr.timestamp = cpu_to_be32(timestamp);
8361

84-
/* pwritev makes it more likely to appear at once, plus it's
85-
* exactly what we want. */
86-
iov[0].iov_base = &hdr;
87-
iov[0].iov_len = sizeof(hdr);
88-
iov[1].iov_base = (void *)msg;
89-
iov[1].iov_len = msglen;
90-
if (gossip_pwritev(fd, iov, ARRAY_SIZE(iov), *len) != sizeof(hdr) + msglen)
62+
hdr = (struct gossip_hdr *)tal_arr(tmpctx, u8, sizeof(*hdr) + msglen);
63+
hdr->len = cpu_to_be16(msglen);
64+
hdr->flags = 0;
65+
hdr->crc = cpu_to_be32(crc32c(timestamp, msg, msglen));
66+
hdr->timestamp = cpu_to_be32(timestamp);
67+
memcpy(hdr + 1, msg, msglen);
68+
69+
if (pwrite(fd, hdr, sizeof(*hdr) + msglen, *len) != sizeof(*hdr) + msglen)
9170
return false;
9271

9372
/* Update the hdr with the complete bit as a single-byte write */
94-
if (pwrite(fd, &complete_byte, 1, *len) != 1)
73+
hdr->flags = CPU_TO_BE16(GOSSIP_STORE_COMPLETED_BIT);
74+
if (pwrite(fd, &hdr->flags, 1, *len) != 1)
9575
return false;
9676

97-
*len += sizeof(hdr) + msglen;
77+
*len += sizeof(*hdr) + msglen;
78+
if (msgs)
79+
tal_arr_expand(msgs, (const u8 *)tal_steal(*msgs, hdr));
9880
return true;
9981
}
10082

@@ -376,7 +358,7 @@ static int gossip_store_compact(struct daemon *daemon,
376358
/* Create end marker now new file exists. */
377359
if (old_fd != -1) {
378360
append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len),
379-
0, &old_len);
361+
0, &old_len, NULL);
380362
close(old_fd);
381363
}
382364

@@ -426,7 +408,32 @@ void gossip_store_fsync(const struct gossip_store *gs)
426408
"gossmap fsync failed: %s", strerror(errno));
427409
}
428410

429-
u64 gossip_store_add(struct gossip_store *gs, const u8 *gossip_msg, u32 timestamp)
411+
void gossip_store_rewrite_end(struct gossip_store *gs, const u8 **msgs)
412+
{
413+
u64 offset = gs->len;
414+
415+
for (size_t i = 0; i < tal_count(msgs); i++) {
416+
/* Don't overwrite version byte */
417+
assert(tal_bytelen(msgs[i]) < gs->len);
418+
offset -= tal_bytelen(msgs[i]);
419+
}
420+
421+
for (size_t i = 0; i < tal_count(msgs); i++) {
422+
if (pwrite(gs->fd, msgs[i], tal_bytelen(msgs[i]), offset) != tal_bytelen(msgs[i]))
423+
status_failed(STATUS_FAIL_INTERNAL_ERROR,
424+
"Failed to re-write %s at offset %"PRIu64,
425+
tal_hex(tmpctx, msgs[i]), offset);
426+
offset += tal_bytelen(msgs[i]);
427+
}
428+
429+
/* Hit it harder. */
430+
gossip_store_fsync(gs);
431+
}
432+
433+
u64 gossip_store_add(struct gossip_store *gs,
434+
const u8 *gossip_msg,
435+
u32 timestamp,
436+
const u8 ***msgs)
430437
{
431438
u64 off = gs->len, filelen;
432439

@@ -446,7 +453,7 @@ u64 gossip_store_add(struct gossip_store *gs, const u8 *gossip_msg, u32 timestam
446453
filelen, off);
447454
}
448455

449-
if (!append_msg(gs->fd, gossip_msg, timestamp, &gs->len)) {
456+
if (!append_msg(gs->fd, gossip_msg, timestamp, &gs->len, msgs)) {
450457
status_failed(STATUS_FAIL_INTERNAL_ERROR,
451458
"Failed writing to gossip store: %s",
452459
strerror(errno));

gossipd/gossip_store.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,14 @@ void gossip_store_corrupt(void);
4949
* @gs: gossip store
5050
* @gossip_msg: the gossip message to insert.
5151
* @timestamp: the timestamp for filtering of this message.
52+
* @msgs: optional pointer to a u8 * array to append the written records to (may be NULL).
53+
*
54+
* Returns the offset (after the gossip_hdr).
5255
*/
5356
u64 gossip_store_add(struct gossip_store *gs,
5457
const u8 *gossip_msg,
55-
u32 timestamp);
56-
58+
u32 timestamp,
59+
const u8 ***msgs);
5760

5861
/**
5962
* Delete the record at this offset (offset is that of
@@ -106,6 +109,11 @@ u32 gossip_store_get_timestamp(struct gossip_store *gs, u64 offset);
106109
*/
107110
void gossip_store_set_timestamp(struct gossip_store *gs, u64 offset, u32 timestamp);
108111

112+
/**
113+
* We've seen (ZFS on Linux) writes not show up in the gossip store.
114+
* This lets us rewrite the last bytes. */
115+
void gossip_store_rewrite_end(struct gossip_store *gs, const u8 **msgs);
116+
109117
/**
110118
* For debugging.
111119
*/

gossipd/gossmap_manage.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ struct gossmap_manage {
6767
/* gossip map itself (access via gossmap_manage_get_gossmap, so it's fresh!) */
6868
struct gossmap *raw_gossmap;
6969

70+
/* Last writes to gossmap since previous sync, in case it
71+
* messes up and we need to force it. */
72+
const u8 **last_writes;
73+
7074
/* The gossip_store, which writes to the gossip_store file */
7175
struct gossip_store *gs;
7276

@@ -266,7 +270,7 @@ static void remove_channel(struct gossmap_manage *gm,
266270
/* Put in tombstone marker. */
267271
gossip_store_add(gm->gs,
268272
towire_gossip_store_delete_chan(tmpctx, scid),
269-
0);
273+
0, &gm->last_writes);
270274

271275
/* Delete from store */
272276
gossip_store_del(gm->gs, chan->cann_off, WIRE_CHANNEL_ANNOUNCEMENT);
@@ -307,7 +311,7 @@ static void remove_channel(struct gossmap_manage *gm,
307311
timestamp = gossip_store_get_timestamp(gm->gs, node->nann_off);
308312

309313
gossip_store_del(gm->gs, node->nann_off, WIRE_NODE_ANNOUNCEMENT);
310-
offset = gossip_store_add(gm->gs, nannounce, timestamp);
314+
offset = gossip_store_add(gm->gs, nannounce, timestamp, &gm->last_writes);
311315
} else {
312316
/* Are all remaining channels dying but we weren't?
313317
* Can happen if we removed this channel immediately
@@ -467,6 +471,7 @@ static bool setup_gossmap(struct gossmap_manage *gm,
467471
gm->gs = tal_free(gm->gs);
468472
return false;
469473
}
474+
gm->last_writes = tal_arr(gm, const u8 *, 0);
470475
return true;
471476
}
472477

@@ -617,9 +622,10 @@ const char *gossmap_manage_channel_announcement(const tal_t *ctx,
617622
*/
618623
if (known_amount) {
619624
/* Set with timestamp 0 (we will update once we have a channel_update) */
620-
gossip_store_add(gm->gs, announce, 0);
625+
gossip_store_add(gm->gs, announce, 0, &gm->last_writes);
621626
gossip_store_add(gm->gs,
622-
towire_gossip_store_channel_amount(tmpctx, *known_amount), 0);
627+
towire_gossip_store_channel_amount(tmpctx, *known_amount), 0,
628+
&gm->last_writes);
623629

624630
node_announcements_not_dying(gm, gossmap, pca);
625631
tal_free(pca);
@@ -743,9 +749,10 @@ void gossmap_manage_handle_get_txout_reply(struct gossmap_manage *gm, const u8 *
743749
}
744750

745751
/* Set with timestamp 0 (we will update once we have a channel_update) */
746-
gossip_store_add(gm->gs, pca->channel_announcement, 0);
752+
gossip_store_add(gm->gs, pca->channel_announcement, 0, &gm->last_writes);
747753
gossip_store_add(gm->gs,
748-
towire_gossip_store_channel_amount(tmpctx, sat), 0);
754+
towire_gossip_store_channel_amount(tmpctx, sat), 0,
755+
&gm->last_writes);
749756

750757
/* If we looking specifically for this, we no longer are. */
751758
remove_unknown_scid(gm->daemon->seeker, &scid, true);
@@ -847,7 +854,7 @@ static const char *process_channel_update(const tal_t *ctx,
847854
}
848855

849856
/* OK, apply the new one */
850-
offset = gossip_store_add(gm->gs, update, timestamp);
857+
offset = gossip_store_add(gm->gs, update, timestamp, &gm->last_writes);
851858

852859
/* If channel is dying, make sure update is also marked dying! */
853860
if (gossmap_chan_is_dying(gossmap, chan)) {
@@ -1011,7 +1018,7 @@ static void process_node_announcement(struct gossmap_manage *gm,
10111018
}
10121019

10131020
/* OK, apply the new one */
1014-
offset = gossip_store_add(gm->gs, nannounce, timestamp);
1021+
offset = gossip_store_add(gm->gs, nannounce, timestamp, &gm->last_writes);
10151022
/* If all channels are dying, make sure this is marked too. */
10161023
if (all_node_channels_dying(gossmap, node, NULL)) {
10171024
gossip_store_set_flag(gm->gs, offset,
@@ -1347,7 +1354,7 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm,
13471354

13481355
/* Save to gossip_store in case we restart */
13491356
msg = towire_gossip_store_chan_dying(tmpctx, cd.scid, cd.deadline);
1350-
cd.gossmap_offset = gossip_store_add(gm->gs, msg, 0);
1357+
cd.gossmap_offset = gossip_store_add(gm->gs, msg, 0, &gm->last_writes);
13511358
tal_arr_expand(&gm->dying_channels, cd);
13521359

13531360
/* Mark it dying, so we don't gossip it */
@@ -1449,6 +1456,9 @@ struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm)
14491456
map_used, map_size,
14501457
tal_hex(tmpctx, remainder_fd));
14511458
gossmap_disable_mmap(gm->raw_gossmap);
1459+
1460+
/* Try rewriting the last few records, syncing. */
1461+
gossip_store_rewrite_end(gm->gs, gm->last_writes);
14521462
gossmap_refresh(gm->raw_gossmap);
14531463

14541464
map_used = gossmap_lengths(gm->raw_gossmap, &map_size);
@@ -1459,6 +1469,9 @@ struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm)
14591469
}
14601470
}
14611471

1472+
/* Free up last_writes, since we've seen it on disk */
1473+
tal_free(gm->last_writes);
1474+
gm->last_writes = tal_arr(gm, const u8 *, 0);
14621475
return gm->raw_gossmap;
14631476
}
14641477

0 commit comments

Comments
 (0)