From 5e62f1d39bcd9a2d2bbd95198706d0e94a85ed8f Mon Sep 17 00:00:00 2001 From: milen <94537774+taratorio@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:50:50 +0100 Subject: [PATCH] headerdownload: fix OOM due to inifinitely growing children in link (#12404) relates to https://github.com/erigontech/erigon/issues/11387, https://github.com/erigontech/erigon/issues/11473, https://github.com/erigontech/erigon/issues/10734 tried to simulate the OOM using https://github.com/erigontech/erigon/pull/11799 What I found was infinitely growing alloc of headers when receiving new header messages in sentry's `blockHeaders66` handler (check screenshot below). It looks like this is happening because in the case of a bad child header: we delete it from the `links` map, however its parent link still holds a reference to it so the deleted link & header never get gc-ed. Furthermore if new similar bad hashes arrive after deletion they get appended to their parent header's link and the children of that link can grow indefinitely ([here](https://github.com/erigontech/erigon/blob/main/turbo/stages/headerdownload/header_algos.go#L1085-L1086)). Ie confirmed with debug logs (note link at 13450415 has 140124 children): ``` DBUG[10-21|18:18:05.003] [downloader] InsertHeader: Rejected header parent info hash=0xb488d67deaf4103880fa972fd72a7a9be552e3bc653f124f1ad9cb45f36bcd07 height=13450415 children=140124 ```
The solution for this is to remove the bad link from its parent child list [here](https://github.com/erigontech/erigon/blob/main/turbo/stages/headerdownload/header_algos.go#L544) so that 1) it gets gc-ed and 2) the children list does not grow indefinitely.
![oom-heap-profile2](https://github.com/user-attachments/assets/518fa658-c199-48b6-aa2d-110673264144) --- turbo/stages/headerdownload/header_algos.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/turbo/stages/headerdownload/header_algos.go b/turbo/stages/headerdownload/header_algos.go index 2eaa685577f..343fe7d8281 100644 --- a/turbo/stages/headerdownload/header_algos.go +++ b/turbo/stages/headerdownload/header_algos.go @@ -177,6 +177,11 @@ func (hd *HeaderDownload) removeUpwards(link *Link) { if link == nil { return } + if link.header != nil { + if parentLink, ok := hd.links[link.header.ParentHash]; ok { + parentLink.RemoveChild(link) + } + } var toRemove = []*Link{link} for len(toRemove) > 0 { removal := toRemove[len(toRemove)-1] @@ -535,8 +540,6 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult } if bad { // If the link or its parent is marked bad, throw it out - hd.moveLinkToQueue(link, NoQueue) - delete(hd.links, link.hash) hd.removeUpwards(link) dataflow.HeaderDownloadStates.AddChange(link.blockHeight, dataflow.HeaderBad) hd.stats.RejectedBadHeaders++ @@ -553,12 +556,7 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult return false, false, 0, lastTime, nil // prevent removal of the link from the hd.linkQueue } else { hd.logger.Debug("[downloader] Verification failed for header", "hash", link.hash, "height", link.blockHeight, "err", err) - hd.moveLinkToQueue(link, NoQueue) - delete(hd.links, link.hash) hd.removeUpwards(link) - if parentLink, ok := hd.links[link.header.ParentHash]; ok { - parentLink.RemoveChild(link) - } dataflow.HeaderDownloadStates.AddChange(link.blockHeight, dataflow.HeaderEvicted) hd.stats.InvalidHeaders++ return true, false, 0, lastTime, nil