From 5e62f1d39bcd9a2d2bbd95198706d0e94a85ed8f Mon Sep 17 00:00:00 2001
From: milen <94537774+taratorio@users.noreply.github.com>
Date: Tue, 22 Oct 2024 06:50:50 +0100
Subject: [PATCH] headerdownload: fix OOM due to inifinitely growing children
in link (#12404)
relates to https://github.com/erigontech/erigon/issues/11387,
https://github.com/erigontech/erigon/issues/11473,
https://github.com/erigontech/erigon/issues/10734
tried to simulate the OOM using
https://github.com/erigontech/erigon/pull/11799
What I found was infinitely growing alloc of headers when receiving new
header messages in sentry's `blockHeaders66` handler (check screenshot
below).
It looks like this is happening because in the case of a bad child
header: we delete it from the `links` map, however its parent link still
holds a reference to it so the deleted link & header never get gc-ed.
Furthermore if new similar bad hashes arrive after deletion they get
appended to their parent header's link and the children of that link can
grow indefinitely
([here](https://github.com/erigontech/erigon/blob/main/turbo/stages/headerdownload/header_algos.go#L1085-L1086)).
Ie confirmed with debug logs (note link at 13450415 has 140124
children):
```
DBUG[10-21|18:18:05.003] [downloader] InsertHeader: Rejected header parent info hash=0xb488d67deaf4103880fa972fd72a7a9be552e3bc653f124f1ad9cb45f36bcd07 height=13450415 children=140124
```
The solution for this is to remove the bad link from its parent child
list
[here](https://github.com/erigontech/erigon/blob/main/turbo/stages/headerdownload/header_algos.go#L544)
so that 1) it gets gc-ed and 2) the children list does not grow
indefinitely.
![oom-heap-profile2](https://github.com/user-attachments/assets/518fa658-c199-48b6-aa2d-110673264144)
---
turbo/stages/headerdownload/header_algos.go | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/turbo/stages/headerdownload/header_algos.go b/turbo/stages/headerdownload/header_algos.go
index 2eaa685577f..343fe7d8281 100644
--- a/turbo/stages/headerdownload/header_algos.go
+++ b/turbo/stages/headerdownload/header_algos.go
@@ -177,6 +177,11 @@ func (hd *HeaderDownload) removeUpwards(link *Link) {
if link == nil {
return
}
+ if link.header != nil {
+ if parentLink, ok := hd.links[link.header.ParentHash]; ok {
+ parentLink.RemoveChild(link)
+ }
+ }
var toRemove = []*Link{link}
for len(toRemove) > 0 {
removal := toRemove[len(toRemove)-1]
@@ -535,8 +540,6 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult
}
if bad {
// If the link or its parent is marked bad, throw it out
- hd.moveLinkToQueue(link, NoQueue)
- delete(hd.links, link.hash)
hd.removeUpwards(link)
dataflow.HeaderDownloadStates.AddChange(link.blockHeight, dataflow.HeaderBad)
hd.stats.RejectedBadHeaders++
@@ -553,12 +556,7 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult
return false, false, 0, lastTime, nil // prevent removal of the link from the hd.linkQueue
} else {
hd.logger.Debug("[downloader] Verification failed for header", "hash", link.hash, "height", link.blockHeight, "err", err)
- hd.moveLinkToQueue(link, NoQueue)
- delete(hd.links, link.hash)
hd.removeUpwards(link)
- if parentLink, ok := hd.links[link.header.ParentHash]; ok {
- parentLink.RemoveChild(link)
- }
dataflow.HeaderDownloadStates.AddChange(link.blockHeight, dataflow.HeaderEvicted)
hd.stats.InvalidHeaders++
return true, false, 0, lastTime, nil