Skip to content

Commit

Permalink
astrid: fix stage integration cached fork choice logic (#11870)
Browse files Browse the repository at this point in the history
Next chain tip error caught and fixed for astrid stage integration:
```
append with gap blockNum=11561329, but current height=11561327
```

happens after unwind due to a fork change in the corresponding fork
choice update.

This is due to a bug in the logic of handling fork choice updates in the
stage integration. The issue is that when processing the
`cachedForkChoice` after we have done the unwind, `fixCanonicalChain`
returns empty `newNodes` (correctly, since the chain was fixed before we
cached the fork choice). The solution is to cache the new nodes as the
`cachedForkChoice` so that when we process the cached fork choice in the
next iteration we can correctly update the tx nums for the new nodes.

Full logs:
```
INFO[09-04|16:14:31.018] [2/6 PolygonSync] update fork choice     block=11561328 age=0 hash=0x41ebb5e01406c1f013f06ee4e53ab68b125f071717d50bcdcfa4597a0a052cfe
INFO[09-04|16:14:31.019] [2/6 PolygonSync] new fork - unwinding and caching fork choice 
DBUG[09-04|16:14:31.021] UnwindTo                                 block=11561327 block_hash=0xc8ba20e1e4dc312bda4aadc5108722205693783b1c2d6103cb70949bda58a460 err=nil stack="[sync.go:171 stage_polygon_sync.go:1391 stage_polygon_sync.go:1356 stage_polygon_sync.go:1478 stage_polygon_sync.go:501 stage_polygon_sync.go:175 default_stages.go:479 sync.go:531 sync.go:410 stageloop.go:249 stageloop.go:101 asm_arm64.s:1222]"
DBUG[09-04|16:14:31.021] [2/6 PolygonSync] DONE                   in=5.45216175s
DBUG[09-04|16:14:31.021] [1/6 OtterSync] DONE                     in=21.167µs
INFO[09-04|16:14:31.021] [2/6 PolygonSync] forward                progress=11561327
INFO[09-04|16:14:31.021] [2/6 PolygonSync] new fork - processing cached fork choice after unwind 
INFO[09-04|16:14:31.022] [2/6 PolygonSync] update fork choice     block=11561328 age=0 hash=0x41ebb5e01406c1f013f06ee4e53ab68b125f071717d50bcdcfa4597a0a052cfe
DBUG[09-04|16:14:31.022] [2/6 PolygonSync] DONE                   in=186.792µs
DBUG[09-04|16:14:31.022] [3/6 Senders] DONE                       in=236.458µs
INFO[09-04|16:14:31.024] [4/6 Execution] Done Commit every block  blk=11561327 blks=1 blk/s=1125.7 txs=2 tx/s=2.25k gas/s=0 buf=0B/512.0MB stepsInDB=0.00 step=24.3 alloc=600.4MB sys=1.7GB
DBUG[09-04|16:14:31.024] [4/6 Execution] DONE                     in=2.020375ms
DBUG[09-04|16:14:31.024] [5/6 TxLookup] DONE                      in=74.292µs
DBUG[09-04|16:14:31.024] [6/6 Finish] DONE                        in=2.958µs
INFO[09-04|16:14:31.024] Timings (slower than 50ms)               PolygonSync=5.452s alloc=600.5MB sys=1.7GB
DBUG[09-04|16:14:31.025] [6/6 Finish] Prune done                  in=5.625µs
DBUG[09-04|16:14:31.025] [5/6 TxLookup] Prune done                in=237.084µs
DBUG[09-04|16:14:31.025] [4/6 Execution] Prune done               in=65.958µs
DBUG[09-04|16:14:31.025] [3/6 Senders] Prune done                 in=2.75µs
DBUG[09-04|16:14:31.025] [2/6 PolygonSync] Prune done             in=2.25µs
DBUG[09-04|16:14:31.025] [snapshots] Prune Blocks                 to=11559976 limit=10
DBUG[09-04|16:14:31.026] [snapshots] Prune Bor Blocks             to=11559976 limit=10
DBUG[09-04|16:14:31.026] [1/6 OtterSync] Prune done               in=1.334833ms
DBUG[09-04|16:14:31.154] [1/6 OtterSync] DONE                     in=6.792µs
INFO[09-04|16:14:31.154] [2/6 PolygonSync] forward                progress=11561327
DBUG[09-04|16:14:33.030] [bridge] processing new blocks           from=11561329 to=11561329 lastProcessedBlockNum=11561328 lastProcessedBlockTime=1725462871 lastProcessedEventID=2688
DBUG[09-04|16:14:33.030] [sync] inserted blocks                   len=1 duration=1.184125ms
DBUG[09-04|16:14:33.030] [bor.heimdall] synchronizing spans...    blockNum=11561329
DBUG[09-04|16:14:33.031] [bridge] synchronizing events...         blockNum=11561329 lastProcessedBlockNum=11561328
INFO[09-04|16:14:33.031] [2/6 PolygonSync] update fork choice     block=11561329 age=0 hash=0x298f72d6fbbfdc8d3df098828867dea7e8e7bba787c1eb17f6c6025afa9ac3d1
WARN[09-04|16:14:33.032] [bor.heimdall] an error while fetching   path=bor/latest-span queryParams= attempt=1 err="Get \"https://heimdall-api-amoy.polygon.technology/bor/latest-span\": context canceled"
DBUG[09-04|16:14:33.032] [bor.heimdall] request canceled          reason="context canceled" path=bor/latest-span queryParams= attempt=1
EROR[09-04|16:14:36.032] [2/6 PolygonSync] stopping node          err="append with gap blockNum=11561329, but current height=11561327, stack: [txnum.go:149 accessors_chain.go:703 stage_polygon_sync.go:1398 stage_polygon_sync.go:1356 stage_polygon_sync.go:1478 stage_polygon_sync.go:501 stage_polygon_sync.go:175 default_stages.go:479 sync.go:531 sync.go:410 stageloop.go:249 stageloop.go:101 asm_arm64.s:1222]"
DBUG[09-04|16:14:36.032] Error while executing stage              err="[2/6 PolygonSync] stopped: append with gap blockNum=11561329, but current height=11561327, stack: [txnum.go:149 accessors_chain.go:703 stage_polygon_sync.go:1398 stage_polygon_sync.go:1356 stage_polygon_sync.go:1478 stage_polygon_sync.go:501 stage_polygon_sync.go:175 default_stages.go:479 sync.go:531 sync.go:410 stageloop.go:249 stageloop.go:101 asm_arm64.s:1222]"
DBUG[09-04|16:14:36.033] rpcdaemon: the subscription to pending blocks channel was closed 
```
  • Loading branch information
taratorio authored Sep 4, 2024
1 parent ed2f5f8 commit e138dab
Showing 1 changed file with 75 additions and 15 deletions.
90 changes: 75 additions & 15 deletions eth/stagedsync/stage_polygon_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,15 +470,8 @@ func (s *polygonSyncStageService) Run(ctx context.Context, tx kv.RwTx, stageStat

s.runBgComponentsOnce(ctx)

if s.executionEngine.cachedForkChoice != nil {
s.logger.Info(s.appendLogPrefix("new fork - processing cached fork choice after unwind"))
err := s.executionEngine.updateForkChoice(tx, s.executionEngine.cachedForkChoice)
if err != nil {
return err
}

s.executionEngine.cachedForkChoice = nil
return nil
if err := s.executionEngine.processCachedForkChoiceIfNeeded(tx); err != nil {
return err
}

for {
Expand Down Expand Up @@ -1261,6 +1254,36 @@ func (s polygonSyncStageBridgeStore) Close() {
// no-op
}

func newPolygonSyncStageForkChoice(newNodes []chainNode) *polygonSyncStageForkChoice {
if len(newNodes) == 0 {
panic("unexpected newNodes to be 0")
}

return &polygonSyncStageForkChoice{newNodes: newNodes}
}

type polygonSyncStageForkChoice struct {
// note newNodes contains tip first and its new ancestors after it (oldest is last)
// we assume len(newNodes) is never 0, guarded by panic in newPolygonSyncStageForkChoice
newNodes []chainNode
}

func (fc polygonSyncStageForkChoice) tipBlockNum() uint64 {
return fc.newNodes[0].number
}

func (fc polygonSyncStageForkChoice) tipBlockHash() common.Hash {
return fc.newNodes[0].hash
}

func (fc polygonSyncStageForkChoice) oldestNewAncestorBlockNum() uint64 {
return fc.newNodes[len(fc.newNodes)-1].number
}

func (fc polygonSyncStageForkChoice) numNodes() int {
return len(fc.newNodes)
}

type polygonSyncStageExecutionEngine struct {
blockReader services.FullBlockReader
txActionStream chan<- polygonSyncStageTxAction
Expand All @@ -1269,7 +1292,7 @@ type polygonSyncStageExecutionEngine struct {
appendLogPrefix func(string) string
stageState *StageState
unwinder Unwinder
cachedForkChoice *types.Header
cachedForkChoice *polygonSyncStageForkChoice
}

func (e *polygonSyncStageExecutionEngine) GetHeader(ctx context.Context, blockNum uint64) (*types.Header, error) {
Expand Down Expand Up @@ -1385,21 +1408,38 @@ func (e *polygonSyncStageExecutionEngine) updateForkChoice(tx kv.RwTx, tip *type
}

if len(badNodes) > 0 {
e.logger.Info(e.appendLogPrefix("new fork - unwinding and caching fork choice"))
badNode := badNodes[len(badNodes)-1]
e.cachedForkChoice = tip
return e.unwinder.UnwindTo(badNode.number, ForkReset(badNode.hash), tx)
unwindNumber := badNode.number - 1
badHash := badNode.hash
e.cachedForkChoice = newPolygonSyncStageForkChoice(newNodes)

e.logger.Info(
e.appendLogPrefix("new fork - unwinding and caching fork choice"),
"unwindNumber", unwindNumber,
"badHash", badHash,
"cachedTipNumber", e.cachedForkChoice.tipBlockNum(),
"cachedTipHash", e.cachedForkChoice.tipBlockHash(),
"cachedNewNodes", e.cachedForkChoice.numNodes(),
)

return e.unwinder.UnwindTo(unwindNumber, ForkReset(badHash), tx)
}

if len(newNodes) == 0 {
return nil
}

if err := rawdb.AppendCanonicalTxNums(tx, newNodes[len(newNodes)-1].number); err != nil {
return e.updateForkChoiceForward(tx, newPolygonSyncStageForkChoice(newNodes))
}

func (e *polygonSyncStageExecutionEngine) updateForkChoiceForward(tx kv.RwTx, fc *polygonSyncStageForkChoice) error {
tipBlockNum := fc.tipBlockNum()

if err := rawdb.AppendCanonicalTxNums(tx, fc.oldestNewAncestorBlockNum()); err != nil {
return err
}

if err := rawdb.WriteHeadHeaderHash(tx, tipHash); err != nil {
if err := rawdb.WriteHeadHeaderHash(tx, fc.tipBlockHash()); err != nil {
return err
}

Expand All @@ -1422,6 +1462,26 @@ func (e *polygonSyncStageExecutionEngine) updateForkChoice(tx kv.RwTx, tip *type
return nil
}

func (e *polygonSyncStageExecutionEngine) processCachedForkChoiceIfNeeded(tx kv.RwTx) error {
if e.cachedForkChoice == nil {
return nil
}

e.logger.Info(
e.appendLogPrefix("new fork - processing cached fork choice after unwind"),
"cachedTipNumber", e.cachedForkChoice.tipBlockNum(),
"cachedTipHash", e.cachedForkChoice.tipBlockHash(),
"cachedNewNodes", e.cachedForkChoice.numNodes(),
)

if err := e.updateForkChoiceForward(tx, e.cachedForkChoice); err != nil {
return err
}

e.cachedForkChoice = nil
return nil
}

func (e *polygonSyncStageExecutionEngine) CurrentHeader(ctx context.Context) (*types.Header, error) {
type response struct {
result *types.Header
Expand Down

0 comments on commit e138dab

Please sign in to comment.