Skip to content

Commit

Permalink
Merge pull request #5630 from oasisprotocol/andrej/bugfix/txpool-checktx-fix
Browse files Browse the repository at this point in the history

go/runtime/txpool: Don't abort runtime if node is not synced yet
  • Loading branch information
abukosek authored Apr 21, 2024
2 parents 48209a9 + e2a8693 commit cd3f82b
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 16 deletions.
6 changes: 6 additions & 0 deletions .changelog/5630.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
go/runtime/txpool: Don't abort runtime if node is not synced yet

If the node hasn't finished syncing, `checkTxBatch` previously
caused the runtime to be aborted, even though it wasn't the
runtime's fault.
Now the checks are retried after the node has finished syncing.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 22 additions & 8 deletions go/runtime/txpool/txpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ const (
checkTxTimeout = 15 * time.Second
// checkTxRetryDelay is the time to wait before queuing a check tx retry.
checkTxRetryDelay = 1 * time.Second
// checkTxWaitRoundSyncedTimeout is the maximum time to wait for the block
// round to be synced to storage at the start of a transaction batch check.
checkTxWaitRoundSyncedTimeout = 5 * time.Second
// abortTimeout is the maximum time the runtime can spend aborting.
abortTimeout = 5 * time.Second
// maxRepublishTxs is the maximum amount of transactions to republish.
Expand Down Expand Up @@ -460,20 +463,31 @@ func (t *txPool) checkTxBatch(ctx context.Context, rr host.RichRuntime) {
return
}

// Ensure block round is synced to storage.
waitSyncCtx, cancelWaitSyncCtx := context.WithTimeout(ctx, checkTxWaitRoundSyncedTimeout)
defer cancelWaitSyncCtx()

t.logger.Debug("ensuring block round is synced", "round", bi.RuntimeBlock.Header.Round)
if _, err = t.history.WaitRoundSynced(waitSyncCtx, bi.RuntimeBlock.Header.Round); err != nil {
// Block round isn't synced yet, so make sure the batch check is
// retried later to avoid aborting the runtime, as it is not its fault.
t.logger.Info("block round is not synced yet, retrying transaction batch check later",
"round", bi.RuntimeBlock.Header.Round,
"err", err,
)
t.checkTxCh.In() <- struct{}{}
return
}

// Pop the next batch from the queue, check it, and notify submitters.
batch := t.checkTxQueue.pop()
if len(batch) == 0 {
return
}

results, err := func() ([]protocol.CheckTxResult, error) {
checkCtx, cancel := context.WithTimeout(ctx, checkTxTimeout)
defer cancel()

// Ensure block round is synced to storage.
t.logger.Debug("ensuring block round is synced", "round", bi.RuntimeBlock.Header.Round)
if _, err = t.history.WaitRoundSynced(checkCtx, bi.RuntimeBlock.Header.Round); err != nil {
return nil, err
}
checkCtx, cancelCheckCtx := context.WithTimeout(ctx, checkTxTimeout)
defer cancelCheckCtx()

// Check batch.
rawTxBatch := make([][]byte, 0, len(batch))
Expand Down
13 changes: 8 additions & 5 deletions go/worker/common/committee/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,11 +229,6 @@ func (n *Node) Start() error {
return fmt.Errorf("failed to start group services: %w", err)
}

// Start the transaction pool.
if err := n.TxPool.Start(); err != nil {
return fmt.Errorf("failed to start transaction pool: %w", err)
}

go n.worker()
if cmmetrics.Enabled() {
go n.metricsWorker()
Expand Down Expand Up @@ -616,6 +611,14 @@ func (n *Node) worker() {
n.logger.Info("consensus has finished initial synchronization")
atomic.StoreUint32(&n.consensusSynced, 1)

// Start the transaction pool after consensus is synced.
if err := n.TxPool.Start(); err != nil {
n.logger.Error("failed to start transaction pool",
"err", err,
)
return
}

// Wait for the runtime.
rt, err := n.Runtime.ActiveDescriptor(n.ctx)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion tests/runtimes/simple-rofl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ anyhow = "1.0"
async-trait = "0.1.77"
mbedtls = { version = "0.12.3", features = ["x509"] }
rand = "0.8.5"
rustls = { version = "0.22.2", default-features = false }
rustls = { version = "0.22.4", default-features = false }
rustls-mbedcrypto-provider = { version = "0.0.2" }
rustls-mbedpki-provider = { version = "0.0.2" }
tokio = { version = "1.36.0", features = ["rt", "rt-multi-thread", "sync"] }

0 comments on commit cd3f82b

Please sign in to comment.