Skip to content

Commit

Permalink
feat(sentry): Improve startup procedure (#393)
Browse files: browse the repository at this point in the history
* feat(sentry): Improve startup procedure

* fix: Increase timeout to 10 minutes for beacon node
  • Loading branch information
samcm authored Oct 9, 2024
1 parent 4854584 commit 0aa3b3c
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 7 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ require (
github.com/chuckpreslar/emission v0.0.0-20170206194824-a7ddd980baf9
github.com/creasty/defaults v1.7.0
github.com/ethereum/go-ethereum v1.14.10
github.com/ethpandaops/beacon v0.41.0
github.com/ethpandaops/beacon v0.42.0
github.com/ethpandaops/ethcore v0.0.0-20240422023000-2a5727b18756
github.com/ethpandaops/ethwallclock v0.3.0
github.com/go-co-op/gocron v1.27.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ github.com/ethereum/go-ethereum v1.14.10 h1:kC24WjYeRjDy86LVo6MfF5Xs7nnUu+XG4Aja
github.com/ethereum/go-ethereum v1.14.10/go.mod h1:+l/fr42Mma+xBnhefL/+z11/hcmJ2egl+ScIVPjhc7E=
github.com/ethereum/go-verkle v0.1.1-0.20240829091221-dffa7562dbe9 h1:8NfxH2iXvJ60YRB8ChToFTUzl8awsc3cJ8CbLjGIl/A=
github.com/ethereum/go-verkle v0.1.1-0.20240829091221-dffa7562dbe9/go.mod h1:M3b90YRnzqKyyzBEWJGqj8Qff4IDeXnzFw0P9bFw3uk=
github.com/ethpandaops/beacon v0.41.0 h1:9CmgNeTZ6X+B1U7SOJzy3rf6WFtFb3CA2DTFEgGwLc8=
github.com/ethpandaops/beacon v0.41.0/go.mod h1:hKfalJGsF4BuWPwcGCX/4fdQR31zDJVaTLWwrkfNTzw=
github.com/ethpandaops/beacon v0.42.0 h1:5a3ld5wuAgX+N5KxEPuNfxDhdeiBG4gXlTAgCm0AuSE=
github.com/ethpandaops/beacon v0.42.0/go.mod h1:hKfalJGsF4BuWPwcGCX/4fdQR31zDJVaTLWwrkfNTzw=
github.com/ethpandaops/ethcore v0.0.0-20240422023000-2a5727b18756 h1:8JWjrRfP14m0oxOk03m11n/xgdY5ceyUf/ZxYdOs5gE=
github.com/ethpandaops/ethcore v0.0.0-20240422023000-2a5727b18756/go.mod h1:ZvKqL6CKxiraefdXPHeJurV2pDD/f2HF2uklDVdrry8=
github.com/ethpandaops/ethwallclock v0.3.0 h1:xF5fwtBf+bHFHZKBnwiPFEuelW3sMM7SD3ZNFq1lJY4=
Expand Down
25 changes: 22 additions & 3 deletions pkg/sentry/ethereum/beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,13 @@ func (b *BeaconNode) Start(ctx context.Context) error {
s := gocron.NewScheduler(time.Local)

errs := make(chan error, 1)
healthyFirstTime := make(chan struct{})

b.beacon.OnFirstTimeHealthy(ctx, func(ctx context.Context, event *beacon.FirstTimeHealthyEvent) error {
b.log.Info("Upstream beacon node is healthy")

close(healthyFirstTime)

go func() {
wg := sync.WaitGroup{}

for _, service := range b.services {
Expand All @@ -88,6 +93,8 @@ func (b *BeaconNode) Start(ctx context.Context) error {
errs <- fmt.Errorf("failed to start service: %w", err)
}

b.log.WithField("service", service.Name()).Info("Waiting for service to be ready")

wg.Wait()
}

Expand All @@ -98,14 +105,26 @@ func (b *BeaconNode) Start(ctx context.Context) error {
errs <- fmt.Errorf("failed to run on ready callback: %w", err)
}
}
}()

return nil
})

s.StartAsync()

if err := b.beacon.Start(ctx); err != nil {
b.beacon.StartAsync(ctx)

select {
case err := <-errs:
return err
case <-ctx.Done():
return ctx.Err()
case <-healthyFirstTime:
// Beacon node is healthy, continue with normal operation
case <-time.After(10 * time.Minute):
return errors.New("upstream beacon node is not healthy. check your configuration.")
}

// Wait for any errors after the first healthy event
select {
case err := <-errs:
return err
Expand Down
6 changes: 5 additions & 1 deletion pkg/sentry/ethereum/services/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,14 @@ func (m *MetadataService) Start(ctx context.Context) error {
return nil
}

if err := backoff.Retry(operation, backoff.NewExponentialBackOff()); err != nil {
if err := backoff.RetryNotify(operation, backoff.NewExponentialBackOff(), func(err error, duration time.Duration) {
m.log.WithError(err).Warnf("Failed to refresh metadata, retrying in %s", duration)
}); err != nil {
m.log.WithError(err).Warn("Failed to refresh metadata")
}

m.log.Info("Metadata service is ready")

for _, cb := range m.onReadyCallbacks {
if err := cb(ctx); err != nil {
m.log.WithError(err).Warn("Failed to execute onReady callback")
Expand Down

0 comments on commit 0aa3b3c

Please sign in to comment.