Skip to content

Commit

Permalink
core/consensus: logging leader index
Browse files: browse the repository at this point in the history
  • Loading branch information
pinebit committed Oct 15, 2024
1 parent 87d1eae commit 01a48da
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 20 deletions.
31 changes: 20 additions & 11 deletions core/consensus/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,23 +445,32 @@ func (c *Component) runInstance(ctx context.Context, duty core.Duty) (err error)
}

// Instrument consensus instance.
var decided bool
var (
decided bool
nodes = len(c.peers)
)

decideCallback := func(qcommit []qbft.Msg[core.Duty, [32]byte]) {
round := qcommit[0].Round()
decided = true
decidedRoundsGauge.WithLabelValues(duty.Type.String(), string(roundTimer.Type())).Set(float64(qcommit[0].Round()))
decidedRoundsGauge.WithLabelValues(duty.Type.String(), string(roundTimer.Type())).Set(float64(round))
inst.decidedAtCh <- time.Now()
}

// Create a new qbft definition for this instance.
def := newDefinition(len(c.peers), c.subscribers, roundTimer, decideCallback)
leaderIndex := leader(duty, round, nodes)
leaderName := c.peers[leaderIndex].Name
log.Debug(ctx, "QBFT consensus decided",
z.Str("duty", duty.Type.String()),
z.U64("slot", duty.Slot),
z.I64("round", round),
z.I64("leader_index", leaderIndex),
z.Str("leader_name", leaderName))

if duty.Type == core.DutyProposer {
leaderIndex := leader(duty, 0, len(c.peers))
proposeLeaderGauge.Set(float64(leaderIndex))

log.Debug(ctx, "QBFT consensus leader index", z.I64("index", leaderIndex))
decidedLeaderGauge.Set(float64(leaderIndex))
}

// Create a new qbft definition for this instance.
def := newDefinition(nodes, c.subscribers, roundTimer, decideCallback)

// Create a new transport that handles sending and receiving for this instance.
t := transport{
component: c,
Expand All @@ -486,7 +495,7 @@ func (c *Component) runInstance(ctx context.Context, duty core.Duty) (err error)
}

// Run the algo, blocking until the context is cancelled.
err = qbft.Run[core.Duty, [32]byte](ctx, def, qt, duty, peerIdx, inst.hashCh)
err = qbft.Run(ctx, def, qt, duty, peerIdx, inst.hashCh)
if err != nil && !isContextErr(err) {
consensusError.Inc()
return err // Only return non-context errors.
Expand Down
18 changes: 10 additions & 8 deletions core/consensus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,21 @@ import (
)

var (
// Using gauge since the value changes slowly, once per slot.
decidedRoundsGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "decided_rounds",
Help: "Number of rounds it took to decide consensus instances by duty and timer type.",
}, []string{"duty", "timer"}) // Using gauge since the value changes slowly, once per slot.
}, []string{"duty", "timer"})

// Using gauge since the value changes slowly, once per slot.
decidedLeaderGauge = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "decided_leader_index",
Help: "Leader node index of the decision round.",
})

consensusDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "core",
Expand All @@ -37,11 +46,4 @@ var (
Name: "error_total",
Help: "Total count of consensus errors",
})

proposeLeaderGauge = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "propose_leader_index",
Help: "Index of leader node proposing a block for the first round",
})
)
2 changes: 1 addition & 1 deletion docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ when storing metrics from multiple nodes or clusters in one Prometheus instance.
| `core_bcast_recast_errors_total` | Counter | The total count of failed recasted registrations by source; `pregen` vs `downstream` | `source` |
| `core_bcast_recast_registration_total` | Counter | The total number of unique validator registrations stored in the recaster per pubkey | `pubkey` |
| `core_bcast_recast_total` | Counter | The total count of recasted registrations by source; `pregen` vs `downstream` | `source` |
| `core_consensus_decided_leader_index` | Gauge | Leader node index of the decision round. | |
| `core_consensus_decided_rounds` | Gauge | Number of rounds it took to decide consensus instances by duty and timer type. | `duty, timer` |
| `core_consensus_duration_seconds` | Histogram | Duration of a consensus instance in seconds by duty and timer type. | `duty, timer` |
| `core_consensus_error_total` | Counter | Total count of consensus errors | |
| `core_consensus_propose_leader_index` | Gauge | Index of leader node proposing a block for the first round | |
| `core_consensus_timeout_total` | Counter | Total count of consensus timeouts by duty and timer type. | `duty, timer` |
| `core_parsigdb_exit_total` | Counter | Total number of partially signed voluntary exits per public key | `pubkey` |
| `core_scheduler_current_epoch` | Gauge | The current epoch | |
Expand Down

0 comments on commit 01a48da

Please sign in to comment.