Skip to content

Commit

Permalink
Merge pull request #6898 from onflow/jord/benchnet-testing-updates
Browse files Browse the repository at this point in the history
[EFM] Fix bugs from Benchnet testing
  • Loading branch information
jordanschalm authored Jan 20, 2025
2 parents db5a8d7 + 21f9688 commit f847f03
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 7 deletions.
3 changes: 2 additions & 1 deletion cmd/consensus/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ func main() {
PostInit(func(nodeConfig *cmd.NodeConfig) error {
// TODO(EFM, #6794): This function is introduced to implement a backward-compatible upgrade from v1 to v2.
// Remove this once we complete the network upgrade.
if err := operation.RetryOnConflict(nodeBuilder.DB.Update, operation.MigrateDKGEndStateFromV1()); err != nil {
log := nodeConfig.Logger.With().Str("postinit", "dkg_end_state_migration").Logger()
if err := operation.RetryOnConflict(nodeBuilder.SecretsDB.Update, operation.MigrateDKGEndStateFromV1(log)); err != nil {
return fmt.Errorf("could not migrate DKG end state from v1 to v2: %w", err)
}
return nil
Expand Down
14 changes: 11 additions & 3 deletions engine/consensus/dkg/reactor_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,16 +284,24 @@ func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uin

return
}
if currentState != flow.DKGStateCompleted {
log.Warn().Msgf("checking beacon key consistency: exiting because dkg didn't reach completed state: %s", currentState.String())
// (i) if I have a key (currentState == flow.DKGStateCompleted) which is consistent with the EpochCommit service event,
// then commit the key or (ii) if the key is already committed (currentState == flow.RandomBeaconKeyCommitted), then we
// expect it to be consistent with the EpochCommit service event. While (ii) is a sanity check, we have a severe problem
// if it is violated, because a node signing with an invalid Random beacon key will be slashed - so we better check!
// Our logic for committing a key is idempotent: it is a no-op when stating that a key `k` should be committed that previously
// has already been committed; while it errors if `k` is different from the previously-committed key. In other words, the
// sanity check (ii) is already included in the happy-path logic for (i). So we just repeat the happy-path logic also for
// currentState == flow.RandomBeaconKeyCommitted, because repeated calls only occur due to node crashes, which are rare.
if currentState != flow.DKGStateCompleted && currentState != flow.RandomBeaconKeyCommitted {
log.Warn().Msgf("checking beacon key consistency after EpochCommit: exiting because dkg didn't reach completed state: %s", currentState.String())
return
}
snapshot := e.State.AtBlockID(firstBlock.ID())

// Since epoch phase transitions are emitted when the first block of the new
// phase is finalized, the block's snapshot is guaranteed to already be
// accessible in the protocol state at this point (even though the Badger
// transaction finalizing the block has not been committed yet).
snapshot := e.State.AtBlockID(firstBlock.ID())
nextDKG, err := snapshot.Epochs().Next().DKG()
if err != nil {
// CAUTION: this should never happen, indicates a storage failure or corruption
Expand Down
7 changes: 6 additions & 1 deletion storage/badger/operation/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"

"github.com/dgraph-io/badger/v2"
"github.com/rs/zerolog"

"github.com/onflow/flow-go/model/encodable"
"github.com/onflow/flow-go/model/flow"
Expand Down Expand Up @@ -81,7 +82,7 @@ func RetrieveDKGStateForEpoch(epochCounter uint64, currentState *flow.DKGState)
// It reads already stored data by deprecated prefix and writes it to the new prefix with values converted to the new representation.
// TODO(EFM, #6794): This function is introduced to implement a backward-compatible upgrade from v1 to v2.
// Remove this once we complete the network upgrade.
func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
func MigrateDKGEndStateFromV1(log zerolog.Logger) func(txn *badger.Txn) error {
return func(txn *badger.Txn) error {
var ops []func(*badger.Txn) error
err := traverse(makePrefix(codeDKGEndState), func() (checkFunc, createFunc, handleFunc) {
Expand Down Expand Up @@ -110,6 +111,7 @@ func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
ops = append(ops,
UpsertDKGStateForEpoch(epochCounter, newState),
remove(makePrefix(codeDKGEndState, epochCounter)))
log.Info().Msgf("removing %d->%d, writing %d->%d", epochCounter, oldState, epochCounter, newState)

return nil
}
Expand All @@ -124,6 +126,9 @@ func MigrateDKGEndStateFromV1() func(txn *badger.Txn) error {
return fmt.Errorf("aborting conversion from DKG end states: %w", err)
}
}
if len(ops) > 0 {
log.Info().Msgf("finished migrating %d DKG end states", len(ops))
}
return nil
}
}
4 changes: 2 additions & 2 deletions storage/badger/operation/dkg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func TestMigrateDKGEndStateFromV1(t *testing.T) {
}

// migrate the state
err := db.Update(MigrateDKGEndStateFromV1())
err := db.Update(MigrateDKGEndStateFromV1(unittest.Logger()))
assert.NoError(t, err)

assertMigrationSuccessful := func() {
Expand Down Expand Up @@ -167,7 +167,7 @@ func TestMigrateDKGEndStateFromV1(t *testing.T) {
assertMigrationSuccessful()

// migrating again should be no-op
err = db.Update(MigrateDKGEndStateFromV1())
err = db.Update(MigrateDKGEndStateFromV1(unittest.Logger()))
assert.NoError(t, err)
assertMigrationSuccessful()
})
Expand Down

0 comments on commit f847f03

Please sign in to comment.