From c6801b967c31aff5bcdf5cc041bcc02378bf6be4 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Sun, 15 Feb 2026 21:35:40 +0100 Subject: [PATCH 01/11] DecodedValidator -> DecodedValidatorV1 --- crates/commonware-node/src/peer_manager/actor.rs | 4 ++-- crates/commonware-node/src/validators.rs | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index 2f3089b607..31613ce9fc 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -20,7 +20,7 @@ use tracing::{Span, error, info, info_span, instrument, warn}; use crate::{ consensus::block::Block, - validators::{self, DecodedValidator, read_validator_config_with_retry}, + validators::{self, DecodedValidatorV1, read_validator_config_with_retry}, }; use super::ingress::{Message, MessageWithCause}; @@ -294,7 +294,7 @@ fn is_past_hardfork(_block: &Block) -> bool { fn construct_peer_set( outcome: &OnchainDkgOutcome, - validators: &commonware_utils::ordered::Map, + validators: &commonware_utils::ordered::Map, ) -> commonware_utils::ordered::Map { // Dealers are output.players() from the previous epoch's DKG output. // Players are outcome.next_players (the players for the next DKG round). 
diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index fb6a33737e..adb62feab2 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -36,7 +36,7 @@ pub(crate) async fn read_validator_config_with_retry( node: &TempoFullNode, target: ReadTarget, total_attempts: &Counter, -) -> ordered::Map { +) -> ordered::Map { let mut attempts = 0; const MIN_RETRY: Duration = Duration::from_secs(1); const MAX_RETRY: Duration = Duration::from_secs(30); @@ -159,7 +159,7 @@ pub(crate) async fn read_from_contract_at_height( _attempt: u32, node: &TempoFullNode, height: Height, -) -> eyre::Result> { +) -> eyre::Result> { let raw_validators = read_validator_config_at_height(node, height, |config| { config .get_validators() @@ -174,12 +174,12 @@ pub(crate) async fn read_from_contract_at_height( #[instrument(skip_all, fields(validators_to_decode = contract_vals.len()))] async fn decode_from_contract( contract_vals: Vec, -) -> ordered::Map { +) -> ordered::Map { let mut decoded = HashMap::new(); for val in contract_vals.into_iter() { // NOTE: not reporting errors because `decode_from_contract` emits // events on success and error - if let Ok(val) = DecodedValidator::decode_from_contract(val) + if let Ok(val) = DecodedValidatorV1::decode_from_contract(val) && let Some(old) = decoded.insert(val.public_key.clone(), val) { warn!( @@ -199,7 +199,7 @@ async fn decode_from_contract( /// `` is either an IPv4 or IPV6 address, or a fully qualified domain name. /// `` is an IPv4 or IPv6 address. #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct DecodedValidator { +pub(crate) struct DecodedValidatorV1 { pub(crate) active: bool, /// The `publicKey` field of the contract. 
Used by other validators to /// identify a peer by verifying the signatures of its p2p messages and @@ -224,7 +224,7 @@ pub(crate) struct DecodedValidator { pub(crate) address: Address, } -impl DecodedValidator { +impl DecodedValidatorV1 { /// Attempts to decode a single validator from the values read in the smart contract. /// /// This function does not perform hostname lookup on either of the addresses. @@ -260,7 +260,7 @@ impl DecodedValidator { } } -impl std::fmt::Display for DecodedValidator { +impl std::fmt::Display for DecodedValidatorV1 { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!( "public key = `{}`, inbound = `{}`, outbound = `{}`, index = `{}`, address = `{}`", From 955491a1531af15b4577f0b33b6df2bca0679f3a Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Mon, 16 Feb 2026 00:31:18 +0100 Subject: [PATCH 02/11] peer manager reads from val config v1 or v2 depending on block timestamp --- Cargo.lock | 2 + crates/commonware-node/Cargo.toml | 4 +- .../commonware-node/src/peer_manager/actor.rs | 56 +++-- crates/commonware-node/src/validators.rs | 192 +++++++++++++++--- 4 files changed, 212 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c7fd9a5cb..d7141e7439 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11835,6 +11835,7 @@ dependencies = [ "futures", "governor", "indexmap 2.13.0", + "itertools 0.14.0", "jiff", "parking_lot", "pin-project", @@ -11851,6 +11852,7 @@ dependencies = [ "reth-revm", "reth-rpc-convert", "tempo-alloy", + "tempo-chainspec", "tempo-commonware-node-config", "tempo-dkg-onchain-artifacts", "tempo-node", diff --git a/crates/commonware-node/Cargo.toml b/crates/commonware-node/Cargo.toml index 23928d6d55..1f984fb457 100644 --- a/crates/commonware-node/Cargo.toml +++ b/crates/commonware-node/Cargo.toml @@ -12,6 +12,7 @@ workspace = true [dependencies] tempo-alloy.workspace = true +tempo-chainspec.workspace = true tempo-commonware-node-config.workspace = 
true tempo-dkg-onchain-artifacts.workspace = true tempo-node.workspace = true @@ -45,8 +46,10 @@ eyre.workspace = true futures.workspace = true governor.workspace = true indexmap.workspace = true +itertools.workspace = true jiff = { workspace = true, features = ["std"] } parking_lot.workspace = true +pin-project = "1.1.10" prometheus-client.workspace = true rand_08.workspace = true @@ -63,4 +66,3 @@ reth-rpc-convert.workspace = true tokio = { workspace = true, features = ["macros", "sync"] } tracing.workspace = true -pin-project = "1.1.10" diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index 31613ce9fc..c742c25775 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -1,3 +1,5 @@ +use std::net::SocketAddr; + use alloy_consensus::BlockHeader as _; use commonware_codec::ReadExt as _; use commonware_consensus::{ @@ -11,6 +13,7 @@ use commonware_runtime::{Clock, ContextCell, Metrics, Spawner, spawn_cell}; use commonware_utils::{Acknowledgement, acknowledgement::Exact}; use eyre::{OptionExt as _, WrapErr as _}; use futures::{StreamExt as _, channel::mpsc}; +use itertools::Either; use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; use reth_ethereum::network::NetworkInfo; use reth_provider::{BlockNumReader as _, HeaderProvider}; @@ -20,7 +23,7 @@ use tracing::{Span, error, info, info_span, instrument, warn}; use crate::{ consensus::block::Block, - validators::{self, DecodedValidatorV1, read_validator_config_with_retry}, + validators::{self, Validators, read_validator_config_with_retry}, }; use super::ingress::{Message, MessageWithCause}; @@ -173,6 +176,7 @@ where let all_validators = read_validator_config_with_retry( &self.context, &self.execution_node, + &header, validators::ReadTarget::AtLeast { height: last_boundary, }, @@ -264,6 +268,7 @@ where let all_validators = read_validator_config_with_retry( &self.context, 
&self.execution_node, + block.header(), validators::ReadTarget::AtLeast { height: block.height(), }, @@ -292,9 +297,9 @@ fn is_past_hardfork(_block: &Block) -> bool { false } -fn construct_peer_set( +pub(crate) fn construct_peer_set( outcome: &OnchainDkgOutcome, - validators: &commonware_utils::ordered::Map, + validators: &Validators, ) -> commonware_utils::ordered::Map { // Dealers are output.players() from the previous epoch's DKG output. // Players are outcome.next_players (the players for the next DKG round). @@ -303,21 +308,40 @@ fn construct_peer_set( .dealers() .iter() .chain(outcome.next_players().iter()) - .chain( - validators - .iter_pairs() - .filter(|(_, v)| v.active) - .map(|(k, _)| k), - ); + .chain(match validators { + Validators::V1(validators) => Either::Left( + validators + .iter_pairs() + .filter_map(|(k, v)| v.is_active().then_some(k)), + ), + Validators::V2(validators) => Either::Right( + validators + .iter_pairs() + .filter_map(|(k, v)| v.is_active().then_some(k)), + ), + }); commonware_utils::ordered::Map::from_iter_dedup(all_keys.map(|key| { - let addr = validators - .get_value(key) - .expect( - "all DKG participants must have an entry in the \ + let addr = match validators { + Validators::V1(vals) => commonware_p2p::Address::Symmetric( + vals.get_value(key) + .expect( + "all DKG participants must have an entry in the \ unfiltered, contract validator set", - ) - .outbound; - (key.clone(), commonware_p2p::Address::Symmetric(addr)) + ) + .outbound, + ), + Validators::V2(vals) => { + let val = vals.get_value(key).expect( + "all DKG participants must have an entry in the \ + unfiltered, contract validator set", + ); + commonware_p2p::Address::Asymmetric { + ingress: commonware_p2p::Ingress::Socket(val.ingress()), + egress: SocketAddr::new(val.egress(), 0), + } + } + }; + (key.clone(), addr) })) } diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index adb62feab2..1da086f7a6 100644 --- 
a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -1,10 +1,15 @@ -use std::{collections::HashMap, net::SocketAddr, time::Duration}; +use std::{ + collections::HashMap, + net::{IpAddr, SocketAddr}, + time::Duration, +}; +use alloy_consensus::BlockHeader; use alloy_primitives::Address; use commonware_codec::DecodeExt as _; use commonware_consensus::types::Height; use commonware_cryptography::ed25519::PublicKey; -use commonware_utils::ordered; +use commonware_utils::{TryFromIterator, ordered}; use eyre::{OptionExt as _, WrapErr as _}; use prometheus_client::metrics::counter::Counter; use reth_ethereum::{ @@ -16,12 +21,15 @@ use reth_provider::{ BlockHashReader as _, BlockIdReader as _, BlockNumReader as _, BlockReader as _, BlockSource, StateProviderFactory as _, }; +use tempo_chainspec::hardfork::TempoHardforks as _; use tempo_node::TempoFullNode; use tempo_precompiles::{ storage::StorageCtx, validator_config::{IValidatorConfig, ValidatorConfig}, + validator_config_v2::{IValidatorConfigV2, ValidatorConfigV2}, }; +use tempo_primitives::TempoHeader; use tracing::{Level, info, instrument, warn}; pub(crate) enum ReadTarget { @@ -31,12 +39,16 @@ pub(crate) enum ReadTarget { /// Attempts to read the validator config from the smart contract, retrying /// until the required block height is available. +/// +/// Uses the timestamp of `reference_header` to decide whether to read validator +/// config v1 or v2. 
pub(crate) async fn read_validator_config_with_retry( context: &impl commonware_runtime::Clock, node: &TempoFullNode, + reference_header: &TempoHeader, target: ReadTarget, total_attempts: &Counter, -) -> ordered::Map { +) -> Validators { let mut attempts = 0; const MIN_RETRY: Duration = Duration::from_secs(1); const MAX_RETRY: Duration = Duration::from_secs(30); @@ -56,7 +68,9 @@ pub(crate) async fn read_validator_config_with_retry( .unwrap_or(height), }; - if let Ok(validators) = read_from_contract_at_height(attempts, node, target_height).await { + if let Ok(validators) = + read_from_contract_at_height(attempts, node, target_height.get(), reference_header) + { break 'read_contract validators; } @@ -83,11 +97,14 @@ pub(crate) async fn read_validator_config_with_retry( } /// Reads state from the ValidatorConfig precompile at a given block height. -pub(crate) fn read_validator_config_at_height( +pub(crate) fn read_validator_config_at_height( node: &TempoFullNode, - height: Height, - read_fn: impl FnOnce(&ValidatorConfig) -> eyre::Result, -) -> eyre::Result { + height: u64, + read_fn: impl FnOnce(&C) -> eyre::Result, +) -> eyre::Result +where + C: Default, +{ // Try mapping the block height to a hash tracked by reth. // // First check the canonical chain, then fallback to pending block state. @@ -95,7 +112,7 @@ pub(crate) fn read_validator_config_at_height( // Necessary because the DKG and application actors process finalized block concurrently. let block_hash = if let Some(hash) = node .provider - .block_hash(height.get()) + .block_hash(height) .wrap_err_with(|| format!("failed reading block hash at height `{height}`"))? { hash @@ -103,7 +120,7 @@ pub(crate) fn read_validator_config_at_height( .provider .pending_block_num_hash() .wrap_err("failed reading pending block state")? 
- && pending.number == height.get() + && pending.number == height { pending.hash } else { @@ -138,13 +155,19 @@ pub(crate) fn read_validator_config_at_height( &ctx.block, &ctx.cfg, &ctx.tx, - || read_fn(&ValidatorConfig::new()), + || read_fn(&C::default()), ) } -/// Reads the validator config from the boundary block of `epoch`. +pub(crate) enum Validators { + V1(ordered::Map), + V2(ordered::Map), +} + +/// Reads the validator config at `height`. /// -/// If `epoch` is not set, reads the genesis block. +/// Uses `reference_header` to determine whether to read validators from +/// validator config v1 or v2. /// /// Note that this returns all validators, active and inactive. #[instrument( @@ -155,24 +178,68 @@ pub(crate) fn read_validator_config_at_height( ), err )] -pub(crate) async fn read_from_contract_at_height( +pub(crate) fn read_from_contract_at_height( _attempt: u32, node: &TempoFullNode, - height: Height, -) -> eyre::Result> { - let raw_validators = read_validator_config_at_height(node, height, |config| { - config - .get_validators() - .wrap_err("failed to query contract for validator config") - })?; + height: u64, + reference_header: &TempoHeader, +) -> eyre::Result { + let vals = if node + .chain_spec() + .is_t2_active_at_timestamp(reference_header.timestamp()) + && is_v2_active(node, height).wrap_err( + "failed reading from validator config v2 activity state after hardfork activation", + )? 
{ + let raw_validators = + read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + })?; - info!(?raw_validators, "read validators from contract",); + info!( + ?raw_validators, + "read validators from validator config v2 contract", + ); + + let decoded_validators = raw_validators + .into_iter() + .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) + .collect::, _>>() + .wrap_err("failed an entry in the on-chain validator set")?; + + Validators::V2( + ordered::Map::try_from_iter( + decoded_validators + .into_iter() + .map(|validator| (validator.public_key.clone(), validator)), + ) + .wrap_err("contract contained validators with duplicate public keys")?, + ) + } else { + let raw_validators = + read_validator_config_at_height(node, height, |config: &ValidatorConfig| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + })?; + info!( + ?raw_validators, + "read validators from validator config v1 contract", + ); + Validators::V1(decode_from_contract(raw_validators)) + }; + Ok(vals) +} - Ok(decode_from_contract(raw_validators).await) +fn is_v2_active(node: &TempoFullNode, height: u64) -> eyre::Result { + read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { + config.is_initialized().map_err(eyre::Report::new) + }) } #[instrument(skip_all, fields(validators_to_decode = contract_vals.len()))] -async fn decode_from_contract( +fn decode_from_contract( contract_vals: Vec, ) -> ordered::Map { let mut decoded = HashMap::new(); @@ -230,7 +297,7 @@ impl DecodedValidatorV1 { /// This function does not perform hostname lookup on either of the addresses. /// Instead, only the shape of the addresses are checked for whether they are /// socket addresses (IP:PORT pairs), or fully qualified domain names. 
- #[instrument(ret(Display, level = Level::INFO), err(level = Level::WARN))] + #[instrument(ret(Display, level = Level::DEBUG), err(level = Level::WARN))] fn decode_from_contract( IValidatorConfig::Validator { active, @@ -258,6 +325,10 @@ impl DecodedValidatorV1 { address: validatorAddress, }) } + + pub(crate) fn is_active(&self) -> bool { + self.active + } } impl std::fmt::Display for DecodedValidatorV1 { @@ -268,3 +339,74 @@ impl std::fmt::Display for DecodedValidatorV1 { )) } } + +/// An entry in the validator config v2 contract with all its fields decoded +/// into Rust types. +pub(crate) struct DecodedValidatorV2 { + public_key: PublicKey, + ingress: SocketAddr, + egress: IpAddr, + added_at_height: u64, + deleted_at_height: u64, + index: u64, + address: Address, +} + +impl DecodedValidatorV2 { + pub(crate) fn public_key(&self) -> &PublicKey { + &self.public_key + } + + pub(crate) fn ingress(&self) -> SocketAddr { + self.ingress + } + + pub(crate) fn egress(&self) -> IpAddr { + self.egress + } + + pub(crate) fn is_active(&self) -> bool { + self.deleted_at_height == 0 + } + + #[instrument(ret(Display, level = Level::DEBUG), err(level = Level::WARN))] + fn decode_from_contract( + IValidatorConfigV2::Validator { + publicKey, + validatorAddress: address, + ingress, + egress, + index, + addedAtHeight: added_at_height, + deactivatedAtHeight: deleted_at_height, + }: IValidatorConfigV2::Validator, + ) -> eyre::Result { + let public_key = PublicKey::decode(publicKey.as_ref()) + .wrap_err("failed decoding publicKey field as ed25519 public key")?; + let ingress = ingress.parse().wrap_err("ingress was not valid")?; + let egress = egress.parse().wrap_err("egress was not valid")?; + Ok(Self { + public_key, + ingress, + egress, + added_at_height, + deleted_at_height, + index, + address, + }) + } +} +impl std::fmt::Display for DecodedValidatorV2 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "public key = `{}`, ingress = 
`{}`, egress = `{}`, added_at_height: `{}`, deleted_at_height = `{}`, index = `{}`, address = `{}`", + self.public_key, + self.ingress, + self.egress, + self.added_at_height, + self.deleted_at_height, + self.index, + self.address + )) + } +} From 47efabf800489c791b8bc13f89787151c93bbdf9 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Mon, 16 Feb 2026 16:36:33 +0100 Subject: [PATCH 03/11] dkg actor reads from v1 or v2, depending on initialization status --- .../src/dkg/manager/actor/mod.rs | 303 +++++++++++++----- crates/commonware-node/src/dkg/manager/mod.rs | 1 - .../src/dkg/manager/validators.rs | 29 -- crates/commonware-node/src/validators.rs | 63 +++- 4 files changed, 280 insertions(+), 116 deletions(-) delete mode 100644 crates/commonware-node/src/dkg/manager/validators.rs diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index 347b2a501e..24e7336ebf 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, num::NonZeroU32, task::Poll}; +use std::{collections::BTreeMap, num::NonZeroU32, task::Poll, time::Duration}; use alloy_consensus::BlockHeader as _; use bytes::{Buf, BufMut}; @@ -25,7 +25,7 @@ use commonware_p2p::{ }; use commonware_parallel::Sequential; use commonware_runtime::{Clock, ContextCell, Handle, IoBuf, Metrics as _, Spawner, spawn_cell}; -use commonware_utils::{Acknowledgement, N3f1, NZU32, ordered}; +use commonware_utils::{Acknowledgement, N3f1, NZU32, TryFromIterator as _, ordered}; use eyre::{OptionExt as _, WrapErr as _, bail, ensure, eyre}; use futures::{ @@ -33,9 +33,15 @@ use futures::{ }; use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; use rand_core::CryptoRngCore; +use reth_ethereum::network::NetworkInfo; use reth_provider::{BlockNumReader, HeaderProvider}; +use tempo_chainspec::hardfork::TempoHardforks; use 
tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use tempo_node::TempoFullNode; +use tempo_precompiles::{ + validator_config::ValidatorConfig, validator_config_v2::ValidatorConfigV2, +}; +use tempo_primitives::TempoHeader; use tracing::{Level, Span, debug, info, info_span, instrument, warn, warn_span}; use crate::{ @@ -43,9 +49,11 @@ use crate::{ dkg::manager::{ Command, ingress::{GetDkgOutcome, VerifyDealerLog}, - validators, }, - validators::read_validator_config_with_retry, + validators::{ + DecodedValidatorV2, decode_from_contract, is_v2_active, read_validator_config_at_height, + v2_activation_height, + }, }; mod state; @@ -169,6 +177,7 @@ where let initial_share = self.config.initial_share.clone(); let epoch_strategy = self.config.epoch_strategy.clone(); let mut marshal = self.config.marshal.clone(); + let attempts = self.metrics.attempts_to_read_validator_contract.clone(); async move { read_initial_state_and_set_floor( &mut context, @@ -176,6 +185,7 @@ where initial_share.clone(), &epoch_strategy, &mut marshal, + &attempts, ) .await } @@ -414,7 +424,7 @@ where ancestry_stream.update_receiver((msg.cause, request)); continue; } - if let Some((hole, request)) = self + if let Ok(Some((hole, request))) = self .handle_get_dkg_outcome( &msg.cause, storage, @@ -451,7 +461,7 @@ where let (cause, request) = ancestry_stream .take_request() .expect("if the stream is yielding blocks, there must be a receiver"); - if let Some((hole, request)) = self + if let Ok(Some((hole, request))) = self .handle_get_dkg_outcome(&cause, storage, &player_state, &round, &state, request) .await { @@ -691,20 +701,6 @@ where info!("reading validator from contract"); - let all_validators = read_validator_config_with_retry( - &self.context, - &self.config.execution_node, - crate::validators::ReadTarget::Exact { - height: self - .config - .epoch_strategy - .last(round.epoch()) - .expect("epoch strategy is valid for all epochs"), - }, - &self.metrics.attempts_to_read_validator_contract, - ) - 
.await; - let (local_output, mut share) = if let Some((outcome, share)) = storage.get_dkg_outcome(&state.epoch, &block.parent_digest()) { @@ -778,18 +774,22 @@ where self.metrics.successes.inc(); } + let syncers = read_syncers_if_v2_not_initialized_with_retry( + &self.context, + &self.config.execution_node, + block.header(), + &self.metrics.attempts_to_read_validator_contract, + ) + .await + .wrap_err("failed reading contract to determine syncers")?; + Ok(Some(state::State { epoch: onchain_outcome.epoch, seed: Summary::random(&mut self.context), output: onchain_outcome.output.clone(), share, players: onchain_outcome.next_players, - syncers: ordered::Set::from_iter_dedup( - all_validators - .iter_pairs() - .filter(|(_, v)| v.active) - .map(|(k, _)| k.clone()), - ), + syncers, is_full_dkg: onchain_outcome.is_next_full_dkg, })) } @@ -840,19 +840,16 @@ where info!("reading validators from contract"); - let all_validators = read_validator_config_with_retry( + // Only read syncers from the contract if t2 is not yet active and if + // val conf v2 has not yet been initialized. 
+ let syncers = read_syncers_if_v2_not_initialized_with_retry( &self.context, &self.config.execution_node, - crate::validators::ReadTarget::Exact { - height: self - .config - .epoch_strategy - .last(round.epoch()) - .expect("epoch strategy is valid for all epochs"), - }, + block.header(), &self.metrics.attempts_to_read_validator_contract, ) - .await; + .await + .wrap_err("failed reading contract; must be able to read contract to continue")?; Ok(Some(state::State { epoch: onchain_outcome.epoch, @@ -860,12 +857,7 @@ where output: onchain_outcome.output.clone(), share: state::ShareState::Plaintext(None), players: onchain_outcome.next_players, - syncers: ordered::Set::from_iter_dedup( - all_validators - .iter_pairs() - .filter(|(_, v)| v.active) - .map(|(k, _)| k.clone()), - ), + syncers, is_full_dkg: onchain_outcome.is_next_full_dkg, })) } @@ -1026,6 +1018,7 @@ where as_player = player_state.is_some(), our.epoch = %round.epoch(), ), + err(level = Level::WARN), )] async fn handle_get_dkg_outcome( &mut self, @@ -1035,7 +1028,7 @@ where round: &state::Round, state: &State, request: GetDkgOutcome, - ) -> Option<(Digest, GetDkgOutcome)> + ) -> eyre::Result> where TStorageContext: commonware_runtime::Metrics + Clock + commonware_runtime::Storage, { @@ -1044,13 +1037,12 @@ where .epoch_strategy .containing(request.height) .expect("our strategy covers all epochs"); - if round.epoch() != epoch_info.epoch() { - warn!( - request.epoch = %epoch_info.epoch(), - "request is not for our epoch" - ); - return None; - } + + ensure!( + round.epoch() == epoch_info.epoch(), + "request is for epoch `{}`, not our epoch", + epoch_info.epoch(), + ); let output = if let Some((output, _)) = storage .get_dkg_outcome(&state.epoch, &request.digest) @@ -1087,7 +1079,7 @@ where height = block.height; digest = block.parent; } else { - return Some((digest, request)); + return Ok(Some((digest, request))); } } } @@ -1136,30 +1128,33 @@ where }; // Check if next ceremony should be full. 
- // Read from pre-last block of the epoch, but never ahead of the current request. let next_epoch = state.epoch.next(); - let is_next_full_dkg = - validators::read_next_full_dkg_ceremony(&self.config.execution_node, request.height) + let will_be_re_dkg = + crate::validators::read_re_dkg_epoch(&self.config.execution_node, request.height.get()) // in theory it should never fail, but if it does, just stick to reshare. .is_ok_and(|epoch| epoch == next_epoch.get()); - if is_next_full_dkg { - info!(%next_epoch, "next DKG will change the network identity and not be a reshare process"); - } + info!( + will_be_re_dkg, + %next_epoch, + "determined if the next epoch will be a reshare or full re-dkg process", + ); - if request + let next_players = + determine_next_players(&state, &self.config.execution_node, request.digest) + .wrap_err("could not determine who the next players are supposed to be")?; + request .response .send(OnchainDkgOutcome { epoch: next_epoch, output, - next_players: state.syncers.clone(), - is_next_full_dkg, + next_players, + is_next_full_dkg: will_be_re_dkg, }) - .is_err() - { - warn!("requester went away before speculative DKG outcome could be sent"); - }; + .map_err(|_| { + eyre!("requester went away before speculative DKG outcome could be sent") + })?; - None + Ok(None) } #[instrument(skip_all, fields(epoch = %state.epoch), err(level = Level::WARN))] @@ -1191,9 +1186,10 @@ async fn read_initial_state_and_set_floor( share: Option, epoch_strategy: &FixedEpocher, marshal: &mut crate::alias::marshal::Mailbox, + total_attempts: &Counter, ) -> eyre::Result where - TContext: CryptoRngCore, + TContext: Clock + CryptoRngCore, { let newest_height = node .provider @@ -1245,11 +1241,10 @@ where tempo_dkg_onchain_artifacts::OnchainDkgOutcome::read(&mut header.extra_data().as_ref()) .wrap_err("the boundary header did not contain the on-chain DKG outcome")?; - let all_validators = validators::read_from_contract_at_height(0, node, newest_height) - .await - 
.wrap_err_with(|| { - format!("failed reading validator config from block height `{newest_height}`") - })?; + let syncers = + read_syncers_if_v2_not_initialized_with_retry(context, node, &header, total_attempts) + .await + .wrap_err("failed determining syncing peers from contract")?; let share = state::ShareState::Plaintext('verify_initial_share: { let Some(share) = share else { @@ -1281,12 +1276,7 @@ where output: onchain_outcome.output.clone(), share, players: onchain_outcome.next_players, - syncers: ordered::Set::from_iter_dedup( - all_validators - .iter_pairs() - .filter(|(_, v)| v.active) - .map(|(k, _)| k.clone()), - ), + syncers, is_full_dkg: onchain_outcome.is_next_full_dkg, }) } @@ -1537,3 +1527,164 @@ fn read_dealer_log( .ok_or_eyre("failed checking signed log against current round")?; Ok((dealer, log)) } + +/// Reads syncing validators if the V2 contract is not yet initialized. +async fn read_syncers_if_v2_not_initialized_with_retry( + context: &impl commonware_runtime::Clock, + node: &TempoFullNode, + reference_header: &TempoHeader, + total_attempts: &Counter, +) -> eyre::Result> { + let mut attempts = 0; + const MIN_RETRY: Duration = Duration::from_secs(1); + const MAX_RETRY: Duration = Duration::from_secs(30); + + 'read_contract: loop { + total_attempts.inc(); + attempts += 1; + + if let Ok(syncers) = read_syncers_if_v2_not_initialized(attempts, node, reference_header) { + break 'read_contract Ok(syncers); + } + + let retry_after = MIN_RETRY.saturating_mul(attempts).min(MAX_RETRY); + let is_syncing = node.network.is_syncing(); + let best_block = node.provider.best_block_number(); + let blocks_behind = best_block + .as_ref() + .ok() + .map(|best| reference_header.number().saturating_sub(*best)); + tracing::warn_span!("read_validator_config_with_retry").in_scope(|| { + warn!( + attempts, + retry_after = %tempo_telemetry_util::display_duration(retry_after), + is_syncing, + best_block = %tempo_telemetry_util::display_result(&best_block), + height_if_v1 = 
reference_header.number(), + blocks_behind = %tempo_telemetry_util::display_option(&blocks_behind), + "reading validator config from contract failed; will retry", + ); + }); + context.sleep(retry_after).await; + } +} + +/// Reads the pre-t2 hardfork syncers from the v1 contract. +/// +/// If the validator config v2 contract is already initialized, then this +/// returns an empty set because after the hardfork syncers do not need to be +/// tracked. +/// +/// The implementation reads the immutable v2 initialization height to avoid +/// having to read a state at a specific block: if the initialization has already +/// happened, `initialization_height <= height`, then reading validator v1 +/// is skipped and an empty list returned. +#[instrument( + skip_all, + fields( + attempt = _attempt, + height_if_v1 = reference_header.number(), + ), + err +)] +pub(crate) fn read_syncers_if_v2_not_initialized( + _attempt: u32, + node: &TempoFullNode, + reference_header: &TempoHeader, +) -> eyre::Result> { + if node + .chain_spec() + .is_t2_active_at_timestamp(reference_header.timestamp()) + { + let best = node.provider.best_block_number().wrap_err( + "no best block number available yet to check val config v2 activation height", + )?; + debug!( + best_height = best, + "checking best/latest block available in the execution layer for \ + validator config v2 activation height" + ); + let v2_activation_height = v2_activation_height(node, best) + .wrap_err("unable to read validator config v2 to check its activation height")?; + if reference_header.number() >= v2_activation_height { + debug!( + v2_activation_height, + "validator config v2 was already activated; no need to read \ + syncers from contract; returning empty set", + ); + return Ok(ordered::Set::default()); + } + } + + let raw_validators = read_validator_config_at_height( + node, + reference_header.number(), + |config: &ValidatorConfig| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + 
}, + )?; + info!( + ?raw_validators, + "read validators from validator config v1 contract", + ); + Ok(ordered::Set::from_iter_dedup( + decode_from_contract(raw_validators) + .iter_pairs() + .filter_map(|(k, v)| v.is_active().then_some(k.clone())), + )) +} + +/// Determines the next players depending on the header timestamp identified by `digest`. +fn determine_next_players( + state: &State, + node: &TempoFullNode, + digest: Digest, +) -> eyre::Result> { + let header = node + .provider + .header_by_hash_or_number(reth_ethereum::network::types::HashOrNumber::Hash(digest.0)) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("hash not known")) + .wrap_err_with(|| { + format!("failed reading header for block hash `{digest}` from execution layer") + })?; + let is_t2_hardfork_active = node + .chain_spec() + .is_t2_active_at_timestamp(header.timestamp()); + let is_val_conf_v2_active = is_v2_active(node, header.number()) + .wrap_err("failed reading contrat to determine if validator config v2 is active")?; + + let syncers = if is_t2_hardfork_active && is_val_conf_v2_active { + read_validator_config_at_height(node, header.number(), |config: &ValidatorConfigV2| { + let raw = config + .get_validators() + .wrap_err("failed to query contract for validator config")?; + + let decoded_validators = raw + .into_iter() + .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) + .collect::, _>>() + .wrap_err("failed decoding an entry in the on-chain validator set")?; + + ordered::Set::try_from_iter(decoded_validators.into_iter().filter_map(|validator| { + validator + .is_active_at_height(header.number()) + .then_some(validator.public_key().clone()) + })) + .wrap_err("contract contained validators with duplicate public keys") + }) + .wrap_err("failed reading validator config v2")? 
+ } else { + state.syncers.clone() + }; + + debug!( + is_t2_hardfork_active, + is_val_conf_v2_active, + ?syncers, + "determined syncers" + ); + Ok(syncers) +} diff --git a/crates/commonware-node/src/dkg/manager/mod.rs b/crates/commonware-node/src/dkg/manager/mod.rs index 63ba95769c..eecc5b880c 100644 --- a/crates/commonware-node/src/dkg/manager/mod.rs +++ b/crates/commonware-node/src/dkg/manager/mod.rs @@ -8,7 +8,6 @@ use tempo_node::TempoFullNode; mod actor; mod ingress; -mod validators; pub(crate) use actor::Actor; pub(crate) use ingress::Mailbox; diff --git a/crates/commonware-node/src/dkg/manager/validators.rs b/crates/commonware-node/src/dkg/manager/validators.rs deleted file mode 100644 index 7dcb7c52c1..0000000000 --- a/crates/commonware-node/src/dkg/manager/validators.rs +++ /dev/null @@ -1,29 +0,0 @@ -pub(super) use crate::validators::read_from_contract_at_height; - -use commonware_consensus::types::Height; -use eyre::WrapErr as _; -use tempo_node::TempoFullNode; -use tracing::{Level, instrument}; - -/// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. -/// -/// This is used to determine if the next DKG ceremony should be a full ceremony -/// (new polynomial) instead of a reshare. 
-#[instrument( - skip_all, - fields( - at_height, - ), - err, - ret(level = Level::INFO) -)] -pub(super) fn read_next_full_dkg_ceremony( - node: &TempoFullNode, - at_height: Height, -) -> eyre::Result { - crate::validators::read_validator_config_at_height(node, at_height, |config| { - config - .get_next_full_dkg_ceremony() - .wrap_err("failed to query contract for next full dkg ceremony") - }) -} diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index 1da086f7a6..38c015ccde 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -32,9 +32,54 @@ use tempo_precompiles::{ use tempo_primitives::TempoHeader; use tracing::{Level, info, instrument, warn}; +pub(crate) fn v2_activation_height(node: &TempoFullNode, height: u64) -> eyre::Result { + read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { + config + .get_initialized_at_height() + .map_err(eyre::Report::new) + }) +} + +pub(crate) fn is_v2_active(node: &TempoFullNode, height: u64) -> eyre::Result { + read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { + config.is_initialized().map_err(eyre::Report::new) + }) +} + +/// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. +/// +/// This is used to determine if the next DKG ceremony should be a full ceremony +/// (new polynomial) instead of a reshare. +#[instrument( + skip_all, + fields( + at_height, + ), + err, + ret(level = Level::INFO) +)] +pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, at_height: u64) -> eyre::Result { + if at_height + >= v2_activation_height(node, at_height).wrap_err( + "failed reading contract to determine validator config v2 activation height", + )? 
+ { + read_validator_config_at_height(node, at_height, |config: &ValidatorConfigV2| { + config + .get_next_full_dkg_ceremony() + .map_err(eyre::Report::new) + }) + } else { + read_validator_config_at_height(node, at_height, |config: &ValidatorConfig| { + config + .get_next_full_dkg_ceremony() + .map_err(eyre::Report::new) + }) + } +} + pub(crate) enum ReadTarget { AtLeast { height: Height }, - Exact { height: Height }, } /// Attempts to read the validator config from the smart contract, retrying @@ -58,7 +103,6 @@ pub(crate) async fn read_validator_config_with_retry( attempts += 1; let target_height = match target { - ReadTarget::Exact { height } => height, ReadTarget::AtLeast { height } => node .provider .best_block_number() @@ -232,14 +276,8 @@ pub(crate) fn read_from_contract_at_height( Ok(vals) } -fn is_v2_active(node: &TempoFullNode, height: u64) -> eyre::Result { - read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { - config.is_initialized().map_err(eyre::Report::new) - }) -} - #[instrument(skip_all, fields(validators_to_decode = contract_vals.len()))] -fn decode_from_contract( +pub(crate) fn decode_from_contract( contract_vals: Vec, ) -> ordered::Map { let mut decoded = HashMap::new(); @@ -369,8 +407,13 @@ impl DecodedValidatorV2 { self.deleted_at_height == 0 } + pub(crate) fn is_active_at_height(&self, height: u64) -> bool { + self.added_at_height <= height + && (self.deleted_at_height == 0 || self.deleted_at_height > height) + } + #[instrument(ret(Display, level = Level::DEBUG), err(level = Level::WARN))] - fn decode_from_contract( + pub(crate) fn decode_from_contract( IValidatorConfigV2::Validator { publicKey, validatorAddress: address, From dc1c845898a8e0b049869da3ac860d405a223e5a Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Mon, 16 Feb 2026 17:40:42 +0100 Subject: [PATCH 04/11] update peer sets on every block, but only if they changed --- .../src/dkg/manager/actor/mod.rs | 3 +- 
.../commonware-node/src/peer_manager/actor.rs | 120 +++++++++++++----- crates/commonware-node/src/validators.rs | 55 +++++--- 3 files changed, 129 insertions(+), 49 deletions(-) diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index 24e7336ebf..d054a71820 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -1616,7 +1616,7 @@ pub(crate) fn read_syncers_if_v2_not_initialized( } } - let raw_validators = read_validator_config_at_height( + let (_read_height, _read_hash, raw_validators) = read_validator_config_at_height( node, reference_header.number(), |config: &ValidatorConfig| { @@ -1676,6 +1676,7 @@ fn determine_next_players( .wrap_err("contract contained validators with duplicate public keys") }) .wrap_err("failed reading validator config v2")? + .2 } else { state.syncers.clone() }; diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index c742c25775..7bb64d840e 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -10,13 +10,14 @@ use commonware_consensus::{ use commonware_cryptography::ed25519::PublicKey; use commonware_p2p::{AddressableManager, Provider}; use commonware_runtime::{Clock, ContextCell, Metrics, Spawner, spawn_cell}; -use commonware_utils::{Acknowledgement, acknowledgement::Exact}; +use commonware_utils::{Acknowledgement, acknowledgement::Exact, ordered}; use eyre::{OptionExt as _, WrapErr as _}; use futures::{StreamExt as _, channel::mpsc}; use itertools::Either; use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; use reth_ethereum::network::NetworkInfo; -use reth_provider::{BlockNumReader as _, HeaderProvider}; +use reth_provider::HeaderProvider; +use tempo_chainspec::hardfork::TempoHardforks as _; use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use 
tempo_node::TempoFullNode; use tracing::{Span, error, info, info_span, instrument, warn}; @@ -42,6 +43,8 @@ where contract_read_attempts: Counter, peers: Gauge, + + last_tracked_peer_set: Option, } impl Actor @@ -80,6 +83,7 @@ where mailbox, contract_read_attempts, peers, + last_tracked_peer_set: None, } } @@ -173,7 +177,7 @@ where format!("boundary block at `{last_boundary}` did not contain a valid DKG outcome") })?; - let all_validators = read_validator_config_with_retry( + let (read_height, read_hash, all_validators) = read_validator_config_with_retry( &self.context, &self.execution_node, &header, @@ -188,17 +192,33 @@ where self.peers.set(peers.len() as i64); let is_syncing = self.execution_node.network.is_syncing(); - let best_block = self.execution_node.provider.best_block_number(); info!( epoch = %onchain_outcome.epoch, %last_boundary, is_syncing, - best_block = %tempo_telemetry_util::display_result(&best_block), - peers = peers.len(), - "bootstrapped initial peer set from boundary block", + read_height, + %read_hash, + ?peers, + "bootstrapped initial peer set from last boundary block and best execution layer bock", ); - AddressableManager::track(&mut self.oracle, onchain_outcome.epoch.get(), peers).await; + let header = self + .execution_node + .provider + .header(read_hash) + .expect("must be access execution layer to get header - just read validator config for") + .expect("execution layer must have the header - just read validator config for it"); + let last_tracked_peer_seet = LastTrackedPeerSet { + id: header.number(), + peers, + }; + self.oracle + .track( + last_tracked_peer_seet.id, + last_tracked_peer_seet.peers.clone(), + ) + .await; + self.last_tracked_peer_set = Some(last_tracked_peer_seet); Ok(()) } @@ -250,22 +270,43 @@ where .containing(height) .expect("epoch strategy covers all heights"); - if is_past_hardfork(&block) { - // TODO: After the hardfork, read Val Config V2 getActiveValidators() - // from the execution node, build ordered::Map - // 
using Address::Asymmetric { ingress, egress }, and call - // oracle.overwrite(peers). - warn!("hardfork detected but V2 peer management not yet implemented"); - } else if height == epoch_info.last() { + // After hardfork, read every block. Otherwise, only read on boundary. + let read_contract = self + .execution_node + .chain_spec() + .is_t2_active_at_timestamp(block.timestamp()) + || height == epoch_info.last(); + + if read_contract { // Intentionally bail on parse failure: the last block of every epoch // must contain a valid DKG outcome. If it doesn't, something is // fundamentally wrong and we surface the error rather than silently // running with a stale peer set. - let extra_data = block.header().extra_data(); + let header; + let extra_data = if height == epoch_info.last() { + block.header().extra_data() + } else { + let last_boundary = epoch_info.epoch().previous().map_or(0, |epoch| { + self.epoch_strategy + .last(epoch) + .expect("valid for all epochs") + .get() + }); + header = self + .execution_node + .provider + .header_by_number(last_boundary) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("unknown header")) + .wrap_err_with(|| { + format!("failed reading header for last boundary height `{last_boundary}`") + })?; + header.extra_data() + }; let onchain_outcome = OnchainDkgOutcome::read(&mut extra_data.as_ref()) .wrap_err("could not read DKG outcome from boundary block")?; - let all_validators = read_validator_config_with_retry( + let (_read_height, _read_hash, all_validators) = read_validator_config_with_retry( &self.context, &self.execution_node, block.header(), @@ -277,26 +318,42 @@ where .await; let peers = construct_peer_set(&onchain_outcome, &all_validators); - self.peers.set(peers.len() as i64); - info!( - epoch = %onchain_outcome.epoch, - ?peers, - "tracking peers for new epoch from boundary block", - ); + if let Some(last_tracked_peer_set) = &mut self.last_tracked_peer_set { + if peers.keys() == 
last_tracked_peer_set.peers.keys() { + if peers.values() != last_tracked_peer_set.peers.values() { + self.oracle.overwrite(peers.clone()).await; + last_tracked_peer_set.peers = peers; + } + } else { + *last_tracked_peer_set = LastTrackedPeerSet { + id: block.height().get(), + peers, + }; + self.oracle + .track( + last_tracked_peer_set.id, + last_tracked_peer_set.peers.clone(), + ) + .await; + } + } else { + self.oracle.track(block.height().get(), peers.clone()).await; + self.last_tracked_peer_set = Some(LastTrackedPeerSet { + id: block.height().get(), + peers, + }) + } - AddressableManager::track(&mut self.oracle, onchain_outcome.epoch.get(), peers).await; + if let Some(tracked) = &self.last_tracked_peer_set { + self.peers.set(tracked.peers.len() as i64); + } } ack.acknowledge(); Ok(()) } } -/// Stub to implement hardfork logic. -fn is_past_hardfork(_block: &Block) -> bool { - false -} - pub(crate) fn construct_peer_set( outcome: &OnchainDkgOutcome, validators: &Validators, @@ -345,3 +402,8 @@ pub(crate) fn construct_peer_set( (key.clone(), addr) })) } + +struct LastTrackedPeerSet { + id: u64, + peers: ordered::Map, +} diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index 38c015ccde..481437d025 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -5,7 +5,7 @@ use std::{ }; use alloy_consensus::BlockHeader; -use alloy_primitives::Address; +use alloy_primitives::{Address, B256}; use commonware_codec::DecodeExt as _; use commonware_consensus::types::Height; use commonware_cryptography::ed25519::PublicKey; @@ -38,12 +38,14 @@ pub(crate) fn v2_activation_height(node: &TempoFullNode, height: u64) -> eyre::R .get_initialized_at_height() .map_err(eyre::Report::new) }) + .map(|(_, _, activation_height)| activation_height) } pub(crate) fn is_v2_active(node: &TempoFullNode, height: u64) -> eyre::Result { read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| 
{ config.is_initialized().map_err(eyre::Report::new) }) + .map(|(_, _, activated)| activated) } /// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. @@ -69,12 +71,14 @@ pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, at_height: u64) -> eyre::R .get_next_full_dkg_ceremony() .map_err(eyre::Report::new) }) + .map(|(_, _, epoch)| epoch) } else { read_validator_config_at_height(node, at_height, |config: &ValidatorConfig| { config .get_next_full_dkg_ceremony() .map_err(eyre::Report::new) }) + .map(|(_, _, epoch)| epoch) } } @@ -93,7 +97,7 @@ pub(crate) async fn read_validator_config_with_retry( reference_header: &TempoHeader, target: ReadTarget, total_attempts: &Counter, -) -> Validators { +) -> (u64, B256, Validators) { let mut attempts = 0; const MIN_RETRY: Duration = Duration::from_secs(1); const MAX_RETRY: Duration = Duration::from_secs(30); @@ -145,7 +149,7 @@ pub(crate) fn read_validator_config_at_height( node: &TempoFullNode, height: u64, read_fn: impl FnOnce(&C) -> eyre::Result, -) -> eyre::Result +) -> eyre::Result<(u64, B256, T)> where C: Default, { @@ -193,14 +197,18 @@ where .evm_for_block(db, block.header()) .wrap_err("failed instantiating evm for block")?; + let height = block.number(); + let hash = block.seal_slow().hash(); + let ctx = evm.ctx_mut(); - StorageCtx::enter_evm( + let res = StorageCtx::enter_evm( &mut ctx.journaled_state, &ctx.block, &ctx.cfg, &ctx.tx, || read_fn(&C::default()), - ) + )?; + Ok((height, hash, res)) } pub(crate) enum Validators { @@ -227,14 +235,15 @@ pub(crate) fn read_from_contract_at_height( node: &TempoFullNode, height: u64, reference_header: &TempoHeader, -) -> eyre::Result { +) -> eyre::Result<(u64, B256, Validators)> { let vals = if node .chain_spec() .is_t2_active_at_timestamp(reference_header.timestamp()) - && is_v2_active(node, height).wrap_err( - "failed reading from validator config v2 activity state after hardfork activation", - )? 
{ - let raw_validators = + && height + >= v2_activation_height(node, height).wrap_err( + "failed reading from validator config v2 activity state after hardfork activation", + )? { + let (read_height, hash, raw_validators) = read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { config .get_validators() @@ -252,16 +261,20 @@ pub(crate) fn read_from_contract_at_height( .collect::, _>>() .wrap_err("failed an entry in the on-chain validator set")?; - Validators::V2( - ordered::Map::try_from_iter( - decoded_validators - .into_iter() - .map(|validator| (validator.public_key.clone(), validator)), - ) - .wrap_err("contract contained validators with duplicate public keys")?, + ( + read_height, + hash, + Validators::V2( + ordered::Map::try_from_iter( + decoded_validators + .into_iter() + .map(|validator| (validator.public_key.clone(), validator)), + ) + .wrap_err("contract contained validators with duplicate public keys")?, + ), ) } else { - let raw_validators = + let (read_height, hash, raw_validators) = read_validator_config_at_height(node, height, |config: &ValidatorConfig| { config .get_validators() @@ -271,7 +284,11 @@ pub(crate) fn read_from_contract_at_height( ?raw_validators, "read validators from validator config v1 contract", ); - Validators::V1(decode_from_contract(raw_validators)) + ( + read_height, + hash, + Validators::V1(decode_from_contract(raw_validators)), + ) }; Ok(vals) } From a152196777b839b108902dab2e6f036118188800 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Tue, 17 Feb 2026 13:48:39 +0100 Subject: [PATCH 05/11] debug tests - peer manager only needs to read finalized blocks optimistically; if it can't -> read on the next attempt --- .../src/consensus/application/actor.rs | 6 +- .../src/dkg/manager/actor/mod.rs | 133 ++++++++++----- .../commonware-node/src/peer_manager/actor.rs | 121 +++++++------- crates/commonware-node/src/validators.rs | 152 ++++-------------- crates/dkg-onchain-artifacts/src/lib.rs | 9 +- 
crates/e2e/src/tests/backfill.rs | 24 ++- crates/e2e/src/tests/consensus_rpc.rs | 4 +- crates/e2e/src/tests/dkg/common.rs | 8 +- .../src/tests/dkg/fast_sync_after_full_dkg.rs | 9 +- crates/e2e/src/tests/dkg/full_ceremony.rs | 6 +- crates/e2e/src/tests/dkg/share_loss.rs | 11 +- .../e2e/src/tests/dkg/static_transitions.rs | 6 +- 12 files changed, 251 insertions(+), 238 deletions(-) diff --git a/crates/commonware-node/src/consensus/application/actor.rs b/crates/commonware-node/src/consensus/application/actor.rs index b235d89e85..f248a7d252 100644 --- a/crates/commonware-node/src/consensus/application/actor.rs +++ b/crates/commonware-node/src/consensus/application/actor.rs @@ -515,6 +515,10 @@ impl Inner { ); info!( %outcome.epoch, + outcome.network_identity = %outcome.network_identity(), + outcome.dealers = ?outcome.dealers(), + outcome.dealers = ?outcome.players(), + outcome.dealers = ?outcome.next_players(), "received DKG outcome; will include in payload builder attributes", ); outcome.encode().into() @@ -822,7 +826,7 @@ async fn verify_block( PayloadStatusEnum::Syncing => { bail!( "failed validating block because payload is still syncing, \ - this means the parent block was available to the consensus + this means the parent block was available to the consensus \ layer but not the execution layer" ) } diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index d054a71820..24cb4a8bf9 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -35,7 +35,6 @@ use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; use rand_core::CryptoRngCore; use reth_ethereum::network::NetworkInfo; use reth_provider::{BlockNumReader, HeaderProvider}; -use tempo_chainspec::hardfork::TempoHardforks; use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use tempo_node::TempoFullNode; use tempo_precompiles::{ @@ -51,8 +50,7 @@ use crate::{ 
ingress::{GetDkgOutcome, VerifyDealerLog}, }, validators::{ - DecodedValidatorV2, decode_from_contract, is_v2_active, read_validator_config_at_height, - v2_activation_height, + DecodedValidatorV2, can_use_v2, decode_from_contract, read_validator_config_at_height, }, }; @@ -1129,10 +1127,9 @@ where // Check if next ceremony should be full. let next_epoch = state.epoch.next(); - let will_be_re_dkg = - crate::validators::read_re_dkg_epoch(&self.config.execution_node, request.height.get()) - // in theory it should never fail, but if it does, just stick to reshare. - .is_ok_and(|epoch| epoch == next_epoch.get()); + let will_be_re_dkg = read_re_dkg_epoch(&self.config.execution_node, request.digest) + // in theory it should never fail, but if it does, just stick to reshare. + .is_ok_and(|epoch| epoch == next_epoch.get()); info!( will_be_re_dkg, %next_epoch, @@ -1592,28 +1589,17 @@ pub(crate) fn read_syncers_if_v2_not_initialized( node: &TempoFullNode, reference_header: &TempoHeader, ) -> eyre::Result> { - if node - .chain_spec() - .is_t2_active_at_timestamp(reference_header.timestamp()) + let best_header = best_header(node).wrap_err("no best header available in execution layer")?; + ensure!( + best_header.number() >= reference_header.number(), + "height of best available header below reference header; node still syncing?" + ); + // Take the best available + if can_use_v2(node, &best_header) + .wrap_err("could not determine if validator config v2 can be used or not")? 
{ - let best = node.provider.best_block_number().wrap_err( - "no best block number available yet to check val config v2 activation height", - )?; - debug!( - best_height = best, - "checking best/latest block available in the execution layer for \ - validator config v2 activation height" - ); - let v2_activation_height = v2_activation_height(node, best) - .wrap_err("unable to read validator config v2 to check its activation height")?; - if reference_header.number() >= v2_activation_height { - debug!( - v2_activation_height, - "validator config v2 was already activated; no need to read \ - syncers from contract; returning empty set", - ); - return Ok(ordered::Set::default()); - } + debug!("validator config v2 active and initialized; no need to explicitly track syncers"); + return Ok(ordered::Set::default()); } let (_read_height, _read_hash, raw_validators) = read_validator_config_at_height( @@ -1636,7 +1622,14 @@ pub(crate) fn read_syncers_if_v2_not_initialized( )) } -/// Determines the next players depending on the header timestamp identiifed by `digest`. +/// Determines the next players depending on the header timestamp identified by `digest`. +/// +/// This function should be (and practically is) only called when constructing or +/// verifying a proposal. `digest` should therefore always refer to the parent +/// parent of the porposal. +/// +/// It is therefore save to assume that the execution layer should always have +/// the header and state corresponding to `digest` available. 
fn determine_next_players( state: &State, node: &TempoFullNode, @@ -1644,19 +1637,16 @@ fn determine_next_players( ) -> eyre::Result> { let header = node .provider - .header_by_hash_or_number(reth_ethereum::network::types::HashOrNumber::Hash(digest.0)) + .header(digest.0) .map_err(eyre::Report::new) .and_then(|maybe| maybe.ok_or_eyre("hash not known")) .wrap_err_with(|| { format!("failed reading header for block hash `{digest}` from execution layer") })?; - let is_t2_hardfork_active = node - .chain_spec() - .is_t2_active_at_timestamp(header.timestamp()); - let is_val_conf_v2_active = is_v2_active(node, header.number()) - .wrap_err("failed reading contrat to determine if validator config v2 is active")?; - - let syncers = if is_t2_hardfork_active && is_val_conf_v2_active { + let syncers = if can_use_v2(node, &header) + .wrap_err("failed determining if validator config v2 can be used")? + { + debug!("reading next players from validator config v2 contract"); read_validator_config_at_height(node, header.number(), |config: &ValidatorConfigV2| { let raw = config .get_validators() @@ -1678,14 +1668,71 @@ fn determine_next_players( .wrap_err("failed reading validator config v2")? .2 } else { + debug!("using validator config v1 syncers from state"); state.syncers.clone() }; - debug!( - is_t2_hardfork_active, - is_val_conf_v2_active, - ?syncers, - "determined syncers" - ); + debug!(?syncers, "determined syncers"); Ok(syncers) } + +/// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. +/// +/// This is used to determine if the next DKG ceremony should be a full ceremony +/// (new polynomial) instead of a reshare. +/// +/// +/// This function should be (and practically is) only called when constructing or +/// verifying a proposal. `digest` should therefore always refer to the parent +/// parent of the porposal. 
+/// +/// It is therefore save to assume that the execution layer should always have +/// the header and state corresponding to `digest` available. +#[instrument( + skip_all, + fields( + at_height, + ), + err, + ret(level = Level::INFO) +)] +pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, digest: Digest) -> eyre::Result { + let header = node + .provider + .header(digest.0) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("hash not known")) + .wrap_err_with(|| { + format!("failed reading header for block hash `{digest}` from execution layer") + })?; + if can_use_v2(node, &header) + .wrap_err("failed determining if validator config v2 can be used")? + { + read_validator_config_at_height(node, header.number(), |config: &ValidatorConfigV2| { + config + .get_next_full_dkg_ceremony() + .map_err(eyre::Report::new) + }) + .map(|(_, _, epoch)| epoch) + } else { + read_validator_config_at_height(node, header.number(), |config: &ValidatorConfig| { + config + .get_next_full_dkg_ceremony() + .map_err(eyre::Report::new) + }) + .map(|(_, _, epoch)| epoch) + } +} + +/// Returns the header corresponding to the best block number. 
+fn best_header(node: &TempoFullNode) -> eyre::Result { + let h = node + .provider + .best_block_number() + .wrap_err("failed reading best available block number from execution layer")?; + node.provider + .header_by_number(h) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("header not known")) + .wrap_err("could not read header for best available block number") +} diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index 7bb64d840e..a85ea7bc3a 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -1,30 +1,32 @@ use std::net::SocketAddr; use alloy_consensus::BlockHeader as _; +use alloy_primitives::B256; use commonware_codec::ReadExt as _; use commonware_consensus::{ Heightable as _, marshal::Update, - types::{Epocher as _, FixedEpocher, Height}, + types::{Epocher, FixedEpocher, Height}, }; use commonware_cryptography::ed25519::PublicKey; use commonware_p2p::{AddressableManager, Provider}; use commonware_runtime::{Clock, ContextCell, Metrics, Spawner, spawn_cell}; -use commonware_utils::{Acknowledgement, acknowledgement::Exact, ordered}; -use eyre::{OptionExt as _, WrapErr as _}; +use commonware_utils::{Acknowledgement, ordered}; +use eyre::{OptionExt as _, WrapErr as _, ensure}; use futures::{StreamExt as _, channel::mpsc}; use itertools::Either; -use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; +use prometheus_client::metrics::gauge::Gauge; use reth_ethereum::network::NetworkInfo; -use reth_provider::HeaderProvider; +use reth_provider::{BlockNumReader as _, HeaderProvider as _}; use tempo_chainspec::hardfork::TempoHardforks as _; use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use tempo_node::TempoFullNode; -use tracing::{Span, error, info, info_span, instrument, warn}; +use tempo_primitives::TempoHeader; +use tracing::{Level, Span, error, info, info_span, instrument, warn}; use crate::{ 
consensus::block::Block, - validators::{self, Validators, read_validator_config_with_retry}, + validators::{self, Validators}, }; use super::ingress::{Message, MessageWithCause}; @@ -41,7 +43,6 @@ where last_finalized_height: Height, mailbox: mpsc::UnboundedReceiver, - contract_read_attempts: Counter, peers: Gauge, last_tracked_peer_set: Option, @@ -62,12 +63,6 @@ where }: super::Config, mailbox: mpsc::UnboundedReceiver, ) -> Self { - let contract_read_attempts = Counter::default(); - context.register( - "contract_read_attempts", - "how often the actor tried reading the validator config contract", - contract_read_attempts.clone(), - ); let peers = Gauge::default(); context.register( "peers", @@ -81,7 +76,6 @@ where epoch_strategy, last_finalized_height, mailbox, - contract_read_attempts, peers, last_tracked_peer_set: None, } @@ -177,16 +171,9 @@ where format!("boundary block at `{last_boundary}` did not contain a valid DKG outcome") })?; - let (read_height, read_hash, all_validators) = read_validator_config_with_retry( - &self.context, - &self.execution_node, - &header, - validators::ReadTarget::AtLeast { - height: last_boundary, - }, - &self.contract_read_attempts, - ) - .await; + let (read_height, read_hash, all_validators) = + read_validator_config(&self.execution_node, &header) + .wrap_err("unable to read initial peer set from execution layer")?; let peers = construct_peer_set(&onchain_outcome, &all_validators); self.peers.set(peers.len() as i64); @@ -209,12 +196,12 @@ where .expect("must be access execution layer to get header - just read validator config for") .expect("execution layer must have the header - just read validator config for it"); let last_tracked_peer_seet = LastTrackedPeerSet { - id: header.number(), + height: header.number(), peers, }; self.oracle .track( - last_tracked_peer_seet.id, + last_tracked_peer_seet.height, last_tracked_peer_seet.peers.clone(), ) .await; @@ -246,9 +233,10 @@ where } Message::Finalized(update) => match *update { 
Update::Block(block, ack) => { - self.handle_finalized_block(block, ack) + self.handle_finalized_block(block) .await .wrap_err("failed handling finalized block")?; + ack.acknowledge(); } Update::Tip { .. } => {} }, @@ -263,27 +251,25 @@ where ), err, )] - async fn handle_finalized_block(&mut self, block: Block, ack: Exact) -> eyre::Result<()> { - let height = commonware_consensus::Heightable::height(&block); + async fn handle_finalized_block(&mut self, block: Block) -> eyre::Result<()> { let epoch_info = self .epoch_strategy - .containing(height) + .containing(block.height()) .expect("epoch strategy covers all heights"); - // After hardfork, read every block. Otherwise, only read on boundary. - let read_contract = self - .execution_node - .chain_spec() - .is_t2_active_at_timestamp(block.timestamp()) - || height == epoch_info.last(); - - if read_contract { + // Always validator state on the boundary, and if we are past the hardfork. + if block.height() == epoch_info.last() + || self + .execution_node + .chain_spec() + .is_t2_active_at_timestamp(block.timestamp()) + { // Intentionally bail on parse failure: the last block of every epoch // must contain a valid DKG outcome. If it doesn't, something is // fundamentally wrong and we surface the error rather than silently // running with a stale peer set. 
let header; - let extra_data = if height == epoch_info.last() { + let extra_data = if block.height() == epoch_info.last() { block.header().extra_data() } else { let last_boundary = epoch_info.epoch().previous().map_or(0, |epoch| { @@ -306,16 +292,18 @@ where let onchain_outcome = OnchainDkgOutcome::read(&mut extra_data.as_ref()) .wrap_err("could not read DKG outcome from boundary block")?; - let (_read_height, _read_hash, all_validators) = read_validator_config_with_retry( - &self.context, - &self.execution_node, - block.header(), - validators::ReadTarget::AtLeast { - height: block.height(), - }, - &self.contract_read_attempts, - ) - .await; + let (_read_height, _read_hash, all_validators) = + match read_validator_config(&self.execution_node, block.header()) { + Ok(ret) => ret, + Err(reason) => { + info!( + %reason, + execution_layer.status = self.execution_node.network.is_syncing(), + "unable to read validator config; will retry on next block", + ); + return Ok(()); + } + }; let peers = construct_peer_set(&onchain_outcome, &all_validators); @@ -327,12 +315,12 @@ where } } else { *last_tracked_peer_set = LastTrackedPeerSet { - id: block.height().get(), + height: block.height().get(), peers, }; self.oracle .track( - last_tracked_peer_set.id, + last_tracked_peer_set.height, last_tracked_peer_set.peers.clone(), ) .await; @@ -340,7 +328,7 @@ where } else { self.oracle.track(block.height().get(), peers.clone()).await; self.last_tracked_peer_set = Some(LastTrackedPeerSet { - id: block.height().get(), + height: block.height().get(), peers, }) } @@ -349,7 +337,6 @@ where self.peers.set(tracked.peers.len() as i64); } } - ack.acknowledge(); Ok(()) } } @@ -404,6 +391,30 @@ pub(crate) fn construct_peer_set( } struct LastTrackedPeerSet { - id: u64, + height: u64, peers: ordered::Map, } + +/// Reads the smart contract at `reference_header.number` or higher, if +/// available. 
+#[instrument(skip_all, err(level = Level::INFO))] +fn read_validator_config( + node: &TempoFullNode, + reference_header: &TempoHeader, +) -> eyre::Result<(u64, B256, Validators)> { + // TODO: is this too harsh? Should we wait until there is a best block + // available at a later point? + let best_block_number = node + .provider + .best_block_number() + .wrap_err("provider does not have best block available")?; + + ensure!( + best_block_number >= reference_header.number(), + "best_block_number `{best_block_number}` is below reference header {}", + reference_header.number(), + ); + + validators::read_from_contract_at_height(1, node, best_block_number, reference_header) + .wrap_err("unable to read validators from best block") +} diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index 481437d025..f93418c480 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -1,24 +1,18 @@ use std::{ collections::HashMap, net::{IpAddr, SocketAddr}, - time::Duration, }; use alloy_consensus::BlockHeader; use alloy_primitives::{Address, B256}; use commonware_codec::DecodeExt as _; -use commonware_consensus::types::Height; use commonware_cryptography::ed25519::PublicKey; use commonware_utils::{TryFromIterator, ordered}; use eyre::{OptionExt as _, WrapErr as _}; -use prometheus_client::metrics::counter::Counter; -use reth_ethereum::{ - evm::revm::{State, database::StateProviderDatabase}, - network::NetworkInfo, -}; +use reth_ethereum::evm::revm::{State, database::StateProviderDatabase}; use reth_node_builder::{Block as _, ConfigureEvm as _}; use reth_provider::{ - BlockHashReader as _, BlockIdReader as _, BlockNumReader as _, BlockReader as _, BlockSource, + BlockHashReader as _, BlockIdReader as _, BlockReader as _, BlockSource, StateProviderFactory as _, }; use tempo_chainspec::hardfork::TempoHardforks as _; @@ -32,7 +26,7 @@ use tempo_precompiles::{ use tempo_primitives::TempoHeader; use 
tracing::{Level, info, instrument, warn}; -pub(crate) fn v2_activation_height(node: &TempoFullNode, height: u64) -> eyre::Result { +pub(crate) fn v2_initialization_height(node: &TempoFullNode, height: u64) -> eyre::Result { read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { config .get_initialized_at_height() @@ -41,109 +35,13 @@ pub(crate) fn v2_activation_height(node: &TempoFullNode, height: u64) -> eyre::R .map(|(_, _, activation_height)| activation_height) } -pub(crate) fn is_v2_active(node: &TempoFullNode, height: u64) -> eyre::Result { +pub(crate) fn is_v2_initialized(node: &TempoFullNode, height: u64) -> eyre::Result { read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { config.is_initialized().map_err(eyre::Report::new) }) .map(|(_, _, activated)| activated) } -/// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. -/// -/// This is used to determine if the next DKG ceremony should be a full ceremony -/// (new polynomial) instead of a reshare. -#[instrument( - skip_all, - fields( - at_height, - ), - err, - ret(level = Level::INFO) -)] -pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, at_height: u64) -> eyre::Result { - if at_height - >= v2_activation_height(node, at_height).wrap_err( - "failed reading contract to determine validator config v2 activation height", - )? - { - read_validator_config_at_height(node, at_height, |config: &ValidatorConfigV2| { - config - .get_next_full_dkg_ceremony() - .map_err(eyre::Report::new) - }) - .map(|(_, _, epoch)| epoch) - } else { - read_validator_config_at_height(node, at_height, |config: &ValidatorConfig| { - config - .get_next_full_dkg_ceremony() - .map_err(eyre::Report::new) - }) - .map(|(_, _, epoch)| epoch) - } -} - -pub(crate) enum ReadTarget { - AtLeast { height: Height }, -} - -/// Attempts to read the validator config from the smart contract, retrying -/// until the required block height is available. 
-/// -/// Uses the timestamp of `reference_header` to decide whether to read validator -/// config v1 or v2. -pub(crate) async fn read_validator_config_with_retry( - context: &impl commonware_runtime::Clock, - node: &TempoFullNode, - reference_header: &TempoHeader, - target: ReadTarget, - total_attempts: &Counter, -) -> (u64, B256, Validators) { - let mut attempts = 0; - const MIN_RETRY: Duration = Duration::from_secs(1); - const MAX_RETRY: Duration = Duration::from_secs(30); - - 'read_contract: loop { - total_attempts.inc(); - attempts += 1; - - let target_height = match target { - ReadTarget::AtLeast { height } => node - .provider - .best_block_number() - .ok() - .map(Height::new) - .filter(|best| best >= &height) - .unwrap_or(height), - }; - - if let Ok(validators) = - read_from_contract_at_height(attempts, node, target_height.get(), reference_header) - { - break 'read_contract validators; - } - - let retry_after = MIN_RETRY.saturating_mul(attempts).min(MAX_RETRY); - let is_syncing = node.network.is_syncing(); - let best_block = node.provider.best_block_number(); - let blocks_behind = best_block - .as_ref() - .ok() - .map(|best| target_height.get().saturating_sub(*best)); - tracing::warn_span!("read_validator_config_with_retry").in_scope(|| { - warn!( - attempts, - retry_after = %tempo_telemetry_util::display_duration(retry_after), - is_syncing, - best_block = %tempo_telemetry_util::display_result(&best_block), - %target_height, - blocks_behind = %tempo_telemetry_util::display_option(&blocks_behind), - "reading validator config from contract failed; will retry", - ); - }); - context.sleep(retry_after).await; - } -} - /// Reads state from the ValidatorConfig precompile at a given block height. pub(crate) fn read_validator_config_at_height( node: &TempoFullNode, @@ -216,7 +114,27 @@ pub(crate) enum Validators { V2(ordered::Map), } -/// Reads the validator config at `height`. 
+/// Returns if the validator config v2 can be used exactly at the heigth and +/// timestamp of `header`. +/// +/// +/// Validator Config V2 can be used if: +/// +/// 1. if `header.timestamp` is active at the hardfork timestamp. +/// 2. if `header.number` is equal or greater than the contract initialization height. +/// 3. if the contract initialization flag is set. +pub(crate) fn can_use_v2(node: &TempoFullNode, header: &TempoHeader) -> eyre::Result { + Ok(node + .chain_spec() + .is_t2_active_at_timestamp(header.timestamp()) + && is_v2_initialized(node, header.number()) + .wrap_err("failed reading validator config v2 initialization flag")? + && v2_initialization_height(node, header.number()) + .wrap_err("failed reading validator config v2 initialization height")? + <= header.number()) +} + +/// Reads the validator config at `read_height`. /// /// Uses `reference_header` to determine whether to read validators from /// validator config v1 or v2. @@ -226,25 +144,21 @@ pub(crate) enum Validators { skip_all, fields( attempt = _attempt, - %height, + %read_height, ), - err + err(level = Level::WARN), )] pub(crate) fn read_from_contract_at_height( _attempt: u32, node: &TempoFullNode, - height: u64, + read_height: u64, reference_header: &TempoHeader, ) -> eyre::Result<(u64, B256, Validators)> { - let vals = if node - .chain_spec() - .is_t2_active_at_timestamp(reference_header.timestamp()) - && height - >= v2_activation_height(node, height).wrap_err( - "failed reading from validator config v2 activity state after hardfork activation", - )? { + let vals = if can_use_v2(node, reference_header) + .wrap_err("failed to determine if the v2 validator config contract can be used")? 
+ { let (read_height, hash, raw_validators) = - read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { + read_validator_config_at_height(node, read_height, |config: &ValidatorConfigV2| { config .get_validators() .wrap_err("failed to query contract for validator config") @@ -275,7 +189,7 @@ pub(crate) fn read_from_contract_at_height( ) } else { let (read_height, hash, raw_validators) = - read_validator_config_at_height(node, height, |config: &ValidatorConfig| { + read_validator_config_at_height(node, read_height, |config: &ValidatorConfig| { config .get_validators() .wrap_err("failed to query contract for validator config") diff --git a/crates/dkg-onchain-artifacts/src/lib.rs b/crates/dkg-onchain-artifacts/src/lib.rs index 2c7b0bb2d1..ae56441134 100644 --- a/crates/dkg-onchain-artifacts/src/lib.rs +++ b/crates/dkg-onchain-artifacts/src/lib.rs @@ -8,7 +8,10 @@ use commonware_consensus::types::Epoch; use commonware_cryptography::{ bls12381::{ dkg::Output, - primitives::{sharing::Sharing, variant::MinSig}, + primitives::{ + sharing::Sharing, + variant::{MinSig, Variant}, + }, }, ed25519::PublicKey, }; @@ -56,6 +59,10 @@ impl OnchainDkgOutcome { pub fn sharing(&self) -> &Sharing { self.output.public() } + + pub fn network_identity(&self) -> &::Public { + self.sharing().public() + } } impl Write for OnchainDkgOutcome { diff --git a/crates/e2e/src/tests/backfill.rs b/crates/e2e/src/tests/backfill.rs index e2f7c936d9..c7332f5995 100644 --- a/crates/e2e/src/tests/backfill.rs +++ b/crates/e2e/src/tests/backfill.rs @@ -2,7 +2,7 @@ use std::time::Duration; use commonware_macros::test_traced; use commonware_runtime::{ - Clock, Runner as _, + Clock, Metrics, Runner as _, deterministic::{self, Context, Runner}, }; use futures::future::join_all; @@ -34,7 +34,6 @@ async fn run_validator_late_join_test( context.sleep(Duration::from_secs(1)).await; } - // Start the last node last.start(context).await; 
assert_eq!(last.execution_provider().last_block_number().unwrap(), 0); @@ -43,6 +42,7 @@ async fn run_validator_late_join_test( // Assert that last node is able to catch up and progress while last.execution_provider().last_block_number().unwrap() < blocks_after_join { context.sleep(Duration::from_millis(100)).await; + assert_no_new_epoch(context, 0); } // Verify backfill behavior let actual_runs = get_pipeline_runs(metrics_recorder); @@ -84,3 +84,23 @@ fn validator_can_join_later_with_pipeline_sync() { run_validator_late_join_test(&mut context, 65, 70, true).await; }); } + +#[track_caller] +fn assert_no_new_epoch(context: &impl Metrics, max_epoch: u64) { + let metrics = context.encode(); + for line in metrics.lines() { + let mut parts = line.split_whitespace(); + let metric = parts.next().unwrap(); + let value = parts.next().unwrap(); + + if metrics.ends_with("_peers_blocked") { + let value = value.parse::().unwrap(); + assert_eq!(value, 0); + } + + if metric.ends_with("_epoch_manager_latest_epoch") { + let value = value.parse::().unwrap(); + assert!(value <= max_epoch, "epoch progressed; sync likely failed"); + } + } +} diff --git a/crates/e2e/src/tests/consensus_rpc.rs b/crates/e2e/src/tests/consensus_rpc.rs index 5b787f2007..0a0e4bafa6 100644 --- a/crates/e2e/src/tests/consensus_rpc.rs +++ b/crates/e2e/src/tests/consensus_rpc.rs @@ -5,7 +5,7 @@ use std::{net::SocketAddr, time::Duration}; -use super::dkg::common::{assert_no_dkg_failures, wait_for_epoch, wait_for_outcome}; +use super::dkg::common::{assert_no_dkg_failures, wait_for_validators_to_reach_epoch, wait_for_outcome}; use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators}; use alloy::transports::http::reqwest::Url; use alloy_primitives::hex; @@ -193,7 +193,7 @@ fn get_identity_transition_proof_after_full_dkg() { let pubkey_before = *outcome_before.sharing().public(); // Wait for full DKG to complete - wait_for_epoch(&context, full_dkg_epoch + 1, how_many_signers).await; + 
wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 1, how_many_signers).await; assert_no_dkg_failures(&context); // Verify the full DKG created a new public key diff --git a/crates/e2e/src/tests/dkg/common.rs b/crates/e2e/src/tests/dkg/common.rs index 82a9dcef12..62edc35bcd 100644 --- a/crates/e2e/src/tests/dkg/common.rs +++ b/crates/e2e/src/tests/dkg/common.rs @@ -88,7 +88,11 @@ pub(crate) fn count_validators_at_epoch(context: &Context, target_epoch: u64) -> } /// Waits until at least `min_validators` have reached the target epoch. -pub(crate) async fn wait_for_epoch(context: &Context, target_epoch: u64, min_validators: u32) { +pub(crate) async fn wait_for_validators_to_reach_epoch( + context: &Context, + target_epoch: u64, + min_validators: u32, +) { tracing::info!(target_epoch, min_validators, "Waiting for epoch"); loop { @@ -102,6 +106,7 @@ pub(crate) async fn wait_for_epoch(context: &Context, target_epoch: u64, min_val } /// Asserts that no DKG ceremony failures have occurred. +#[track_caller] pub(crate) fn assert_no_dkg_failures(context: &Context) { let metrics = context.encode(); @@ -117,6 +122,7 @@ pub(crate) fn assert_no_dkg_failures(context: &Context) { } /// Asserts that at least one validator has skipped rounds (indicating sync occurred). 
+#[track_caller] pub(crate) fn assert_skipped_rounds(context: &Context) { let metrics = context.encode(); diff --git a/crates/e2e/src/tests/dkg/fast_sync_after_full_dkg.rs b/crates/e2e/src/tests/dkg/fast_sync_after_full_dkg.rs index 533f626c19..cc2a92dde8 100644 --- a/crates/e2e/src/tests/dkg/fast_sync_after_full_dkg.rs +++ b/crates/e2e/src/tests/dkg/fast_sync_after_full_dkg.rs @@ -9,9 +9,11 @@ use commonware_runtime::{ use futures::future::join_all; use reth_ethereum::storage::BlockNumReader as _; use std::time::Duration; +use tracing::info; use super::common::{ - assert_no_dkg_failures, assert_skipped_rounds, wait_for_epoch, wait_for_outcome, + assert_no_dkg_failures, assert_skipped_rounds, wait_for_outcome, + wait_for_validators_to_reach_epoch, }; use crate::{Setup, setup_validators}; @@ -58,7 +60,6 @@ fn validator_can_fast_sync_after_full_dkg() { .await .unwrap(); - // wait for is_next_full_dkg flag let outcome_before = wait_for_outcome(&context, &validators, full_dkg_epoch - 1, epoch_length).await; assert!( @@ -68,7 +69,8 @@ fn validator_can_fast_sync_after_full_dkg() { let pubkey_before = *outcome_before.sharing().public(); // wait for full DKG completion (-1 because late validator not started yet) - wait_for_epoch(&context, full_dkg_epoch + 1, how_many_signers - 1).await; + wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 1, how_many_signers - 1) + .await; // verify new public key let outcome_after = @@ -91,6 +93,7 @@ fn validator_can_fast_sync_after_full_dkg() { // start late validator late_validator.start(&context).await; + info!(id = late_validator.uid, "started late validator",); assert_eq!( late_validator .execution_provider() diff --git a/crates/e2e/src/tests/dkg/full_ceremony.rs b/crates/e2e/src/tests/dkg/full_ceremony.rs index 3e33ad6544..99322aac39 100644 --- a/crates/e2e/src/tests/dkg/full_ceremony.rs +++ b/crates/e2e/src/tests/dkg/full_ceremony.rs @@ -8,7 +8,7 @@ use commonware_runtime::{ }; use futures::future::join_all; -use 
super::common::{assert_no_dkg_failures, wait_for_epoch, wait_for_outcome}; +use super::common::{assert_no_dkg_failures, wait_for_validators_to_reach_epoch, wait_for_outcome}; use crate::{Setup, setup_validators}; #[test_traced] @@ -77,7 +77,7 @@ impl FullDkgTest { tracing::info!(?pubkey_before, "Group public key BEFORE full DKG"); // Step 2: Wait for full DKG to complete (epoch N+1) - wait_for_epoch(&context, self.full_dkg_epoch + 1, self.how_many_signers).await; + wait_for_validators_to_reach_epoch(&context, self.full_dkg_epoch + 1, self.how_many_signers).await; assert_no_dkg_failures(&context); // Step 3: Verify full DKG created a NEW polynomial (different public key) @@ -99,7 +99,7 @@ impl FullDkgTest { tracing::info!("Verified: full DKG created independent polynomial"); // Step 4: Wait for reshare (epoch N+2) and verify it PRESERVES the public key - wait_for_epoch(&context, self.full_dkg_epoch + 2, self.how_many_signers).await; + wait_for_validators_to_reach_epoch(&context, self.full_dkg_epoch + 2, self.how_many_signers).await; assert_no_dkg_failures(&context); let outcome_after_reshare = wait_for_outcome( diff --git a/crates/e2e/src/tests/dkg/share_loss.rs b/crates/e2e/src/tests/dkg/share_loss.rs index 8a535152a8..56c416f124 100644 --- a/crates/e2e/src/tests/dkg/share_loss.rs +++ b/crates/e2e/src/tests/dkg/share_loss.rs @@ -9,7 +9,7 @@ use futures::future::join_all; use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators}; -#[test_traced("WARN")] +#[test_traced] fn validator_lost_share_but_gets_share_in_next_epoch() { let _ = tempo_eyre::install(); @@ -19,8 +19,11 @@ fn validator_lost_share_but_gets_share_in_next_epoch() { let executor = Runner::from(cfg); executor.start(|mut context| async move { - let epoch_length = 30; - let setup = Setup::new().seed(seed).epoch_length(epoch_length); + let epoch_length = 20; + let setup = Setup::new() + .seed(seed) + .epoch_length(epoch_length) + .connect_execution_layer_nodes(true); let (mut validators, 
_execution_runtime) = setup_validators(&mut context, setup.clone()).await; @@ -70,7 +73,6 @@ fn validator_lost_share_but_gets_share_in_next_epoch() { && metric.ends_with("_epoch_manager_how_often_verifier_total") { let value = value.parse::().unwrap(); - tracing::warn!(metric, value,); node_forgot_share = value > 0; } @@ -80,7 +82,6 @@ fn validator_lost_share_but_gets_share_in_next_epoch() { && metric.ends_with("_epoch_manager_how_often_signer_total") { let value = value.parse::().unwrap(); - tracing::warn!(metric, value,); if value > 0 { break 'acquire_share; } diff --git a/crates/e2e/src/tests/dkg/static_transitions.rs b/crates/e2e/src/tests/dkg/static_transitions.rs index 55e6265c6b..600ceb01ec 100644 --- a/crates/e2e/src/tests/dkg/static_transitions.rs +++ b/crates/e2e/src/tests/dkg/static_transitions.rs @@ -10,7 +10,7 @@ use crate::{Setup, run}; fn single_validator_can_transition_once() { AssertStaticTransitions { how_many: 1, - epoch_length: 10, + epoch_length: 5, transitions: 1, } .run(); @@ -20,7 +20,7 @@ fn single_validator_can_transition_once() { fn single_validator_can_transition_twice() { AssertStaticTransitions { how_many: 1, - epoch_length: 10, + epoch_length: 5, transitions: 2, } .run(); @@ -30,7 +30,7 @@ fn single_validator_can_transition_twice() { fn single_validator_can_transition_four_times() { AssertStaticTransitions { how_many: 1, - epoch_length: 10, + epoch_length: 5, transitions: 4, } .run(); From a5c95437f4b42873e2ac8a919a8fcec3f0dd5aa2 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Tue, 17 Feb 2026 18:02:56 +0100 Subject: [PATCH 06/11] clean up reading from contracts; add metrics --- .../src/dkg/manager/actor/mod.rs | 270 +++++++++++------- crates/commonware-node/src/validators.rs | 122 +++++++- .../mod.rs} | 0 3 files changed, 278 insertions(+), 114 deletions(-) rename crates/e2e/src/tests/dkg/{static_transitions.rs => static_transitions/mod.rs} (100%) diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs 
b/crates/commonware-node/src/dkg/manager/actor/mod.rs index 24cb4a8bf9..b52f09a2ce 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -35,12 +35,12 @@ use prometheus_client::metrics::{counter::Counter, gauge::Gauge}; use rand_core::CryptoRngCore; use reth_ethereum::network::NetworkInfo; use reth_provider::{BlockNumReader, HeaderProvider}; +use tempo_chainspec::hardfork::TempoHardforks; use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use tempo_node::TempoFullNode; use tempo_precompiles::{ validator_config::ValidatorConfig, validator_config_v2::ValidatorConfigV2, }; -use tempo_primitives::TempoHeader; use tracing::{Level, Span, debug, info, info_span, instrument, warn, warn_span}; use crate::{ @@ -50,7 +50,9 @@ use crate::{ ingress::{GetDkgOutcome, VerifyDealerLog}, }, validators::{ - DecodedValidatorV2, can_use_v2, decode_from_contract, read_validator_config_at_height, + DecodedValidatorV2, can_use_v2_at_block_hash, decode_from_contract, + is_v2_initialized_at_height, read_validator_config_at_block_hash, + read_validator_config_at_height, }, }; @@ -175,7 +177,6 @@ where let initial_share = self.config.initial_share.clone(); let epoch_strategy = self.config.epoch_strategy.clone(); let mut marshal = self.config.marshal.clone(); - let attempts = self.metrics.attempts_to_read_validator_contract.clone(); async move { read_initial_state_and_set_floor( &mut context, @@ -183,7 +184,6 @@ where initial_share.clone(), &epoch_strategy, &mut marshal, - &attempts, ) .await } @@ -763,9 +763,9 @@ where share = state::ShareState::Plaintext(None); } - // Because we use cached data we, need to check for DKG success here: - // if the on-chain output is the input output (the output of the previous - // state), then we know the DKG failed. 
+ // Because we use cached data, we need to check for DKG success here: + // if the on-chain output is the same as the input into the loop (which + // is just state.output), then we know the DKG failed. if onchain_outcome.output == state.output { self.metrics.failures.inc(); } else { @@ -775,8 +775,9 @@ where let syncers = read_syncers_if_v2_not_initialized_with_retry( &self.context, &self.config.execution_node, - block.header(), + &block, &self.metrics.attempts_to_read_validator_contract, + &self.metrics.read_players_from_v1_contract, ) .await .wrap_err("failed reading contract to determine syncers")?; @@ -843,8 +844,9 @@ where let syncers = read_syncers_if_v2_not_initialized_with_retry( &self.context, &self.config.execution_node, - block.header(), + &block, &self.metrics.attempts_to_read_validator_contract, + &self.metrics.read_players_from_v1_contract, ) .await .wrap_err("failed reading contract; must be able to read contract to continue")?; @@ -1127,18 +1129,27 @@ where // Check if next ceremony should be full. let next_epoch = state.epoch.next(); - let will_be_re_dkg = read_re_dkg_epoch(&self.config.execution_node, request.digest) - // in theory it should never fail, but if it does, just stick to reshare. - .is_ok_and(|epoch| epoch == next_epoch.get()); + let will_be_re_dkg = read_re_dkg_epoch( + &self.config.execution_node, + request.digest, + &self.metrics.read_re_dkg_epoch_from_v1_contract, + &self.metrics.read_re_dkg_epoch_from_v2_contract, + ) + // in theory it should never fail, but if it does, just stick to reshare. 
+ .is_ok_and(|epoch| epoch == next_epoch.get()); info!( will_be_re_dkg, %next_epoch, "determined if the next epoch will be a reshare or full re-dkg process", ); - let next_players = - determine_next_players(&state, &self.config.execution_node, request.digest) - .wrap_err("could not determine who the next players are supposed to be")?; + let next_players = determine_next_players( + &state, + &self.config.execution_node, + request.digest, + &self.metrics.read_players_from_v2_contract, + ) + .wrap_err("could not determine who the next players are supposed to be")?; request .response .send(OnchainDkgOutcome { @@ -1183,7 +1194,6 @@ async fn read_initial_state_and_set_floor( share: Option, epoch_strategy: &FixedEpocher, marshal: &mut crate::alias::marshal::Mailbox, - total_attempts: &Counter, ) -> eyre::Result where TContext: Clock + CryptoRngCore, @@ -1228,20 +1238,39 @@ where format!("failed to read header for last boundary block number `{last_boundary}`") })?; - // XXX: Reads the contract from the latest available block (newest_height), - // not from the boundary. The reason is that we cannot be sure that the - // boundary block is available. But we know that the on-chain state is - // immutable - validators never change their identity and never update their - // IP addresses (the latter would actually probably be fine; what matters is - // that identities don't change). let onchain_outcome = tempo_dkg_onchain_artifacts::OnchainDkgOutcome::read(&mut header.extra_data().as_ref()) .wrap_err("the boundary header did not contain the on-chain DKG outcome")?; - let syncers = - read_syncers_if_v2_not_initialized_with_retry(context, node, &header, total_attempts) - .await - .wrap_err("failed determining syncing peers from contract")?; + // Don't need to read the syncers if past the hardfork and V2 is already + // active on that epoch. 
+ let syncers = if node + .chain_spec() + .is_t2_active_at_timestamp(header.timestamp()) + && is_v2_initialized_at_height(&node, header.number()) + .wrap_err("unable to determine if v2 contract is already initialized")? + { + ordered::Set::default() + } else { + let (_read_height, _read_hash, raw_validators) = read_validator_config_at_height( + node, + last_boundary.get(), + |config: &ValidatorConfig| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + }, + )?; + info!( + ?raw_validators, + "read validators from validator config v1 contract", + ); + ordered::Set::from_iter_dedup( + decode_from_contract(raw_validators) + .iter_pairs() + .filter_map(|(k, v)| v.is_active().then_some(k.clone())), + ) + }; let share = state::ShareState::Plaintext('verify_initial_share: { let Some(share) = share else { @@ -1299,6 +1328,12 @@ struct Metrics { rounds_skipped: Counter, attempts_to_read_validator_contract: Counter, + + read_players_from_v1_contract: Counter, + read_players_from_v2_contract: Counter, + + read_re_dkg_epoch_from_v1_contract: Counter, + read_re_dkg_epoch_from_v2_contract: Counter, } impl Metrics { @@ -1409,6 +1444,34 @@ impl Metrics { attempts_to_read_validator_contract.clone(), ); + let read_players_from_v1_contract = Counter::default(); + context.register( + "read_players_from_v1_contract", + "the number of times the players (or syncers) were read from the validator config v1 contract", + read_players_from_v1_contract.clone(), + ); + + let read_players_from_v2_contract = Counter::default(); + context.register( + "read_players_from_v2_contract", + "the number of times the players were read from the validator config v1 contract", + read_players_from_v2_contract.clone(), + ); + + let read_re_dkg_epoch_from_v1_contract = Counter::default(); + context.register( + "read_re_dkg_epoch_from_v1_contract", + "the number of times the next re-DKG epoch was read from the validator config v1 contract", + 
read_re_dkg_epoch_from_v1_contract.clone(), + ); + + let read_re_dkg_epoch_from_v2_contract = Counter::default(); + context.register( + "read_re_dkg_epoch_from_v2_contract", + "the number of times the next re-DKG epoch was read from the validator config v2 contract", + read_re_dkg_epoch_from_v2_contract.clone(), + ); + Self { syncing_players, shares_distributed, @@ -1425,6 +1488,10 @@ impl Metrics { successes, rounds_skipped, attempts_to_read_validator_contract, + read_players_from_v1_contract, + read_players_from_v2_contract, + read_re_dkg_epoch_from_v1_contract, + read_re_dkg_epoch_from_v2_contract, } } @@ -1529,8 +1596,9 @@ fn read_dealer_log( async fn read_syncers_if_v2_not_initialized_with_retry( context: &impl commonware_runtime::Clock, node: &TempoFullNode, - reference_header: &TempoHeader, + boundary_block: &Block, total_attempts: &Counter, + v1_read: &Counter, ) -> eyre::Result> { let mut attempts = 0; const MIN_RETRY: Duration = Duration::from_secs(1); @@ -1540,7 +1608,9 @@ async fn read_syncers_if_v2_not_initialized_with_retry( total_attempts.inc(); attempts += 1; - if let Ok(syncers) = read_syncers_if_v2_not_initialized(attempts, node, reference_header) { + if let Ok(syncers) = + read_syncers_if_v2_not_initialized(attempts, node, boundary_block, v1_read) + { break 'read_contract Ok(syncers); } @@ -1550,14 +1620,14 @@ async fn read_syncers_if_v2_not_initialized_with_retry( let blocks_behind = best_block .as_ref() .ok() - .map(|best| reference_header.number().saturating_sub(*best)); + .map(|best| boundary_block.number().saturating_sub(*best)); tracing::warn_span!("read_validator_config_with_retry").in_scope(|| { warn!( attempts, retry_after = %tempo_telemetry_util::display_duration(retry_after), is_syncing, best_block = %tempo_telemetry_util::display_result(&best_block), - height_if_v1 = reference_header.number(), + height_if_v1 = boundary_block.number(), blocks_behind = %tempo_telemetry_util::display_option(&blocks_behind), "reading validator config from 
contract failed; will retry", ); @@ -1572,6 +1642,12 @@ async fn read_syncers_if_v2_not_initialized_with_retry( /// returns an empty set because after the hardfork syncers do not need to be /// tracked. /// +/// IMPORTANT: it is expected that this function is only called on boundary +/// blocks. It relies on the fact that the next players are read from the V2 +/// smart contract at boundary - 1, i.e. not on the boundary height but one +/// before (of course assuming the contract is initialized and we are past the +/// boundary). +/// /// The implementation reads the immutable v2 initialization height to avoid /// having to read a state at a specific block: if the initializaton has already /// happened, `initialization_height <= height`, then reading validator v1 @@ -1580,31 +1656,26 @@ async fn read_syncers_if_v2_not_initialized_with_retry( skip_all, fields( attempt = _attempt, - height_if_v1 = reference_header.number(), + block.digest = %boundary_block.digest(), + block.height = %boundary_block.height(), ), err )] pub(crate) fn read_syncers_if_v2_not_initialized( _attempt: u32, node: &TempoFullNode, - reference_header: &TempoHeader, + boundary_block: &Block, + read_v1: &Counter, ) -> eyre::Result> { - let best_header = best_header(node).wrap_err("no best header available in execution layer")?; - ensure!( - best_header.number() >= reference_header.number(), - "height of best available header below reference header; node still syncing?" - ); - // Take the best available - if can_use_v2(node, &best_header) - .wrap_err("could not determine if validator config v2 can be used or not")? + if can_use_v2_at_block_hash(node, boundary_block.parent_digest().0) + .wrap_err("unable to determine if the validator config v2 can be used or not")? 
{ - debug!("validator config v2 active and initialized; no need to explicitly track syncers"); return Ok(ordered::Set::default()); } - - let (_read_height, _read_hash, raw_validators) = read_validator_config_at_height( + read_v1.inc(); + let (_read_height, _read_hash, raw_validators) = read_validator_config_at_block_hash( node, - reference_header.number(), + boundary_block.digest().0, |config: &ValidatorConfig| { config .get_validators() @@ -1624,49 +1695,42 @@ pub(crate) fn read_syncers_if_v2_not_initialized( /// Determines the next players depending on the header timestamp identified by `digest`. /// -/// This function should be (and practically is) only called when constructing or -/// verifying a proposal. `digest` should therefore always refer to the parent -/// parent of the porposal. +/// This function should only be used when constructing or verifying a proposal. +/// `digest` should therefore always refer to the parent parent of the proposal. /// -/// It is therefore save to assume that the execution layer should always have -/// the header and state corresponding to `digest` available. +/// If the execution layer does not have a block corresponding to `digest` +/// available then it cannot propose or verify a block. fn determine_next_players( state: &State, node: &TempoFullNode, digest: Digest, + read_v2: &Counter, ) -> eyre::Result> { - let header = node - .provider - .header(digest.0) - .map_err(eyre::Report::new) - .and_then(|maybe| maybe.ok_or_eyre("hash not known")) - .wrap_err_with(|| { - format!("failed reading header for block hash `{digest}` from execution layer") - })?; - let syncers = if can_use_v2(node, &header) + let syncers = if can_use_v2_at_block_hash(node, digest.0) .wrap_err("failed determining if validator config v2 can be used")? 
{ + read_v2.inc(); debug!("reading next players from validator config v2 contract"); - read_validator_config_at_height(node, header.number(), |config: &ValidatorConfigV2| { - let raw = config - .get_validators() - .wrap_err("failed to query contract for validator config")?; - - let decoded_validators = raw - .into_iter() - .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) - .collect::, _>>() - .wrap_err("failed decoding an entry in the on-chain validator set")?; - - ordered::Set::try_from_iter(decoded_validators.into_iter().filter_map(|validator| { - validator - .is_active_at_height(header.number()) - .then_some(validator.public_key().clone()) - })) - .wrap_err("contract contained validators with duplicate public keys") - }) + let (read_height, _, raw_validators) = + read_validator_config_at_block_hash(node, digest.0, |config: &ValidatorConfigV2| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + }) + .wrap_err("failed reading raw validator config v2")?; + + let decoded_validators = raw_validators + .into_iter() + .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) + .collect::, _>>() + .wrap_err("failed decoding an entry in v2 on-chain validator set")?; + + ordered::Set::try_from_iter(decoded_validators.into_iter().filter_map(|validator| { + validator + .is_active_at_height(read_height) + .then_some(validator.public_key().clone()) + })) .wrap_err("failed reading validator config v2")? - .2 } else { debug!("using validator config v1 syncers from state"); state.syncers.clone() @@ -1676,46 +1740,43 @@ fn determine_next_players( Ok(syncers) } -/// Reads the `nextFullDkgCeremony` epoch value from the ValidatorConfig precompile. +/// Reads the `nextFullDkgCeremony` epoch value from one of the validator config contracts. /// /// This is used to determine if the next DKG ceremony should be a full ceremony /// (new polynomial) instead of a reshare. 
/// +/// This function should only be used when constructing or verifying a proposal. +/// `digest` should therefore always refer to the parent parent of the proposal. /// -/// This function should be (and practically is) only called when constructing or -/// verifying a proposal. `digest` should therefore always refer to the parent -/// parent of the porposal. -/// -/// It is therefore save to assume that the execution layer should always have -/// the header and state corresponding to `digest` available. +/// If the execution layer does not have a block corresponding to `digest` +/// available then it cannot propose or verify a block. #[instrument( skip_all, fields( - at_height, + %digest, ), err, - ret(level = Level::INFO) + ret(level = Level::WARN) )] -pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, digest: Digest) -> eyre::Result { - let header = node - .provider - .header(digest.0) - .map_err(eyre::Report::new) - .and_then(|maybe| maybe.ok_or_eyre("hash not known")) - .wrap_err_with(|| { - format!("failed reading header for block hash `{digest}` from execution layer") - })?; - if can_use_v2(node, &header) +pub(crate) fn read_re_dkg_epoch( + node: &TempoFullNode, + digest: Digest, + v1_counter: &Counter, + v2_counter: &Counter, +) -> eyre::Result { + if can_use_v2_at_block_hash(node, digest.0) .wrap_err("failed determining if validator config v2 can be used")? 
{ - read_validator_config_at_height(node, header.number(), |config: &ValidatorConfigV2| { + v2_counter.inc(); + read_validator_config_at_block_hash(node, digest.0, |config: &ValidatorConfigV2| { config .get_next_full_dkg_ceremony() .map_err(eyre::Report::new) }) .map(|(_, _, epoch)| epoch) } else { - read_validator_config_at_height(node, header.number(), |config: &ValidatorConfig| { + v1_counter.inc(); + read_validator_config_at_block_hash(node, digest.0, |config: &ValidatorConfig| { config .get_next_full_dkg_ceremony() .map_err(eyre::Report::new) @@ -1723,16 +1784,3 @@ pub(crate) fn read_re_dkg_epoch(node: &TempoFullNode, digest: Digest) -> eyre::R .map(|(_, _, epoch)| epoch) } } - -/// Returns the header corresponding to the best block number. -fn best_header(node: &TempoFullNode) -> eyre::Result { - let h = node - .provider - .best_block_number() - .wrap_err("failed reading best available block number from execution layer")?; - node.provider - .header_by_number(h) - .map_err(eyre::Report::new) - .and_then(|maybe| maybe.ok_or_eyre("header not known")) - .wrap_err("could not read header for best available block number") -} diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index f93418c480..95e5e15655 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -12,8 +12,8 @@ use eyre::{OptionExt as _, WrapErr as _}; use reth_ethereum::evm::revm::{State, database::StateProviderDatabase}; use reth_node_builder::{Block as _, ConfigureEvm as _}; use reth_provider::{ - BlockHashReader as _, BlockIdReader as _, BlockReader as _, BlockSource, - StateProviderFactory as _, + BlockHashReader as _, BlockIdReader as _, BlockNumReader as _, BlockReader as _, BlockSource, + HeaderProvider as _, StateProviderFactory as _, }; use tempo_chainspec::hardfork::TempoHardforks as _; use tempo_node::TempoFullNode; @@ -35,6 +35,18 @@ pub(crate) fn v2_initialization_height(node: &TempoFullNode, 
height: u64) -> eyr .map(|(_, _, activation_height)| activation_height) } +pub(crate) fn v2_initialization_height_at_block_hash( + node: &TempoFullNode, + hash: B256, +) -> eyre::Result { + read_validator_config_at_block_hash(node, hash, |config: &ValidatorConfigV2| { + config + .get_initialized_at_height() + .map_err(eyre::Report::new) + }) + .map(|(_, _, activation_height)| activation_height) +} + pub(crate) fn is_v2_initialized(node: &TempoFullNode, height: u64) -> eyre::Result { read_validator_config_at_height(node, height, |config: &ValidatorConfigV2| { config.is_initialized().map_err(eyre::Report::new) @@ -42,6 +54,16 @@ pub(crate) fn is_v2_initialized(node: &TempoFullNode, height: u64) -> eyre::Resu .map(|(_, _, activated)| activated) } +pub(crate) fn is_v2_initialized_at_block_hash( + node: &TempoFullNode, + hash: B256, +) -> eyre::Result { + read_validator_config_at_block_hash(node, hash, |config: &ValidatorConfigV2| { + config.is_initialized().map_err(eyre::Report::new) + }) + .map(|(_, _, activated)| activated) +} + /// Reads state from the ValidatorConfig precompile at a given block height. 
pub(crate) fn read_validator_config_at_height( node: &TempoFullNode, @@ -76,7 +98,52 @@ where let block = node .provider .find_block_by_hash(block_hash, BlockSource::Any) - .map_err(Into::::into) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("execution layer returned empty block")) + .wrap_err_with(|| format!("failed reading block with hash `{block_hash}`"))?; + + let db = State::builder() + .with_database(StateProviderDatabase::new( + node.provider + .state_by_block_hash(block_hash) + .wrap_err_with(|| { + format!("failed to get state from node provider for hash `{block_hash}`") + })?, + )) + .build(); + + let mut evm = node + .evm_config + .evm_for_block(db, block.header()) + .wrap_err("failed instantiating evm for block")?; + + let height = block.number(); + let hash = block.seal_slow().hash(); + + let ctx = evm.ctx_mut(); + let res = StorageCtx::enter_evm( + &mut ctx.journaled_state, + &ctx.block, + &ctx.cfg, + &ctx.tx, + || read_fn(&C::default()), + )?; + Ok((height, hash, res)) +} + +/// Reads the validator state at the given block hash. +pub(crate) fn read_validator_config_at_block_hash( + node: &TempoFullNode, + block_hash: B256, + read_fn: impl FnOnce(&C) -> eyre::Result, +) -> eyre::Result<(u64, B256, T)> +where + C: Default, +{ + let block = node + .provider + .find_block_by_hash(block_hash, BlockSource::Any) + .map_err(eyre::Report::new) .and_then(|maybe| maybe.ok_or_eyre("execution layer returned empty block")) .wrap_err_with(|| format!("failed reading block with hash `{block_hash}`"))?; @@ -114,6 +181,27 @@ pub(crate) enum Validators { V2(ordered::Map), } +/// Returns if the validator config v2 is initialized at `height`. 
+pub(crate) fn is_v2_initialized_at_height(node: &TempoFullNode, height: u64) -> eyre::Result { + let h = node + .provider + .best_block_number() + .wrap_err("failed reading best available block number from execution layer")?; + let hash = node + .provider + .block_hash(h) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("header not known")) + .wrap_err("could not read hash for best available block number")?; + let initialization_height = v2_initialization_height_at_block_hash(node, hash) + .wrap_err("failed reading validator config v2 initialization height")?; + match initialization_height { + 0 => is_v2_initialized_at_block_hash(node, hash) + .wrap_err("failed reading initialization flag"), + n => Ok(n <= height), + } +} + /// Returns if the validator config v2 can be used exactly at the heigth and /// timestamp of `header`. /// @@ -134,6 +222,34 @@ pub(crate) fn can_use_v2(node: &TempoFullNode, header: &TempoHeader) -> eyre::Re <= header.number()) } +/// Returns if the validator config v2 can be used exactly at the heigth and +/// timestamp of `header`. +/// +/// +/// Validator Config V2 can be used if: +/// +/// 1. if `header.timestamp` is active at the hardfork timestamp. +/// 2. if `header.number` is equal or greater than the contract initialization height. +/// 3. if the contract initialization flag is set. +pub(crate) fn can_use_v2_at_block_hash(node: &TempoFullNode, hash: B256) -> eyre::Result { + let header = node + .provider + .header(hash) + .map_err(eyre::Report::new) + .and_then(|maybe| maybe.ok_or_eyre("hash not known")) + .wrap_err_with(|| { + format!("failed reading header for block hash `{hash}` from execution layer") + })?; + Ok(node + .chain_spec() + .is_t2_active_at_timestamp(header.timestamp()) + && is_v2_initialized_at_block_hash(node, hash) + .wrap_err("failed reading validator config v2 initialization flag")? 
+ && v2_initialization_height_at_block_hash(node, hash) + .wrap_err("failed reading validator config v2 initialization height")? + <= header.number()) +} + /// Reads the validator config at `read_height`. /// /// Uses `reference_header` to determine whether to read validators from diff --git a/crates/e2e/src/tests/dkg/static_transitions.rs b/crates/e2e/src/tests/dkg/static_transitions/mod.rs similarity index 100% rename from crates/e2e/src/tests/dkg/static_transitions.rs rename to crates/e2e/src/tests/dkg/static_transitions/mod.rs From 1309adb22c9ebc1baf5f408fb5c409b5292164d2 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Wed, 18 Feb 2026 15:52:06 +0100 Subject: [PATCH 07/11] typos found by @joshie Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com> --- .../commonware-node/src/consensus/application/actor.rs | 4 ++-- crates/commonware-node/src/peer_manager/actor.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/commonware-node/src/consensus/application/actor.rs b/crates/commonware-node/src/consensus/application/actor.rs index f248a7d252..15c9bdb79f 100644 --- a/crates/commonware-node/src/consensus/application/actor.rs +++ b/crates/commonware-node/src/consensus/application/actor.rs @@ -517,8 +517,8 @@ impl Inner { %outcome.epoch, outcome.network_identity = %outcome.network_identity(), outcome.dealers = ?outcome.dealers(), - outcome.dealers = ?outcome.players(), - outcome.dealers = ?outcome.next_players(), + outcome.players = ?outcome.players(), + outcome.next_players = ?outcome.next_players(), "received DKG outcome; will include in payload builder attributes", ); outcome.encode().into() diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index a85ea7bc3a..bac3f2d8a5 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -186,7 +186,7 @@ where read_height, %read_hash, 
?peers, - "bootstrapped initial peer set from last boundary block and best execution layer bock", + "bootstrapped initial peer set from last boundary block and best execution layer block", ); let header = self @@ -195,17 +195,17 @@ where .header(read_hash) .expect("must be access execution layer to get header - just read validator config for") .expect("execution layer must have the header - just read validator config for it"); - let last_tracked_peer_seet = LastTrackedPeerSet { + let last_tracked_peer_set = LastTrackedPeerSet { height: header.number(), peers, }; self.oracle .track( - last_tracked_peer_seet.height, - last_tracked_peer_seet.peers.clone(), + last_tracked_peer_set.height, + last_tracked_peer_set.peers.clone(), ) .await; - self.last_tracked_peer_set = Some(last_tracked_peer_seet); + self.last_tracked_peer_set = Some(last_tracked_peer_set); Ok(()) } From ed82cd3848d697876ebeac9d357e7dc6eb85bcdd Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Wed, 18 Feb 2026 19:58:30 +0100 Subject: [PATCH 08/11] outcome.players are the current dealers, outcome.dealers are the *old* dealers --- crates/commonware-node/src/peer_manager/actor.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index bac3f2d8a5..bf7c69260b 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -346,10 +346,9 @@ pub(crate) fn construct_peer_set( validators: &Validators, ) -> commonware_utils::ordered::Map { // Dealers are output.players() from the previous epoch's DKG output. - // Players are outcome.next_players (the players for the next DKG round). - // Syncers are all currently active validators. + // Players are outcome.next_players. 
let all_keys = outcome - .dealers() + .players() .iter() .chain(outcome.next_players().iter()) .chain(match validators { From 052ef6b147ca832344aa270a1ad5f8c17181c0cd Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Thu, 19 Feb 2026 13:19:43 +0100 Subject: [PATCH 09/11] peer manager reads finalized block only after executor delivered; read blocks at hashes; add metrics --- .../commonware-node/src/consensus/engine.rs | 23 ++--- .../src/dkg/manager/actor/mod.rs | 14 +-- crates/commonware-node/src/executor/actor.rs | 38 ++++++-- .../commonware-node/src/executor/ingress.rs | 29 +++++- .../commonware-node/src/peer_manager/actor.rs | 34 ++++++- .../commonware-node/src/peer_manager/mod.rs | 5 ++ crates/commonware-node/src/validators.rs | 89 ++++++++++++++++--- 7 files changed, 188 insertions(+), 44 deletions(-) diff --git a/crates/commonware-node/src/consensus/engine.rs b/crates/commonware-node/src/consensus/engine.rs index a2d8eea55b..08c8308b5e 100644 --- a/crates/commonware-node/src/consensus/engine.rs +++ b/crates/commonware-node/src/consensus/engine.rs @@ -279,10 +279,22 @@ where ) .await; + let (executor, executor_mailbox) = crate::executor::init( + context.with_label("executor"), + crate::executor::Config { + execution_node: execution_node.clone(), + last_finalized_height, + marshal: marshal_mailbox.clone(), + fcu_heartbeat_interval: self.fcu_heartbeat_interval, + }, + ) + .wrap_err("failed initialization executor actor")?; + let (peer_manager, peer_manager_mailbox) = peer_manager::init( context.with_label("peer_manager"), peer_manager::Config { execution_node: execution_node.clone(), + executor: executor_mailbox.clone(), oracle: self.peer_manager.clone(), epoch_strategy: epoch_strategy.clone(), last_finalized_height, @@ -322,17 +334,6 @@ where self.feed_state, ); - let (executor, executor_mailbox) = crate::executor::init( - context.with_label("executor"), - crate::executor::Config { - execution_node: execution_node.clone(), - last_finalized_height, - 
marshal: marshal_mailbox.clone(), - fcu_heartbeat_interval: self.fcu_heartbeat_interval, - }, - ) - .wrap_err("failed initialization executor actor")?; - let (application, application_mailbox) = application::init(super::application::Config { context: context.with_label("application"), fee_recipient: self.fee_recipient, diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index b52f09a2ce..7d800a3f00 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -1144,7 +1144,7 @@ where ); let next_players = determine_next_players( - &state, + state, &self.config.execution_node, request.digest, &self.metrics.read_players_from_v2_contract, @@ -1247,7 +1247,7 @@ where let syncers = if node .chain_spec() .is_t2_active_at_timestamp(header.timestamp()) - && is_v2_initialized_at_height(&node, header.number()) + && is_v2_initialized_at_height(node, header.number()) .wrap_err("unable to determine if v2 contract is already initialized")? { ordered::Set::default() @@ -1649,7 +1649,7 @@ async fn read_syncers_if_v2_not_initialized_with_retry( /// boundary). /// /// The implementation reads the immutable v2 initialization height to avoid -/// having to read a state at a specific block: if the initializaton has already +/// having to read a state at a specific block: if the initialization has already /// happened, `initialization_height <= height`, then reading validator v1 /// is skipped and an empty list returned. #[instrument( @@ -1706,7 +1706,7 @@ fn determine_next_players( digest: Digest, read_v2: &Counter, ) -> eyre::Result> { - let syncers = if can_use_v2_at_block_hash(node, digest.0) + let next_players = if can_use_v2_at_block_hash(node, digest.0) .wrap_err("failed determining if validator config v2 can be used")? 
{ read_v2.inc(); @@ -1721,7 +1721,7 @@ fn determine_next_players( let decoded_validators = raw_validators .into_iter() - .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) + .map(DecodedValidatorV2::decode_from_contract) .collect::, _>>() .wrap_err("failed decoding an entry in v2 on-chain validator set")?; @@ -1736,8 +1736,8 @@ fn determine_next_players( state.syncers.clone() }; - debug!(?syncers, "determined syncers"); - Ok(syncers) + debug!(?next_players, "determined next players"); + Ok(next_players) } /// Reads the `nextFullDkgCeremony` epoch value from one of the validator config contracts. diff --git a/crates/commonware-node/src/executor/actor.rs b/crates/commonware-node/src/executor/actor.rs index e6431daec4..405358286f 100644 --- a/crates/commonware-node/src/executor/actor.rs +++ b/crates/commonware-node/src/executor/actor.rs @@ -4,7 +4,7 @@ //! execution layer and tracks the digest of the latest finalized block. //! It also advances the canonical chain by sending forkchoice-updates. -use std::{pin::Pin, sync::Arc, time::Duration}; +use std::{collections::BTreeMap, pin::Pin, sync::Arc, time::Duration}; use alloy_rpc_types_engine::ForkchoiceState; use commonware_consensus::{Heightable as _, marshal::Update, types::Height}; @@ -28,13 +28,11 @@ use tracing::{ Level, Span, debug, error, error_span, info, info_span, instrument, warn, warn_span, }; -use crate::{ - consensus::{Digest, block::Block}, - executor::{ - Config, - ingress::{CanonicalizeHead, Command, Message}, - }, +use super::{ + Config, + ingress::{CanonicalizeHead, Command, Message, SubscribeFinalized}, }; +use crate::consensus::{Digest, block::Block}; /// Tracks the last forkchoice state that the executor sent to the execution layer. /// @@ -116,6 +114,10 @@ pub(crate) struct Actor { /// The timer for the next FCU heartbeat. Reset whenever an FCU is sent. fcu_heartbeat_timer: Pin + Send>>, + + /// A list of subscriptions waiting for the executor to finalize a block + /// at a given height. 
+ pending_finalized_subscriptions: BTreeMap>>, } impl Actor @@ -164,6 +166,7 @@ where }, fcu_heartbeat_interval, fcu_heartbeat_timer, + pending_finalized_subscriptions: BTreeMap::new(), }) } @@ -319,6 +322,16 @@ where .await .wrap_err("failed handling finalization")?; } + Command::SubscribeFinalized(SubscribeFinalized { height, response }) => { + if self.last_canonicalized.finalized_height >= height { + let _ = response.send(()); + } else { + self.pending_finalized_subscriptions + .entry(height) + .or_default() + .push(response); + } + } } Ok(()) } @@ -449,6 +462,7 @@ where block: Block, acknowledgment: Exact, ) -> eyre::Result<()> { + let height = block.height(); self.canonicalize( Span::current(), HeadOrFinalized::Finalized, @@ -484,6 +498,16 @@ where acknowledgment.acknowledge(); + self.pending_finalized_subscriptions.retain(|&key, value| { + let retain = key > height; + if !retain { + value.drain(..).for_each(|tx| { + let _ = tx.send(()); + }); + } + retain + }); + Ok(()) } } diff --git a/crates/commonware-node/src/executor/ingress.rs b/crates/commonware-node/src/executor/ingress.rs index 95a2b173d6..8b7111043b 100644 --- a/crates/commonware-node/src/executor/ingress.rs +++ b/crates/commonware-node/src/executor/ingress.rs @@ -1,5 +1,5 @@ use commonware_consensus::{Reporter, marshal::Update, types::Height}; -use eyre::WrapErr as _; +use eyre::{WrapErr as _, eyre}; use futures::{ SinkExt as _, channel::{mpsc, oneshot}, @@ -31,6 +31,19 @@ impl Mailbox { Ok(rx) } + + pub(crate) async fn subscribe_finalized(&self, height: Height) -> eyre::Result<()> { + let (response, rx) = oneshot::channel(); + self.inner + .unbounded_send(Message::in_current_span(SubscribeFinalized { + height, + response, + })) + .wrap_err( + "failed sending subscribe finalized request to agent, this means it exited", + )?; + rx.await.map_err(|_| eyre!("actor dropped response")) + } } #[derive(Debug)] @@ -54,6 +67,8 @@ pub(super) enum Command { CanonicalizeHead(CanonicalizeHead), /// Requests the 
agent to forward a finalization event to the execution layer. Finalize(Box>), + /// Returns once the executor actor has finalized the block at `height`. + SubscribeFinalized(SubscribeFinalized), } #[derive(Debug)] @@ -63,6 +78,12 @@ pub(super) struct CanonicalizeHead { pub(super) ack: oneshot::Sender<()>, } +#[derive(Debug)] +pub(super) struct SubscribeFinalized { + pub(super) height: Height, + pub(super) response: oneshot::Sender<()>, +} + impl From for Command { fn from(value: CanonicalizeHead) -> Self { Self::CanonicalizeHead(value) @@ -75,6 +96,12 @@ impl From> for Command { } } +impl From for Command { + fn from(value: SubscribeFinalized) -> Self { + Self::SubscribeFinalized(value) + } +} + impl Reporter for Mailbox { type Activity = Update; diff --git a/crates/commonware-node/src/peer_manager/actor.rs b/crates/commonware-node/src/peer_manager/actor.rs index bf7c69260b..806f330e34 100644 --- a/crates/commonware-node/src/peer_manager/actor.rs +++ b/crates/commonware-node/src/peer_manager/actor.rs @@ -22,7 +22,7 @@ use tempo_chainspec::hardfork::TempoHardforks as _; use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use tempo_node::TempoFullNode; use tempo_primitives::TempoHeader; -use tracing::{Level, Span, error, info, info_span, instrument, warn}; +use tracing::{Level, Span, debug, error, info, info_span, instrument, warn}; use crate::{ consensus::block::Block, @@ -39,6 +39,7 @@ where oracle: TPeerManager, execution_node: TempoFullNode, + executor: crate::executor::Mailbox, epoch_strategy: FixedEpocher, last_finalized_height: Height, mailbox: mpsc::UnboundedReceiver, @@ -58,6 +59,7 @@ where super::Config { oracle, execution_node, + executor, epoch_strategy, last_finalized_height, }: super::Config, @@ -73,6 +75,7 @@ where context: ContextCell::new(context), oracle, execution_node, + executor, epoch_strategy, last_finalized_height, mailbox, @@ -175,6 +178,11 @@ where read_validator_config(&self.execution_node, &header) .wrap_err("unable to read initial peer 
set from execution layer")?; + let source = match &all_validators { + Validators::V1(_) => "Validator Config V1", + Validators::V2(_) => "Validator Config V2", + }; + debug!(source, "read validators from chain"); let peers = construct_peer_set(&onchain_outcome, &all_validators); self.peers.set(peers.len() as i64); @@ -292,11 +300,20 @@ where let onchain_outcome = OnchainDkgOutcome::read(&mut extra_data.as_ref()) .wrap_err("could not read DKG outcome from boundary block")?; + if let Err(reason) = self.executor.subscribe_finalized(block.height()).await { + warn!( + %reason, + "unable to clarify whether the finalized block was already \ + forwarded to execution layer; will try to read validator \ + config contract, but it will likely fail", + ); + } + let (_read_height, _read_hash, all_validators) = - match read_validator_config(&self.execution_node, block.header()) { + match read_validator_config_at_hash(&self.execution_node, block.hash()) { Ok(ret) => ret, Err(reason) => { - info!( + warn!( %reason, execution_layer.status = self.execution_node.network.is_syncing(), "unable to read validator config; will retry on next block", @@ -370,7 +387,7 @@ pub(crate) fn construct_peer_set( vals.get_value(key) .expect( "all DKG participants must have an entry in the \ - unfiltered, contract validator set", + unfiltered, contract validator set", ) .outbound, ), @@ -417,3 +434,12 @@ fn read_validator_config( validators::read_from_contract_at_height(1, node, best_block_number, reference_header) .wrap_err("unable to read validators from best block") } + +#[instrument(skip_all, err(level = Level::INFO))] +fn read_validator_config_at_hash( + node: &TempoFullNode, + block_hash: B256, +) -> eyre::Result<(u64, B256, Validators)> { + validators::read_from_contract_at_block_hash(1, node, block_hash) + .wrap_err("unable to read validators from best block") +} diff --git a/crates/commonware-node/src/peer_manager/mod.rs b/crates/commonware-node/src/peer_manager/mod.rs index 
7bfe20bc78..cef85dc0b5 100644 --- a/crates/commonware-node/src/peer_manager/mod.rs +++ b/crates/commonware-node/src/peer_manager/mod.rs @@ -39,6 +39,8 @@ mod ingress; pub(crate) use actor::Actor; pub(crate) use ingress::Mailbox; +use crate::executor; + /// Configuration of the peer manager actor. pub(crate) struct Config { /// The mailbox to the P2P network to register the peer sets. @@ -46,6 +48,9 @@ pub(crate) struct Config { /// A handle to the full execution node to read block headers and look up /// the Validator Config contract pub(crate) execution_node: TempoFullNode, + /// The mailbox to the executor actor. Used to check if the executor has + /// already finalized a block at a given height. + pub(crate) executor: executor::Mailbox, /// The epoch strategy used by the node. pub(crate) epoch_strategy: FixedEpocher, /// The last finalized height according to the consensus layer (marshal). diff --git a/crates/commonware-node/src/validators.rs b/crates/commonware-node/src/validators.rs index 95e5e15655..8aef106c95 100644 --- a/crates/commonware-node/src/validators.rs +++ b/crates/commonware-node/src/validators.rs @@ -140,11 +140,11 @@ pub(crate) fn read_validator_config_at_block_hash( where C: Default, { - let block = node + let header = node .provider - .find_block_by_hash(block_hash, BlockSource::Any) + .header(block_hash) .map_err(eyre::Report::new) - .and_then(|maybe| maybe.ok_or_eyre("execution layer returned empty block")) + .and_then(|maybe| maybe.ok_or_eyre("execution layer returned empty header")) .wrap_err_with(|| format!("failed reading block with hash `{block_hash}`"))?; let db = State::builder() @@ -159,12 +159,9 @@ where let mut evm = node .evm_config - .evm_for_block(db, block.header()) + .evm_for_block(db, &header) .wrap_err("failed instantiating evm for block")?; - let height = block.number(); - let hash = block.seal_slow().hash(); - let ctx = evm.ctx_mut(); let res = StorageCtx::enter_evm( &mut ctx.journaled_state, @@ -173,7 +170,7 @@ where 
&ctx.tx, || read_fn(&C::default()), )?; - Ok((height, hash, res)) + Ok((header.number(), block_hash, res)) } pub(crate) enum Validators { @@ -202,10 +199,9 @@ pub(crate) fn is_v2_initialized_at_height(node: &TempoFullNode, height: u64) -> } } -/// Returns if the validator config v2 can be used exactly at the heigth and +/// Returns if the validator config v2 can be used exactly at the height and /// timestamp of `header`. /// -/// /// Validator Config V2 can be used if: /// /// 1. if `header.timestamp` is active at the hardfork timestamp. @@ -222,9 +218,8 @@ pub(crate) fn can_use_v2(node: &TempoFullNode, header: &TempoHeader) -> eyre::Re <= header.number()) } -/// Returns if the validator config v2 can be used exactly at the heigth and -/// timestamp of `header`. -/// +/// Returns if the validator config v2 can be used exactly at `hash` and the +/// timestamp of the corresponding `header`. /// /// Validator Config V2 can be used if: /// @@ -287,7 +282,7 @@ pub(crate) fn read_from_contract_at_height( let decoded_validators = raw_validators .into_iter() - .map(|raw| DecodedValidatorV2::decode_from_contract(raw)) + .map(DecodedValidatorV2::decode_from_contract) .collect::, _>>() .wrap_err("failed an entry in the on-chain validator set")?; @@ -323,6 +318,72 @@ pub(crate) fn read_from_contract_at_height( Ok(vals) } +#[instrument( + skip_all, + fields( + attempt = _attempt, + %block_hash, + ), + err(level = Level::WARN), +)] +pub(crate) fn read_from_contract_at_block_hash( + _attempt: u32, + node: &TempoFullNode, + block_hash: B256, +) -> eyre::Result<(u64, B256, Validators)> { + let vals = if can_use_v2_at_block_hash(node, block_hash) + .wrap_err("failed to determine if the v2 validator config contract can be used")? 
+ { + let (read_height, hash, raw_validators) = + read_validator_config_at_block_hash(node, block_hash, |config: &ValidatorConfigV2| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + })?; + + info!( + ?raw_validators, + "read validators from validator config v2 contract", + ); + + let decoded_validators = raw_validators + .into_iter() + .map(DecodedValidatorV2::decode_from_contract) + .collect::, _>>() + .wrap_err("failed an entry in the on-chain validator set")?; + + ( + read_height, + hash, + Validators::V2( + ordered::Map::try_from_iter( + decoded_validators + .into_iter() + .map(|validator| (validator.public_key.clone(), validator)), + ) + .wrap_err("contract contained validators with duplicate public keys")?, + ), + ) + } else { + let (read_height, hash, raw_validators) = + read_validator_config_at_block_hash(node, block_hash, |config: &ValidatorConfig| { + config + .get_validators() + .wrap_err("failed to query contract for validator config") + })?; + info!( + ?raw_validators, + "read validators from validator config v1 contract", + ); + ( + read_height, + hash, + Validators::V1(decode_from_contract(raw_validators)), + ) + }; + Ok(vals) +} + #[instrument(skip_all, fields(validators_to_decode = contract_vals.len()))] pub(crate) fn decode_from_contract( contract_vals: Vec, From 21ff405782e30b3be32767ff3f96abc6fb825609 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Thu, 19 Feb 2026 13:23:36 +0100 Subject: [PATCH 10/11] nits --- crates/commonware-node/src/dkg/manager/actor/mod.rs | 2 +- crates/e2e/src/tests/backfill.rs | 2 +- crates/e2e/src/tests/dkg/share_loss.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index 7d800a3f00..7517890fcf 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -1454,7 +1454,7 
@@ impl Metrics { let read_players_from_v2_contract = Counter::default(); context.register( "read_players_from_v2_contract", - "the number of times the players were read from the validator config v1 contract", + "the number of times the players were read from the validator config v2 contract", read_players_from_v2_contract.clone(), ); diff --git a/crates/e2e/src/tests/backfill.rs b/crates/e2e/src/tests/backfill.rs index c7332f5995..df9a98c2b0 100644 --- a/crates/e2e/src/tests/backfill.rs +++ b/crates/e2e/src/tests/backfill.rs @@ -93,7 +93,7 @@ fn assert_no_new_epoch(context: &impl Metrics, max_epoch: u64) { let metric = parts.next().unwrap(); let value = parts.next().unwrap(); - if metrics.ends_with("_peers_blocked") { + if metric.ends_with("_peers_blocked") { let value = value.parse::().unwrap(); assert_eq!(value, 0); } diff --git a/crates/e2e/src/tests/dkg/share_loss.rs b/crates/e2e/src/tests/dkg/share_loss.rs index 56c416f124..38f00c1640 100644 --- a/crates/e2e/src/tests/dkg/share_loss.rs +++ b/crates/e2e/src/tests/dkg/share_loss.rs @@ -57,7 +57,7 @@ fn validator_lost_share_but_gets_share_in_next_epoch() { let metric = parts.next().unwrap(); let value = parts.next().unwrap(); - if metrics.ends_with("_peers_blocked") { + if metric.ends_with("_peers_blocked") { let value = value.parse::().unwrap(); assert_eq!(value, 0); } From 2d9c9bf607d4ec28e03b0970afdf145e17d1d666 Mon Sep 17 00:00:00 2001 From: Richard Janis Goldschmidt Date: Mon, 23 Feb 2026 13:45:44 +0100 Subject: [PATCH 11/11] feat(e2e): test static V1 -> V2 migrations, add full test suite for V2 val config at genesis (#2750) Adds tests for DKG scenarios that involve a static set of validators and have a transition from the V1 to V2 validator configs. Also adds a mechanism to wait for the executor to send a finalized block at a given height to the execution layer - this is to fix the problem of the peer manager attempting to read EL state before the block was submitted to the EL. 
--- Cargo.lock | 1 + .../src/dkg/manager/actor/mod.rs | 4 +- .../src/dkg/manager/actor/state.rs | 30 +- crates/e2e/Cargo.toml | 1 + crates/e2e/src/execution_runtime.rs | 665 ++++++++++++++++-- crates/e2e/src/lib.rs | 181 +++-- crates/e2e/src/testing_node.rs | 58 +- crates/e2e/src/tests/consensus_rpc.rs | 10 +- crates/e2e/src/tests/dkg/common.rs | 15 + crates/e2e/src/tests/dkg/full_ceremony.rs | 16 +- .../mod.rs => static_transitions.rs} | 0 .../tests/migration_from_v1_to_v2/dkg/mod.rs | 1 + .../dkg/static_sets.rs | 204 ++++++ .../src/tests/migration_from_v1_to_v2/mod.rs | 1 + crates/e2e/src/tests/mod.rs | 4 +- crates/e2e/src/tests/sync.rs | 4 +- .../e2e/src/tests/v2_at_genesis/backfill.rs | 134 ++++ .../src/tests/v2_at_genesis/consensus_rpc.rs | 299 ++++++++ .../e2e/src/tests/v2_at_genesis/dkg/common.rs | 140 ++++ .../src/tests/v2_at_genesis/dkg/dynamic.rs | 302 ++++++++ .../dkg/fast_sync_after_full_dkg.rs | 133 ++++ .../tests/v2_at_genesis/dkg/full_ceremony.rs | 140 ++++ crates/e2e/src/tests/v2_at_genesis/dkg/mod.rs | 8 + .../src/tests/v2_at_genesis/dkg/share_loss.rs | 93 +++ .../v2_at_genesis/dkg/static_transitions.rs | 121 ++++ crates/e2e/src/tests/v2_at_genesis/mod.rs | 30 + crates/e2e/src/tests/v2_at_genesis/restart.rs | 544 ++++++++++++++ crates/e2e/src/tests/v2_at_genesis/simple.rs | 96 +++ .../e2e/src/tests/v2_at_genesis/snapshot.rs | 510 ++++++++++++++ .../e2e/src/tests/v2_at_genesis/subblocks.rs | 442 ++++++++++++ .../src/validator_config_v2/mod.rs | 1 - 31 files changed, 4046 insertions(+), 142 deletions(-) rename crates/e2e/src/tests/dkg/{static_transitions/mod.rs => static_transitions.rs} (100%) create mode 100644 crates/e2e/src/tests/migration_from_v1_to_v2/dkg/mod.rs create mode 100644 crates/e2e/src/tests/migration_from_v1_to_v2/dkg/static_sets.rs create mode 100644 crates/e2e/src/tests/migration_from_v1_to_v2/mod.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/backfill.rs create mode 100644 
crates/e2e/src/tests/v2_at_genesis/consensus_rpc.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/common.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/dynamic.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/fast_sync_after_full_dkg.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/full_ceremony.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/mod.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/share_loss.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/dkg/static_transitions.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/mod.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/restart.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/simple.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/snapshot.rs create mode 100644 crates/e2e/src/tests/v2_at_genesis/subblocks.rs diff --git a/Cargo.lock b/Cargo.lock index d7141e7439..5e3d85354a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11942,6 +11942,7 @@ dependencies = [ "itertools 0.14.0", "jsonrpsee", "rand 0.8.5", + "reth-chainspec", "reth-db", "reth-ethereum", "reth-network-peers", diff --git a/crates/commonware-node/src/dkg/manager/actor/mod.rs b/crates/commonware-node/src/dkg/manager/actor/mod.rs index 7517890fcf..b46c20c3e8 100644 --- a/crates/commonware-node/src/dkg/manager/actor/mod.rs +++ b/crates/commonware-node/src/dkg/manager/actor/mod.rs @@ -1755,8 +1755,8 @@ fn determine_next_players( fields( %digest, ), - err, - ret(level = Level::WARN) + err(level = Level::WARN) + ret, )] pub(crate) fn read_re_dkg_epoch( node: &TempoFullNode, diff --git a/crates/commonware-node/src/dkg/manager/actor/state.rs b/crates/commonware-node/src/dkg/manager/actor/state.rs index dd0097c10b..60f026b362 100644 --- a/crates/commonware-node/src/dkg/manager/actor/state.rs +++ b/crates/commonware-node/src/dkg/manager/actor/state.rs @@ -766,14 +766,13 @@ impl Read for State { buf: &mut impl bytes::Buf, cfg: &Self::Cfg, ) -> Result 
{ - let range_cfg = RangeCfg::from(1..=(u16::MAX as usize)); Ok(Self { epoch: ReadExt::read(buf)?, seed: ReadExt::read(buf)?, output: Read::read_cfg(buf, cfg)?, share: ReadExt::read(buf)?, - players: Read::read_cfg(buf, &(range_cfg, ()))?, - syncers: Read::read_cfg(buf, &(range_cfg, ()))?, + players: Read::read_cfg(buf, &(RangeCfg::from(1..=(u16::MAX as usize)), ()))?, + syncers: Read::read_cfg(buf, &(RangeCfg::from(0..=(u16::MAX as usize)), ()))?, is_full_dkg: ReadExt::read(buf)?, }) } @@ -1338,6 +1337,7 @@ impl ReducedBlock { #[cfg(test)] mod tests { use super::*; + use commonware_codec::Encode as _; use commonware_cryptography::{ bls12381::{dkg, primitives::sharing::Mode}, ed25519::PrivateKey, @@ -1394,6 +1394,30 @@ mod tests { } } + #[test] + fn state_round_trip_with() { + let executor = deterministic::Runner::default(); + executor.start(|mut context| async move { + let state = make_test_state(&mut context, 0); + let mut bytes = state.encode(); + assert_eq!( + state, + State::read_cfg(&mut bytes, &NZU32!(u32::MAX)).unwrap(), + ); + + let state_without_syncers = { + let mut s = make_test_state(&mut context, 0); + s.syncers = Default::default(); + s + }; + let mut bytes = state_without_syncers.encode(); + assert_eq!( + state_without_syncers, + State::read_cfg(&mut bytes, &NZU32!(u32::MAX)).unwrap(), + ); + }); + } + #[test] fn states_migration_migrates_last_two() { let executor = deterministic::Runner::default(); diff --git a/crates/e2e/Cargo.toml b/crates/e2e/Cargo.toml index 66d17b35be..a6dcab7c9e 100644 --- a/crates/e2e/Cargo.toml +++ b/crates/e2e/Cargo.toml @@ -32,6 +32,7 @@ commonware-utils.workspace = true itertools.workspace = true eyre.workspace = true +reth-chainspec.workspace = true reth-db.workspace = true reth-ethereum = { workspace = true, features = [ "node", diff --git a/crates/e2e/src/execution_runtime.rs b/crates/e2e/src/execution_runtime.rs index 62275c8883..0e2b55526d 100644 --- a/crates/e2e/src/execution_runtime.rs +++ 
b/crates/e2e/src/execution_runtime.rs @@ -1,6 +1,6 @@ //! The environment to launch tempo execution nodes in. use std::{ - net::SocketAddr, + net::{IpAddr, SocketAddr}, path::{Path, PathBuf}, sync::Arc, time::Duration, @@ -14,12 +14,17 @@ use alloy::{ }; use alloy_evm::{EvmFactory as _, revm::inspector::JournalExt as _}; use alloy_genesis::{Genesis, GenesisAccount}; -use alloy_primitives::{Address, B256}; +use alloy_primitives::{Address, B256, Keccak256, U256}; use commonware_codec::Encode; -use commonware_cryptography::ed25519::PublicKey; +use commonware_cryptography::{ + Signer, + ed25519::{PrivateKey, PublicKey, Signature}, +}; +use commonware_runtime::Clock; use commonware_utils::ordered; use eyre::{OptionExt as _, WrapErr as _}; use futures::{StreamExt, future::BoxFuture}; +use reth_chainspec::EthChainSpec; use reth_db::mdbx::DatabaseEnv; use reth_ethereum::{ evm::{ @@ -55,10 +60,16 @@ use tempo_node::{ rpc::consensus::{TempoConsensusApiServer, TempoConsensusRpc}, }; use tempo_precompiles::{ - VALIDATOR_CONFIG_ADDRESS, + VALIDATOR_CONFIG_ADDRESS, VALIDATOR_CONFIG_V2_ADDRESS, storage::StorageCtx, validator_config::{IValidatorConfig, ValidatorConfig}, + validator_config_v2::{ + IValidatorConfigV2, VALIDATOR_NS_ADD, VALIDATOR_NS_ROTATE, ValidatorConfigV2, + }, }; +use tokio::sync::oneshot; + +use crate::{ConsensusNodeConfig, TestingNode}; const ADMIN_INDEX: u32 = 0; const VALIDATOR_START_INDEX: u32 = 1; @@ -70,7 +81,8 @@ pub const TEST_MNEMONIC: &str = "test test test test test test test test test te pub struct Builder { epoch_length: Option, initial_dkg_outcome: Option, - validators: Option>, + t2_time: Option, + validators: Option>, } impl Builder { @@ -78,6 +90,7 @@ impl Builder { Self { epoch_length: None, initial_dkg_outcome: None, + t2_time: None, validators: None, } } @@ -96,41 +109,62 @@ impl Builder { } } - pub fn with_validators( - self, - validators: ordered::Map, - ) -> Self { + pub fn with_validators(self, validators: ordered::Map) -> Self { Self { 
validators: Some(validators), ..self } } + pub fn with_t2_time(self, t2_time: u64) -> Self { + Self { + t2_time: Some(t2_time), + ..self + } + } + pub fn launch(self) -> eyre::Result { let Self { epoch_length, initial_dkg_outcome, + t2_time, validators, } = self; let epoch_length = epoch_length.ok_or_eyre("must specify epoch length")?; let initial_dkg_outcome = initial_dkg_outcome.ok_or_eyre("must specify initial DKG outcome")?; + let t2_time = t2_time.ok_or_eyre("must specify t2 time")?; let validators = validators.ok_or_eyre("must specify validators")?; - assert!(initial_dkg_outcome.next_players() == validators.keys(),); + assert_eq!( + initial_dkg_outcome.next_players(), + &ordered::Set::from_iter_dedup( + validators + .iter_pairs() + .filter_map(|(key, val)| val.share.is_some().then_some(key.clone())) + ) + ); let mut genesis = genesis(); genesis .config .extra_fields .insert_value("epochLength".to_string(), epoch_length) - .wrap_err("failed to insert epoch length into genesis")?; + .unwrap(); + genesis + .config + .extra_fields + .insert_value("t2Time".to_string(), t2_time) + .unwrap(); genesis.extra_data = initial_dkg_outcome.encode().to_vec().into(); - let mut evm = setup_tempo_evm(); + // Just remove whatever is already written into chainspec. 
+ genesis.alloc.remove(&VALIDATOR_CONFIG_ADDRESS); + genesis.alloc.remove(&VALIDATOR_CONFIG_V2_ADDRESS); + let mut evm = setup_tempo_evm(genesis.config.chain_id); { let cx = evm.ctx_mut(); StorageCtx::enter_evm(&mut cx.journaled_state, &cx.block, &cx.cfg, &cx.tx, || { @@ -138,23 +172,64 @@ impl Builder { let mut validator_config = ValidatorConfig::new(); validator_config .initialize(admin()) - .wrap_err("Failed to initialize validator config") + .wrap_err("failed to initialize validator config v1") .unwrap(); - for (peer, (net_addr, chain_addr)) in validators.iter_pairs() { - validator_config - .add_validator( - admin(), - IValidatorConfig::addValidatorCall { - newValidatorAddress: *chain_addr, - publicKey: peer.encode().as_ref().try_into().unwrap(), - active: true, - inboundAddress: net_addr.to_string(), - outboundAddress: net_addr.to_string(), - }, - ) + let mut validator_config_v2 = ValidatorConfigV2::new(); + if t2_time == 0 { + validator_config_v2 + .initialize(admin()) + .wrap_err("failed to initialize validator config v2") .unwrap(); } + + for (public_key, validator) in validators { + if let ConsensusNodeConfig { + address, + ingress, + egress, + private_key, + share: Some(_), + } = validator + { + validator_config + .add_validator( + admin(), + IValidatorConfig::addValidatorCall { + newValidatorAddress: address, + publicKey: public_key.encode().as_ref().try_into().unwrap(), + active: true, + inboundAddress: ingress.to_string(), + outboundAddress: egress.to_string(), + }, + ) + .unwrap(); + + if t2_time == 0 { + validator_config_v2 + .add_validator( + admin(), + IValidatorConfigV2::addValidatorCall { + validatorAddress: address, + publicKey: public_key.encode().as_ref().try_into().unwrap(), + ingress: ingress.to_string(), + egress: egress.ip().to_string(), + signature: sign_add_validator_args( + genesis.config.chain_id, + &private_key, + address, + ingress, + egress.ip(), + ) + .encode() + .to_vec() + .into(), + }, + ) + .unwrap() + } + } + } }) } @@ -348,7 
+423,7 @@ impl ExecutionRuntime { public_key, addr, response, - } = *add_validator; + } = add_validator; let provider = ProviderBuilder::new() .wallet(wallet.clone()) .connect_http(http_url); @@ -370,13 +445,57 @@ impl ExecutionRuntime { .unwrap(); let _ = response.send(receipt); } + Message::AddValidatorV2(add_validator_v2) => { + let AddValidatorV2 { + http_url, + private_key, + address, + ingress, + egress, + response, + } = add_validator_v2; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config + .addValidator( + address, + private_key + .public_key() + .encode() + .as_ref() + .try_into() + .unwrap(), + ingress.to_string(), + egress.to_string(), + sign_add_validator_args( + EthChainSpec::chain(&chain_spec).id(), + &private_key, + address, + ingress, + egress, + ) + .encode() + .to_vec() + .into(), + ) + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } Message::ChangeValidatorStatus(change_validator_status) => { let ChangeValidatorStatus { http_url, active, index, response, - } = *change_validator_status; + } = change_validator_status; let provider = ProviderBuilder::new() .wallet(wallet.clone()) .connect_http(http_url); @@ -392,12 +511,138 @@ impl ExecutionRuntime { .unwrap(); let _ = response.send(receipt); } + Message::DeactivateValidatorV2(deacivate_validator_v2) => { + let DeactivateValidatorV2 { + http_url, + address, + response, + } = deacivate_validator_v2; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config_v2 = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config_v2 + .deactivateValidator(address) + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } + 
Message::GetV1Validators(get_v1_validators) => { + let GetV1Validators { http_url, response } = get_v1_validators; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config = + IValidatorConfig::new(VALIDATOR_CONFIG_ADDRESS, provider); + let validators = validator_config.getValidators().call().await.unwrap(); + let _ = response.send(validators); + } + Message::GetV2Validators(get_v2_validators) => { + let GetV2Validators { http_url, response } = get_v2_validators; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let validators = + validator_config.getAllValidators().call().await.unwrap(); + let _ = response.send(validators); + } + Message::InitializeIfMigrated(InitializeIfMigrated { + http_url, + response, + }) => { + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config_v2 = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config_v2 + .initializeIfMigrated() + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } + Message::MigrateValidator(migrate_validator) => { + let MigrateValidator { + http_url, + index, + response, + } = migrate_validator; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config_v2 = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config_v2 + .migrateValidator(index) + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } + Message::RotateValidator(rotate_validator) => { + let RotateValidator { + http_url, + private_key, + address, + ingress, + egress, + response, + } = rotate_validator; + let provider = ProviderBuilder::new() + 
.wallet(wallet.clone()) + .connect_http(http_url); + let validator_config = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config + .rotateValidator( + address, + private_key + .public_key() + .encode() + .as_ref() + .try_into() + .unwrap(), + ingress.to_string(), + egress.to_string(), + sign_rotate_validator_args( + EthChainSpec::chain(&chain_spec).id(), + &private_key, + address, + ingress, + egress, + ) + .encode() + .to_vec() + .into(), + ) + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } Message::SetNextFullDkgCeremony(set_next_full_dkg_ceremony) => { let SetNextFullDkgCeremony { http_url, epoch, response, - } = *set_next_full_dkg_ceremony; + } = set_next_full_dkg_ceremony; let provider = ProviderBuilder::new() .wallet(wallet.clone()) .connect_http(http_url); @@ -413,6 +658,27 @@ impl ExecutionRuntime { .unwrap(); let _ = response.send(receipt); } + Message::SetNextFullDkgCeremonyV2(set_next_full_dkg_ceremony_v2) => { + let SetNextFullDkgCeremonyV2 { + http_url, + epoch, + response, + } = set_next_full_dkg_ceremony_v2; + let provider = ProviderBuilder::new() + .wallet(wallet.clone()) + .connect_http(http_url); + let validator_config = + IValidatorConfigV2::new(VALIDATOR_CONFIG_V2_ADDRESS, provider); + let receipt = validator_config + .setNextFullDkgCeremony(epoch) + .send() + .await + .unwrap() + .get_receipt() + .await + .unwrap(); + let _ = response.send(receipt); + } Message::SpawnNode { name, config, @@ -467,7 +733,7 @@ impl ExecutionRuntime { public_key: PublicKey, addr: SocketAddr, ) -> eyre::Result { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send( AddValidator { @@ -484,13 +750,36 @@ impl ExecutionRuntime { .wrap_err("the execution runtime dropped the response channel before sending a receipt") } + pub async fn add_validator_v2( + &self, + http_url: Url, + validator: &TestingNode, + ) -> 
eyre::Result { + let (tx, rx) = oneshot::channel(); + self.to_runtime + .send( + AddValidatorV2 { + http_url, + private_key: validator.private_key().clone(), + address: validator.chain_address, + ingress: validator.ingress(), + egress: validator.egress(), + response: tx, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + pub async fn change_validator_status( &self, http_url: Url, index: u64, active: bool, ) -> eyre::Result { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send( ChangeValidatorStatus { @@ -506,12 +795,120 @@ impl ExecutionRuntime { .wrap_err("the execution runtime dropped the response channel before sending a receipt") } + pub async fn deactivate_validator_v2( + &self, + http_url: Url, + validator: &TestingNode, + ) -> eyre::Result { + let (tx, rx) = oneshot::channel(); + self.to_runtime + .send( + DeactivateValidatorV2 { + http_url, + address: validator.chain_address, + response: tx, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + + pub async fn get_v1_validators( + &self, + http_url: Url, + ) -> eyre::Result> { + let (tx, rx) = oneshot::channel(); + self.to_runtime + .send( + GetV1Validators { + http_url, + response: tx, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + + pub async fn get_v2_validators( + &self, + http_url: Url, + ) -> eyre::Result> { + let (tx, rx) = oneshot::channel(); + self.to_runtime + .send( + GetV2Validators { + http_url, + response: tx, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the 
execution runtime dropped the response channel before sending a receipt") + } + + pub async fn initialize_if_migrated(&self, http_url: Url) -> eyre::Result { + let (response, rx) = oneshot::channel(); + self.to_runtime + .send(InitializeIfMigrated { http_url, response }.into()) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + + pub async fn migrate_validator( + &self, + http_url: Url, + index: u64, + ) -> eyre::Result { + let (response, rx) = oneshot::channel(); + self.to_runtime + .send( + MigrateValidator { + http_url, + index, + response, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + + pub async fn rotate_validator( + &self, + http_url: Url, + validator: &TestingNode, + ) -> eyre::Result { + let (response, rx) = oneshot::channel(); + self.to_runtime + .send( + RotateValidator { + http_url, + private_key: validator.private_key().clone(), + address: validator.chain_address, + ingress: validator.ingress(), + egress: validator.egress(), + response, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + pub async fn set_next_full_dkg_ceremony( &self, http_url: Url, epoch: u64, ) -> eyre::Result { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send( SetNextFullDkgCeremony { @@ -526,6 +923,26 @@ impl ExecutionRuntime { .wrap_err("the execution runtime dropped the response channel before sending a receipt") } + pub async fn set_next_full_dkg_ceremony_v2( + &self, + http_url: Url, + epoch: u64, + ) -> eyre::Result { + let (tx, rx) = oneshot::channel(); + self.to_runtime + .send( + SetNextFullDkgCeremonyV2 { + 
http_url, + epoch, + response: tx, + } + .into(), + ) + .map_err(|_| eyre::eyre!("the execution runtime went away"))?; + rx.await + .wrap_err("the execution runtime dropped the response channel before sending a receipt") + } + pub async fn remove_validator( &self, http_url: Url, @@ -533,7 +950,7 @@ impl ExecutionRuntime { public_key: PublicKey, addr: SocketAddr, ) -> eyre::Result { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send( AddValidator { @@ -559,7 +976,7 @@ impl ExecutionRuntime { Fut: std::future::Future + Send + 'static, T: Send + 'static, { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send(Message::RunAsync(Box::pin(async move { let result = fut.await; @@ -604,7 +1021,7 @@ impl ExecutionRuntimeHandle { config: ExecutionNodeConfig, database: DatabaseEnv, ) -> eyre::Result { - let (tx, rx) = tokio::sync::oneshot::channel(); + let (tx, rx) = oneshot::channel(); self.to_runtime .send(Message::SpawnNode { name: name.to_string(), @@ -773,14 +1190,22 @@ pub async fn launch_execution_node>( } enum Message { - AddValidator(Box), - ChangeValidatorStatus(Box), - SetNextFullDkgCeremony(Box), + AddValidator(AddValidator), + AddValidatorV2(AddValidatorV2), + ChangeValidatorStatus(ChangeValidatorStatus), + DeactivateValidatorV2(DeactivateValidatorV2), + GetV1Validators(GetV1Validators), + GetV2Validators(GetV2Validators), + InitializeIfMigrated(InitializeIfMigrated), + MigrateValidator(MigrateValidator), + RotateValidator(RotateValidator), + SetNextFullDkgCeremony(SetNextFullDkgCeremony), + SetNextFullDkgCeremonyV2(SetNextFullDkgCeremonyV2), SpawnNode { name: String, config: ExecutionNodeConfig, database: DatabaseEnv, - response: tokio::sync::oneshot::Sender, + response: oneshot::Sender, }, RunAsync(BoxFuture<'static, ()>), Stop, @@ -788,19 +1213,67 @@ enum Message { impl From for Message { fn from(value: AddValidator) -> Self { - 
Self::AddValidator(value.into()) + Self::AddValidator(value) + } +} + +impl From for Message { + fn from(value: AddValidatorV2) -> Self { + Self::AddValidatorV2(value) } } impl From for Message { fn from(value: ChangeValidatorStatus) -> Self { - Self::ChangeValidatorStatus(value.into()) + Self::ChangeValidatorStatus(value) + } +} + +impl From for Message { + fn from(value: DeactivateValidatorV2) -> Self { + Self::DeactivateValidatorV2(value) + } +} + +impl From for Message { + fn from(value: GetV1Validators) -> Self { + Self::GetV1Validators(value) + } +} + +impl From for Message { + fn from(value: GetV2Validators) -> Self { + Self::GetV2Validators(value) + } +} + +impl From for Message { + fn from(value: InitializeIfMigrated) -> Self { + Self::InitializeIfMigrated(value) + } +} + +impl From for Message { + fn from(value: MigrateValidator) -> Self { + Self::MigrateValidator(value) + } +} + +impl From for Message { + fn from(value: RotateValidator) -> Self { + Self::RotateValidator(value) } } impl From for Message { fn from(value: SetNextFullDkgCeremony) -> Self { - Self::SetNextFullDkgCeremony(value.into()) + Self::SetNextFullDkgCeremony(value) + } +} + +impl From for Message { + fn from(value: SetNextFullDkgCeremonyV2) -> Self { + Self::SetNextFullDkgCeremonyV2(value) } } @@ -811,7 +1284,18 @@ struct AddValidator { address: Address, public_key: PublicKey, addr: SocketAddr, - response: tokio::sync::oneshot::Sender, + response: oneshot::Sender, +} + +#[derive(Debug)] +struct AddValidatorV2 { + /// URL of the node to send this to. + http_url: Url, + private_key: PrivateKey, + address: Address, + ingress: SocketAddr, + egress: IpAddr, + response: oneshot::Sender, } #[derive(Debug)] @@ -820,7 +1304,51 @@ struct ChangeValidatorStatus { http_url: Url, index: u64, active: bool, - response: tokio::sync::oneshot::Sender, + response: oneshot::Sender, +} + +#[derive(Debug)] +struct DeactivateValidatorV2 { + /// URL of the node to send this to. 
+ http_url: Url, + address: Address, + response: oneshot::Sender, +} + +struct GetV1Validators { + http_url: Url, + response: oneshot::Sender>, +} + +struct GetV2Validators { + http_url: Url, + response: oneshot::Sender>, +} + +#[derive(Debug)] +struct InitializeIfMigrated { + /// URL of the node to send this to. + http_url: Url, + response: oneshot::Sender, +} + +#[derive(Debug)] +struct MigrateValidator { + /// URL of the node to send this to. + http_url: Url, + index: u64, + response: oneshot::Sender, +} + +#[derive(Debug)] +struct RotateValidator { + /// URL of the node to send this to. + http_url: Url, + private_key: PrivateKey, + address: Address, + ingress: SocketAddr, + egress: IpAddr, + response: oneshot::Sender, } #[derive(Debug)] @@ -828,7 +1356,15 @@ struct SetNextFullDkgCeremony { /// URL of the node to send this to. http_url: Url, epoch: u64, - response: tokio::sync::oneshot::Sender, + response: oneshot::Sender, +} + +#[derive(Debug)] +struct SetNextFullDkgCeremonyV2 { + /// URL of the node to send this to. 
+ http_url: Url, + epoch: u64, + response: oneshot::Sender, } pub fn admin() -> Address { @@ -843,9 +1379,46 @@ pub fn address(index: u32) -> Address { secret_key_to_address(MnemonicBuilder::from_phrase_nth(TEST_MNEMONIC, index).credential()) } -fn setup_tempo_evm() -> TempoEvm> { +fn setup_tempo_evm(chain_id: u64) -> TempoEvm> { let db = CacheDB::default(); - let env = EvmEnv::default(); + // revm sets timestamp to 1 by default, override it to 0 for genesis initializations + let mut env = EvmEnv::default().with_timestamp(U256::ZERO); + env.cfg_env.chain_id = chain_id; + let factory = TempoEvmFactory::default(); factory.create_evm(db, env) } + +fn sign_add_validator_args( + chain_id: u64, + key: &PrivateKey, + address: Address, + ingress: SocketAddr, + egress: IpAddr, +) -> Signature { + let mut hasher = Keccak256::new(); + hasher.update(chain_id.to_be_bytes()); + hasher.update(VALIDATOR_CONFIG_V2_ADDRESS.as_slice()); + hasher.update(address.as_slice()); + hasher.update(ingress.to_string().as_bytes()); + hasher.update(egress.to_string().as_bytes()); + let msg = hasher.finalize(); + key.sign(VALIDATOR_NS_ADD, msg.as_slice()) +} + +fn sign_rotate_validator_args( + chain_id: u64, + key: &PrivateKey, + address: Address, + ingress: SocketAddr, + egress: IpAddr, +) -> Signature { + let mut hasher = Keccak256::new(); + hasher.update(chain_id.to_be_bytes()); + hasher.update(VALIDATOR_CONFIG_V2_ADDRESS.as_slice()); + hasher.update(address.as_slice()); + hasher.update(ingress.to_string().as_bytes()); + hasher.update(egress.to_string().as_bytes()); + let msg = hasher.finalize(); + key.sign(VALIDATOR_NS_ROTATE, msg.as_slice()) +} diff --git a/crates/e2e/src/lib.rs b/crates/e2e/src/lib.rs index 26bde61d2d..2b28866de0 100644 --- a/crates/e2e/src/lib.rs +++ b/crates/e2e/src/lib.rs @@ -12,10 +12,15 @@ use std::{iter::repeat_with, net::SocketAddr, time::Duration}; +use alloy::signers::k256::schnorr::CryptoRngCore; +use alloy_primitives::Address; use 
commonware_consensus::types::Epoch; use commonware_cryptography::{ Signer as _, - bls12381::{dkg, primitives::sharing::Mode}, + bls12381::{ + dkg::{self}, + primitives::{group::Share, sharing::Mode}, + }, ed25519::{PrivateKey, PublicKey}, }; use commonware_math::algebra::Random as _; @@ -45,6 +50,68 @@ mod tests; pub const CONSENSUS_NODE_PREFIX: &str = "consensus"; pub const EXECUTION_NODE_PREFIX: &str = "execution"; +fn generate_consensus_node_config( + rng: &mut impl CryptoRngCore, + signers: u32, + verifiers: u32, +) -> ( + OnchainDkgOutcome, + ordered::Map, +) { + let signer_keys = repeat_with(|| PrivateKey::random(&mut *rng)) + .take(signers as usize) + .collect::>(); + + let (initial_dkg_outcome, shares) = dkg::deal::<_, _, N3f1>( + &mut *rng, + Mode::NonZeroCounter, + ordered::Set::try_from_iter(signer_keys.iter().map(|key| key.public_key())).unwrap(), + ) + .unwrap(); + + let onchain_dkg_outcome = OnchainDkgOutcome { + epoch: Epoch::zero(), + output: initial_dkg_outcome, + next_players: shares.keys().clone(), + is_next_full_dkg: false, + }; + + let verifier_keys = repeat_with(|| PrivateKey::random(&mut *rng)) + .take(verifiers as usize) + .collect::>(); + + let validators = ordered::Map::try_from_iter( + signer_keys + .into_iter() + .chain(verifier_keys) + .enumerate() + .map(|(i, private_key)| { + let public_key = private_key.public_key(); + let config = ConsensusNodeConfig { + address: crate::execution_runtime::validator(i as u32), + ingress: SocketAddr::from(([127, 0, 0, (i + 1) as u8], 8000)), + egress: SocketAddr::from(([127, 0, 0, (i + 1) as u8], 0)), + private_key, + share: shares.get_value(&public_key).cloned(), + }; + (public_key, config) + }), + ) + .unwrap(); + + (onchain_dkg_outcome, validators) +} + +/// Configuration for a validator. 
+#[derive(Clone, Debug)] +pub struct ConsensusNodeConfig { + pub address: Address, + pub ingress: SocketAddr, + pub egress: SocketAddr, + pub private_key: PrivateKey, + pub share: Option, +} + /// The test setup run by [`run`]. #[derive(Clone)] pub struct Setup { @@ -66,6 +133,13 @@ pub struct Setup { /// Whether to connect execution layer nodes directly. pub connect_execution_layer_nodes: bool, + + /// The t2 hardfork time. + /// + /// Validators will only be written into the V2 contract if t2_time == 0. + /// + /// Default: 1. + pub t2_time: u64, } impl Setup { @@ -81,6 +155,7 @@ impl Setup { }, epoch_length: 20, connect_execution_layer_nodes: false, + t2_time: 1, } } @@ -119,6 +194,10 @@ impl Setup { ..self } } + + pub fn t2_time(self, t2_time: u64) -> Self { + Self { t2_time, ..self } + } } impl Default for Setup { @@ -141,6 +220,7 @@ pub async fn setup_validators( connect_execution_layer_nodes, linkage, epoch_length, + t2_time, .. }: Setup, ) -> (Vec>, ExecutionRuntime) { @@ -154,52 +234,14 @@ pub async fn setup_validators( ); network.start(); - let mut signer_keys = repeat_with(|| PrivateKey::random(&mut *context)) - .take(how_many_signers as usize) - .collect::>(); - signer_keys.sort_by_key(|key| key.public_key()); - let (initial_dkg_outcome, shares) = dkg::deal::<_, _, N3f1>( - &mut *context, - Mode::NonZeroCounter, - ordered::Set::try_from_iter(signer_keys.iter().map(|key| key.public_key())).unwrap(), - ) - .unwrap(); - - let onchain_dkg_outcome = OnchainDkgOutcome { - epoch: Epoch::zero(), - output: initial_dkg_outcome, - next_players: shares.keys().clone(), - is_next_full_dkg: false, - }; - let mut verifier_keys = repeat_with(|| PrivateKey::random(&mut *context)) - .take(how_many_verifiers as usize) - .collect::>(); - verifier_keys.sort_by_key(|key| key.public_key()); - - // The port here does not matter because it will be ignored in simulated p2p. - // Still nice, because sometimes nodes can be better identified in logs. - let network_addresses = (1..) 
- .map(|port| SocketAddr::from(([127, 0, 0, 1], port))) - .take((how_many_signers + how_many_verifiers) as usize) - .collect::>(); - let chain_addresses = (0..) - .map(crate::execution_runtime::validator) - .take((how_many_signers + how_many_verifiers) as usize) - .collect::>(); - - let validators = ordered::Map::try_from_iter( - shares - .iter() - .zip(&network_addresses) - .zip(&chain_addresses) - .map(|((key, net_addr), chain_addr)| (key.clone(), (*net_addr, *chain_addr))), - ) - .unwrap(); + let (onchain_dkg_outcome, validators) = + generate_consensus_node_config(context, how_many_signers, how_many_verifiers); let execution_runtime = ExecutionRuntime::builder() .with_epoch_length(epoch_length) .with_initial_dkg_outcome(onchain_dkg_outcome) - .with_validators(validators) + .with_t2_time(t2_time) + .with_validators(validators.clone()) .launch() .unwrap(); @@ -209,31 +251,22 @@ pub async fn setup_validators( .generate(); let mut nodes = vec![]; - for ((((private_key, share), mut execution_config), network_address), chain_address) in - signer_keys - .into_iter() - .zip_eq(shares) - .map(|(signing_key, (verifying_key, share))| { - assert_eq!(signing_key.public_key(), verifying_key); - (signing_key, Some(share)) - }) - .chain(verifier_keys.into_iter().map(|key| (key, None))) - .zip_eq(execution_configs) - .zip_eq(network_addresses) - .zip_eq(chain_addresses) + + for ((public_key, consensus_node_config), mut execution_config) in + validators.into_iter().zip_eq(execution_configs) { + let ConsensusNodeConfig { + address, + ingress, + private_key, + share, + .. 
+ } = consensus_node_config; let oracle = oracle.clone(); - let uid = format!("{CONSENSUS_NODE_PREFIX}_{}", private_key.public_key()); + let uid = format!("{CONSENSUS_NODE_PREFIX}_{public_key}"); let feed_state = FeedStateHandle::new(); - execution_config.validator_key = Some( - private_key - .public_key() - .encode() - .as_ref() - .try_into() - .unwrap(), - ); + execution_config.validator_key = Some(public_key.encode().as_ref().try_into().unwrap()); execution_config.feed_state = Some(feed_state.clone()); let engine_config = consensus::Builder { @@ -261,13 +294,13 @@ pub async fn setup_validators( nodes.push(TestingNode::new( uid, - private_key.public_key(), + private_key, oracle.clone(), engine_config, execution_runtime.handle(), execution_config, - network_address, - chain_address, + ingress, + address, )); } @@ -283,7 +316,7 @@ pub fn run(setup: Setup, mut stop_condition: impl FnMut(&str, &str) -> bool) -> executor.start(|mut context| async move { // Setup and run all validators. - let (mut nodes, _execution_runtime) = setup_validators(&mut context, setup).await; + let (mut nodes, _execution_runtime) = setup_validators(&mut context, setup.clone()).await; join_all(nodes.iter_mut().map(|node| node.start(&context))).await; @@ -305,6 +338,18 @@ pub fn run(setup: Setup, mut stop_condition: impl FnMut(&str, &str) -> bool) -> assert_eq!(value, 0); } + if setup.t2_time == 0 { + if metric.ends_with("_dkg_manager_read_players_from_v1_contract_total") { + assert_eq!(0, value.parse::().unwrap()); + } + if metric.ends_with("_dkg_manager_syncing_players") { + assert_eq!(0, value.parse::().unwrap()); + } + if metric.ends_with("_dkg_manager_read_re_dkg_epoch_from_v1_contract_total") { + assert_eq!(0, value.parse::().unwrap()); + } + } + if stop_condition(metric, value) { success = true; break; diff --git a/crates/e2e/src/testing_node.rs b/crates/e2e/src/testing_node.rs index c6f2988c21..75ddb42aae 100644 --- a/crates/e2e/src/testing_node.rs +++ b/crates/e2e/src/testing_node.rs 
@@ -2,7 +2,10 @@ use crate::execution_runtime::{self, ExecutionNode, ExecutionNodeConfig, ExecutionRuntimeHandle}; use alloy_primitives::Address; -use commonware_cryptography::ed25519::PublicKey; +use commonware_cryptography::{ + Signer as _, + ed25519::{PrivateKey, PublicKey}, +}; use commonware_p2p::simulated::{Control, Oracle, SocketManager}; use commonware_runtime::{Handle, Metrics as _, deterministic::Context}; use reth_db::{Database, DatabaseEnv, mdbx::DatabaseArguments, open_db_read_only}; @@ -14,7 +17,11 @@ use reth_ethereum::{ storage::BlockNumReader, }; use reth_node_builder::NodeTypesWithDBAdapter; -use std::{net::SocketAddr, path::PathBuf, sync::Arc}; +use std::{ + net::{IpAddr, SocketAddr}, + path::PathBuf, + sync::Arc, +}; use tempo_commonware_node::{ BROADCASTER_CHANNEL_IDENT, BROADCASTER_LIMIT, CERTIFICATES_CHANNEL_IDENT, CERTIFICATES_LIMIT, DKG_CHANNEL_IDENT, DKG_LIMIT, MARSHAL_CHANNEL_IDENT, MARSHAL_LIMIT, RESOLVER_CHANNEL_IDENT, @@ -32,7 +39,7 @@ where /// Unique identifier for this node pub uid: String, /// Public key of the validator - pub public_key: PublicKey, + pub private_key: PrivateKey, /// Simulated network oracle for test environments pub oracle: Oracle, /// Consensus configuration used to start the consensus engine @@ -76,7 +83,7 @@ where #[expect(clippy::too_many_arguments, reason = "quickly threw this together")] pub fn new( uid: String, - public_key: PublicKey, + private_key: PrivateKey, oracle: Oracle, consensus_config: consensus::Builder< Control, @@ -87,6 +94,7 @@ where network_address: SocketAddr, chain_address: Address, ) -> Self { + let public_key = private_key.public_key(); let execution_node_datadir = execution_runtime .nodes_dir() .join(execution_runtime::execution_node_name(&public_key)); @@ -94,7 +102,7 @@ where let execution_node_name = execution_runtime::execution_node_name(&public_key); Self { uid, - public_key, + private_key, oracle, consensus_config, consensus_handle: None, @@ -112,9 +120,13 @@ where } } + pub fn 
private_key(&self) -> &PrivateKey { + &self.private_key + } + /// Get the validator public key of this node. - pub fn public_key(&self) -> &PublicKey { - &self.public_key + pub fn public_key(&self) -> PublicKey { + self.private_key.public_key() } /// Get the unique identifier of this node. @@ -141,6 +153,24 @@ where &self.oracle } + pub fn ingress(&self) -> SocketAddr { + self.network_address + } + + pub fn egress(&self) -> IpAddr { + self.network_address.ip() + } + + /// A signer is a node that has a share. + pub fn is_signer(&self) -> bool { + self.consensus_config.share.is_some() + } + + /// A verifier is a node that has no share. + pub fn is_verifier(&self) -> bool { + self.consensus_config.share.is_none() + } + /// Start both consensus and execution layers. /// /// @@ -225,43 +255,43 @@ where let votes = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(VOTES_CHANNEL_IDENT, VOTES_LIMIT) .await .unwrap(); let certificates = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(CERTIFICATES_CHANNEL_IDENT, CERTIFICATES_LIMIT) .await .unwrap(); let resolver = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(RESOLVER_CHANNEL_IDENT, RESOLVER_LIMIT) .await .unwrap(); let broadcast = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(BROADCASTER_CHANNEL_IDENT, BROADCASTER_LIMIT) .await .unwrap(); let marshal = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(MARSHAL_CHANNEL_IDENT, MARSHAL_LIMIT) .await .unwrap(); let dkg = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(DKG_CHANNEL_IDENT, DKG_LIMIT) .await .unwrap(); let subblocks = self .oracle - .control(self.public_key.clone()) + .control(self.public_key()) .register(SUBBLOCKS_CHANNEL_IDENT, SUBBLOCKS_LIMIT) .await .unwrap(); diff --git a/crates/e2e/src/tests/consensus_rpc.rs 
b/crates/e2e/src/tests/consensus_rpc.rs index 0a0e4bafa6..68a41e383f 100644 --- a/crates/e2e/src/tests/consensus_rpc.rs +++ b/crates/e2e/src/tests/consensus_rpc.rs @@ -5,7 +5,9 @@ use std::{net::SocketAddr, time::Duration}; -use super::dkg::common::{assert_no_dkg_failures, wait_for_validators_to_reach_epoch, wait_for_outcome}; +use super::dkg::common::{ + assert_no_dkg_failures, wait_for_outcome, wait_for_validators_to_reach_epoch, +}; use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators}; use alloy::transports::http::reqwest::Url; use alloy_primitives::hex; @@ -33,7 +35,10 @@ async fn consensus_subscribe_and_query_finalization() { let _ = tempo_eyre::install(); let initial_height = 3; - let setup = Setup::new().how_many_signers(1).epoch_length(100); + let setup = Setup::new() + .how_many_signers(1) + .t2_time(0) + .epoch_length(100); let cfg = deterministic::Config::default().with_seed(setup.seed); let (addr_tx, addr_rx) = oneshot::channel::<(SocketAddr, SocketAddr)>(); @@ -156,6 +161,7 @@ fn get_identity_transition_proof_after_full_dkg() { let setup = Setup::new() .how_many_signers(how_many_signers) + .t2_time(0) .epoch_length(epoch_length); let seed = setup.seed; diff --git a/crates/e2e/src/tests/dkg/common.rs b/crates/e2e/src/tests/dkg/common.rs index 62edc35bcd..4c07db1b5d 100644 --- a/crates/e2e/src/tests/dkg/common.rs +++ b/crates/e2e/src/tests/dkg/common.rs @@ -11,6 +11,21 @@ use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; use crate::{CONSENSUS_NODE_PREFIX, TestingNode}; +/// Returns the target epoch to wait for depending on `event_height`. +/// +/// If `event_height` is less than a boundary height, then the next epoch is +/// returned. Otherwise, the one *after* the next is returned. 
+pub(crate) fn target_epoch(epoch_length: u64, event_height: u64) -> Epoch { + let strat = FixedEpocher::new(NZU64!(epoch_length)); + let event_height = Height::new(event_height); + let info = strat.containing(event_height).unwrap(); + if info.last() == event_height { + info.epoch().next().next() + } else { + info.epoch().next() + } +} + /// Reads the DKG outcome from a block, returns None if block doesn't exist or has no outcome. pub(crate) fn read_outcome_from_validator( validator: &TestingNode, diff --git a/crates/e2e/src/tests/dkg/full_ceremony.rs b/crates/e2e/src/tests/dkg/full_ceremony.rs index 99322aac39..d84e7fceed 100644 --- a/crates/e2e/src/tests/dkg/full_ceremony.rs +++ b/crates/e2e/src/tests/dkg/full_ceremony.rs @@ -8,7 +8,7 @@ use commonware_runtime::{ }; use futures::future::join_all; -use super::common::{assert_no_dkg_failures, wait_for_validators_to_reach_epoch, wait_for_outcome}; +use super::common::{assert_no_dkg_failures, wait_for_outcome, wait_for_validators_to_reach_epoch}; use crate::{Setup, setup_validators}; #[test_traced] @@ -77,7 +77,12 @@ impl FullDkgTest { tracing::info!(?pubkey_before, "Group public key BEFORE full DKG"); // Step 2: Wait for full DKG to complete (epoch N+1) - wait_for_validators_to_reach_epoch(&context, self.full_dkg_epoch + 1, self.how_many_signers).await; + wait_for_validators_to_reach_epoch( + &context, + self.full_dkg_epoch + 1, + self.how_many_signers, + ) + .await; assert_no_dkg_failures(&context); // Step 3: Verify full DKG created a NEW polynomial (different public key) @@ -99,7 +104,12 @@ impl FullDkgTest { tracing::info!("Verified: full DKG created independent polynomial"); // Step 4: Wait for reshare (epoch N+2) and verify it PRESERVES the public key - wait_for_validators_to_reach_epoch(&context, self.full_dkg_epoch + 2, self.how_many_signers).await; + wait_for_validators_to_reach_epoch( + &context, + self.full_dkg_epoch + 2, + self.how_many_signers, + ) + .await; assert_no_dkg_failures(&context); let 
outcome_after_reshare = wait_for_outcome( diff --git a/crates/e2e/src/tests/dkg/static_transitions/mod.rs b/crates/e2e/src/tests/dkg/static_transitions.rs similarity index 100% rename from crates/e2e/src/tests/dkg/static_transitions/mod.rs rename to crates/e2e/src/tests/dkg/static_transitions.rs diff --git a/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/mod.rs b/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/mod.rs new file mode 100644 index 0000000000..57ff963a13 --- /dev/null +++ b/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/mod.rs @@ -0,0 +1 @@ +mod static_sets; diff --git a/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/static_sets.rs b/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/static_sets.rs new file mode 100644 index 0000000000..a2f857f3be --- /dev/null +++ b/crates/e2e/src/tests/migration_from_v1_to_v2/dkg/static_sets.rs @@ -0,0 +1,204 @@ +use std::time::Duration; + +use alloy::transports::http::reqwest::Url; +use commonware_consensus::types::{Epocher, FixedEpocher, Height}; +use commonware_macros::test_traced; +use commonware_runtime::{ + Clock, Metrics as _, Runner as _, + deterministic::{Config, Runner}, +}; +use commonware_utils::NZU64; +use futures::future::join_all; + +use crate::{ + CONSENSUS_NODE_PREFIX, Setup, setup_validators, + tests::dkg::common::wait_for_validators_to_reach_epoch, +}; + +#[test_traced] +fn single_node_transitions_once() { + AssertTransition { + how_many_signers: 1, + epoch_length: 10, + how_many_epochs: 1, + } + .run() +} + +#[test_traced] +fn single_node_transitions_twice() { + AssertTransition { + how_many_signers: 1, + epoch_length: 10, + how_many_epochs: 2, + } + .run() +} + +#[test_traced] +fn two_nodes_transition_once() { + AssertTransition { + how_many_signers: 2, + epoch_length: 10, + how_many_epochs: 1, + } + .run() +} + +#[test_traced] +fn two_nodes_transition_twice() { + AssertTransition { + how_many_signers: 2, + epoch_length: 10, + how_many_epochs: 2, + } + .run() +} + +#[test_traced] +fn 
four_nodes_transition_once() { + AssertTransition { + how_many_signers: 4, + epoch_length: 20, + how_many_epochs: 1, + } + .run() +} + +#[test_traced] +fn four_nodes_transition_twice() { + AssertTransition { + how_many_signers: 4, + epoch_length: 20, + how_many_epochs: 2, + } + .run() +} + +struct AssertTransition { + how_many_signers: u32, + epoch_length: u64, + how_many_epochs: u64, +} + +impl AssertTransition { + fn run(self) { + let Self { + how_many_signers, + epoch_length, + how_many_epochs, + } = self; + let _ = tempo_eyre::install(); + let setup = Setup::new() + .how_many_signers(how_many_signers) + .epoch_length(epoch_length); + + let executor = Runner::from(Config::default().with_seed(setup.seed)); + + executor.start(|mut context| async move { + // HACK: Sleep 1 second to ensure the deterministic runtime returns + // .current().epoch_millis() > 1000. + context.sleep(Duration::from_secs(1)).await; + + let (mut validators, execution_runtime) = setup_validators(&mut context, setup).await; + + join_all(validators.iter_mut().map(|v| v.start(&context))).await; + + let http_url = validators[0] + .execution() + .rpc_server_handle() + .http_url() + .unwrap() + .parse::() + .unwrap(); + + for i in 0..how_many_signers { + tracing::debug!( + block.number = execution_runtime + .migrate_validator(http_url.clone(), i as u64) + .await + .unwrap() + .block_number, + "migrateValidator returned receipt", + ); + } + let initialization_height = execution_runtime + .initialize_if_migrated(http_url.clone()) + .await + .unwrap() + .block_number + .unwrap(); + + let epoch_strat = FixedEpocher::new(NZU64!(epoch_length)); + let info = epoch_strat + .containing(Height::new(initialization_height)) + .unwrap(); + let initialization_epoch = info.epoch(); + tracing::debug!( + initialization_height, + %initialization_epoch, + "initializeIfMigrated completed", + ); + + // The epoch at which we start checking nodes for transitions. 
+ // + // If the migration completed in epoch 0, we need to wait for + // all nodes to enter epoch 1 before their metrics make sense. + let start_epoch = if info.last().get() == initialization_height { + initialization_epoch.next().next() + } else { + initialization_epoch.next() + } + .get(); + let mut epoch_count = 0; + while epoch_count < how_many_epochs { + tracing::info!("waiting for epoch {}", start_epoch + epoch_count); + wait_for_validators_to_reach_epoch( + &context, + start_epoch + epoch_count, + how_many_signers, + ) + .await; + + for line in context.encode().lines() { + if !line.starts_with(CONSENSUS_NODE_PREFIX) { + continue; + } + let mut parts = line.split_whitespace(); + let metric = parts.next().unwrap(); + let value = parts.next().unwrap(); + if metric.ends_with("_dkg_manager_read_players_from_v1_contract_total") { + assert_eq!( + initialization_epoch.get(), + value.parse::().unwrap(), + "v1 contract must only have been read for however \ + many epochs it took to initialize the v2 contract" + ); + } + if metric.ends_with("_dkg_manager_read_players_from_v2_contract_total") { + assert!(value.parse::().unwrap() > 0); + } + if metric.ends_with("_dkg_manager_read_re_dkg_epoch_from_v1_contract_total") { + assert_eq!( + initialization_epoch.get(), + value.parse::().unwrap(), + "v1 contract must only have been read for however \ + many epochs it took to initialize the v2 contract" + ); + } + if metric.ends_with("_dkg_manager_read_re_dkg_epoch_from_v2_contract_total") { + assert!(value.parse::().unwrap() > 0); + } + if metric.ends_with("_dkg_manager_syncing_players") { + assert_eq!( + 0, + value.parse::().unwrap(), + "once migrated, the node should no longer consider syncing players", + ); + } + } + epoch_count += 1; + } + }) + } +} diff --git a/crates/e2e/src/tests/migration_from_v1_to_v2/mod.rs b/crates/e2e/src/tests/migration_from_v1_to_v2/mod.rs new file mode 100644 index 0000000000..b91c7149ce --- /dev/null +++ 
b/crates/e2e/src/tests/migration_from_v1_to_v2/mod.rs @@ -0,0 +1 @@ +mod dkg; diff --git a/crates/e2e/src/tests/mod.rs b/crates/e2e/src/tests/mod.rs index b4e545e4e0..ebb77caac7 100644 --- a/crates/e2e/src/tests/mod.rs +++ b/crates/e2e/src/tests/mod.rs @@ -8,9 +8,11 @@ mod consensus_rpc; mod dkg; mod linkage; mod metrics; +mod migration_from_v1_to_v2; mod restart; mod subblocks; mod sync; +mod v2_at_genesis; #[test_traced] fn spawning_execution_node_works() { @@ -26,7 +28,7 @@ fn spawning_execution_node_works() { // #[test] // fn spawning_execution_node_works() { // let _telemetry = tracing_subscriber::fmt() - // .with_max_level(Level::DEBUG) + // .with_max_level(tracing::Level::DEBUG) // .with_test_writer() // .try_init(); // diff --git a/crates/e2e/src/tests/sync.rs b/crates/e2e/src/tests/sync.rs index e3aed54384..68a871e59a 100644 --- a/crates/e2e/src/tests/sync.rs +++ b/crates/e2e/src/tests/sync.rs @@ -112,7 +112,7 @@ fn joins_from_snapshot() { // Now turn the receiver into the donor - except for the database dir and // env. This simulates a start from a snapshot. receiver.uid = donor.uid; - receiver.public_key = donor.public_key; + receiver.private_key = donor.private_key; { let peer_manager = receiver.consensus_config.peer_manager.clone(); receiver.consensus_config = donor.consensus_config; @@ -273,7 +273,7 @@ fn can_restart_after_joining_from_snapshot() { // Now turn the receiver into the donor - except for the database dir and // env. This simulates a start from a snapshot. 
receiver.uid = donor.uid; - receiver.public_key = donor.public_key; + receiver.private_key = donor.private_key; { let peer_manager = receiver.consensus_config.peer_manager.clone(); receiver.consensus_config = donor.consensus_config; diff --git a/crates/e2e/src/tests/v2_at_genesis/backfill.rs b/crates/e2e/src/tests/v2_at_genesis/backfill.rs new file mode 100644 index 0000000000..4afbdfabc5 --- /dev/null +++ b/crates/e2e/src/tests/v2_at_genesis/backfill.rs @@ -0,0 +1,134 @@ +use std::time::Duration; + +use commonware_macros::test_traced; +use commonware_runtime::{ + Clock, Metrics, Runner as _, + deterministic::{Config, Runner}, +}; +use futures::future::join_all; +use reth_ethereum::storage::BlockNumReader; +use reth_node_metrics::recorder::install_prometheus_recorder; + +use crate::{ + CONSENSUS_NODE_PREFIX, Setup, get_pipeline_runs, setup_validators, + tests::v2_at_genesis::assert_no_v1, +}; + +#[test_traced] +fn validator_can_join_later_with_live_sync() { + AssertJoinsLate { + blocks_before_join: 5, + blocks_after_join: 10, + should_pipeline_sync: false, + } + .run(); +} + +#[test_traced] +fn validator_can_join_later_with_pipeline_sync() { + AssertJoinsLate { + blocks_before_join: 65, + blocks_after_join: 70, + should_pipeline_sync: true, + } + .run(); + let _ = tempo_eyre::install(); +} + +#[track_caller] +fn assert_no_new_epoch(context: &impl Metrics, max_epoch: u64) { + let metrics = context.encode(); + for line in metrics.lines() { + let mut parts = line.split_whitespace(); + let metric = parts.next().unwrap(); + let value = parts.next().unwrap(); + + if metric.ends_with("_peers_blocked") { + let value = value.parse::().unwrap(); + assert_eq!(value, 0); + } + + if metric.ends_with("_epoch_manager_latest_epoch") { + let value = value.parse::().unwrap(); + assert!(value <= max_epoch, "epoch progressed; sync likely failed"); + } + } +} + +struct AssertJoinsLate { + blocks_before_join: u64, + blocks_after_join: u64, + should_pipeline_sync: bool, +} +impl 
AssertJoinsLate { + fn run(self) { + let Self { + blocks_before_join, + blocks_after_join, + should_pipeline_sync, + } = self; + + let _ = tempo_eyre::install(); + let metrics_recorder = install_prometheus_recorder(); + + let setup = Setup::new() + .epoch_length(100) + .t2_time(0) + .connect_execution_layer_nodes(should_pipeline_sync); + + Runner::from(Config::default().with_seed(setup.seed)).start(|mut context| async move { + let (mut nodes, _execution_runtime) = + setup_validators(&mut context, setup.clone()).await; + + // Start all nodes except the last one + let mut last = nodes.pop().unwrap(); + join_all(nodes.iter_mut().map(|node| node.start(&context))).await; + + // Wait for chain to advance before starting the last node + while nodes[0].execution_provider().last_block_number().unwrap() < blocks_before_join { + context.sleep(Duration::from_secs(1)).await; + } + + last.start(&context).await; + assert_eq!(last.execution_provider().last_block_number().unwrap(), 0); + + tracing::debug!("last node started"); + + // Assert that last node is able to catch up and progress + while last.execution_provider().last_block_number().unwrap() < blocks_after_join { + context.sleep(Duration::from_millis(100)).await; + assert_no_new_epoch(&context, 0); + } + for line in context.encode().lines() { + if !line.starts_with(CONSENSUS_NODE_PREFIX) { + continue; + } + let mut parts = line.split_whitespace(); + let metric = parts.next().unwrap(); + let value = parts.next().unwrap(); + assert_no_v1(metric, value); + } + // Verify backfill behavior + let actual_runs = get_pipeline_runs(metrics_recorder); + if should_pipeline_sync { + assert!( + actual_runs > 0, + "at least one backfill must have been triggered" + ); + } else { + assert_eq!( + 0, actual_runs, + "expected no backfill, got {actual_runs} runs" + ); + } + + // Verify that the node is still progressing after sync + let last_block = last.execution_provider().last_block_number().unwrap(); + 
context.sleep(Duration::from_secs(10)).await; + assert!( + last.execution_provider().last_block_number().unwrap() > last_block, + "node should still be progressing after sync" + ); + }); + } +} diff --git a/crates/e2e/src/tests/v2_at_genesis/consensus_rpc.rs b/crates/e2e/src/tests/v2_at_genesis/consensus_rpc.rs new file mode 100644 index 0000000000..e26c8d711d --- /dev/null +++ b/crates/e2e/src/tests/v2_at_genesis/consensus_rpc.rs @@ -0,0 +1,299 @@ +//! Tests for the consensus RPC namespace. +//! +//! These tests verify that the consensus RPC endpoints work correctly, +//! including subscriptions and queries. + +use std::{net::SocketAddr, time::Duration}; + +use super::dkg::common::{ + assert_no_dkg_failures, wait_for_outcome, wait_for_validators_to_reach_epoch, +}; +use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators}; +use alloy::transports::http::reqwest::Url; +use alloy_primitives::hex; +use commonware_codec::ReadExt as _; +use commonware_consensus::simplex::{scheme::bls12381_threshold::vrf::Scheme, types::Finalization}; +use commonware_cryptography::{ + bls12381::primitives::variant::{MinSig, Variant}, + ed25519::PublicKey, +}; +use commonware_macros::test_traced; +use commonware_runtime::{ + Clock, Metrics as _, Runner as _, + deterministic::{self, Context, Runner}, +}; +use futures::{channel::oneshot, future::join_all}; +use jsonrpsee::{http_client::HttpClientBuilder, ws_client::WsClientBuilder}; +use tempo_commonware_node::consensus::Digest; +use tempo_node::rpc::consensus::{Event, Query, TempoConsensusApiClient}; + +/// Test that subscribing to consensus events works and that finalization +/// can be queried via HTTP after receiving a finalization event. 
+#[tokio::test] +#[test_traced] +async fn consensus_subscribe_and_query_finalization() { + let _ = tempo_eyre::install(); + + let initial_height = 3; + let setup = Setup::new().how_many_signers(1).epoch_length(100); + let cfg = deterministic::Config::default().with_seed(setup.seed); + + let (addr_tx, addr_rx) = oneshot::channel::<(SocketAddr, SocketAddr)>(); + let (done_tx, done_rx) = oneshot::channel::<()>(); + + let executor_handle = std::thread::spawn(move || { + let executor = Runner::from(cfg); + executor.start(|mut context| async move { + let (mut validators, _execution_runtime) = setup_validators(&mut context, setup).await; + validators[0].start(&context).await; + wait_for_height(&context, initial_height).await; + + let execution = validators[0].execution(); + + addr_tx + .send(( + execution.rpc_server_handles.rpc.http_local_addr().unwrap(), + execution.rpc_server_handles.rpc.ws_local_addr().unwrap(), + )) + .unwrap(); + + let _ = done_rx.await; + }); + }); + + let (http_addr, ws_addr) = addr_rx.await.unwrap(); + let ws_url = format!("ws://{ws_addr}"); + let http_url = format!("http://{http_addr}"); + let ws_client = WsClientBuilder::default().build(&ws_url).await.unwrap(); + let mut subscription = ws_client.subscribe_events().await.unwrap(); + + let http_client = HttpClientBuilder::default().build(&http_url).unwrap(); + + let mut saw_notarized = false; + let mut saw_finalized = false; + let mut current_height = initial_height; + + while !saw_notarized || !saw_finalized { + let event = tokio::time::timeout(Duration::from_secs(10), subscription.next()) + .await + .unwrap() + .unwrap() + .unwrap(); + + match event { + Event::Notarized { .. } => { + saw_notarized = true; + } + Event::Finalized { block, .. 
} => { + let height = block.height.unwrap(); + assert!( + height > current_height, + "finalized height should be > {current_height}" + ); + + let queried_block = http_client + .get_finalization(Query::Height(height)) + .await + .unwrap() + .unwrap(); + + assert_eq!(queried_block, block); + + current_height = height; + saw_finalized = true; + } + Event::Nullified { .. } => {} + } + } + + let _ = http_client + .get_finalization(Query::Latest) + .await + .unwrap() + .unwrap(); + + let state = http_client.get_latest().await.unwrap(); + + assert!(state.finalized.is_some()); + + drop(done_tx); + executor_handle.join().unwrap(); +} + +/// Wait for a validator to reach a target height by checking metrics. +async fn wait_for_height(context: &Context, target_height: u64) { + loop { + let metrics = context.encode(); + for line in metrics.lines() { + if !line.starts_with(CONSENSUS_NODE_PREFIX) { + continue; + } + let mut parts = line.split_whitespace(); + let metric = parts.next().unwrap(); + let value = parts.next().unwrap(); + if metric.ends_with("_marshal_processed_height") { + let height = value.parse::().unwrap(); + if height >= target_height { + return; + } + } + } + context.sleep(Duration::from_millis(100)).await; + } +} + +/// Test that `get_identity_transition_proof` returns valid proofs after a full DKG ceremony. +/// +/// This verifies: +/// 1. After a full DKG, the RPC returns a transition with different old/new public keys +/// 2. The transition epoch matches where the full DKG occurred +/// 3. 
The proof contains a valid header and certificate +#[test_traced] +fn get_identity_transition_proof_after_full_dkg() { + let _ = tempo_eyre::install(); + + let how_many_signers = 1; + let epoch_length = 10; + let full_dkg_epoch: u64 = 1; + + let setup = Setup::new() + .how_many_signers(how_many_signers) + .epoch_length(epoch_length); + + let seed = setup.seed; + let cfg = deterministic::Config::default().with_seed(seed); + let executor = Runner::from(cfg); + + executor.start(|mut context| async move { + let (mut validators, execution_runtime) = setup_validators(&mut context, setup).await; + + join_all(validators.iter_mut().map(|v| v.start(&context))).await; + + // Get HTTP URL for RPC + let http_url: Url = validators[0] + .execution() + .rpc_server_handle() + .http_url() + .unwrap() + .parse() + .unwrap(); + + // Schedule full DKG for epoch 1 + execution_runtime + .set_next_full_dkg_ceremony(http_url.clone(), full_dkg_epoch) + .await + .unwrap(); + + // Wait for is_next_full_dkg flag + let outcome_before = + wait_for_outcome(&context, &validators, full_dkg_epoch - 1, epoch_length).await; + assert!( + outcome_before.is_next_full_dkg, + "Epoch {} outcome should have is_next_full_dkg=true", + full_dkg_epoch - 1 + ); + let pubkey_before = *outcome_before.sharing().public(); + + // Wait for full DKG to complete + wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 1, how_many_signers).await; + assert_no_dkg_failures(&context); + + // Verify the full DKG created a new public key + let outcome_after = + wait_for_outcome(&context, &validators, full_dkg_epoch, epoch_length).await; + let pubkey_after = *outcome_after.sharing().public(); + assert_ne!( + pubkey_before, pubkey_after, + "Full DKG must produce a DIFFERENT group public key" + ); + + // Test 1: Query from latest epoch (after full DKG) - should have transition + // Run on execution runtime's tokio runtime since jsonrpsee requires tokio + let http_url_str = http_url.to_string(); + let response = 
execution_runtime + .run_async(async move { + let http_client = HttpClientBuilder::default().build(&http_url_str).unwrap(); + http_client + .get_identity_transition_proof(None, Some(false)) + .await + .unwrap() + }) + .await + .unwrap(); + + assert!( + !response.identity.is_empty(), + "Identity should always be present" + ); + assert_eq!( + response.transitions.len(), + 1, + "Expected exactly one transition" + ); + + let transition = &response.transitions[0]; + assert_eq!( + transition.transition_epoch, full_dkg_epoch, + "Transition epoch should match full DKG epoch" + ); + assert_ne!( + transition.old_identity, transition.new_identity, + "Old and new public keys should be different" + ); + assert_eq!( + response.identity, transition.new_identity, + "Identity should match the new public key from the latest transition" + ); + + // Decode and verify the BLS signature + let old_pubkey_bytes = hex::decode(&transition.old_identity).unwrap(); + let old_pubkey = ::Public::read(&mut old_pubkey_bytes.as_slice()) + .expect("valid BLS public key"); + let proof = transition + .proof + .as_ref() + .expect("non-genesis transition should have proof"); + let finalization = Finalization::, Digest>::read( + &mut hex::decode(&proof.finalization_certificate) + .unwrap() + .as_slice(), + ) + .expect("valid finalization"); + + assert!( + finalization.verify( + &mut context, + &Scheme::certificate_verifier(tempo_commonware_node::NAMESPACE, old_pubkey), + &commonware_parallel::Sequential + ), + "BLS signature verification failed" + ); + + // Test 2: Query from epoch 0 (before full DKG) - should have identity but no transitions + let old_identity = transition.old_identity.clone(); + let http_url_str = http_url.to_string(); + let response_epoch0 = execution_runtime + .run_async(async move { + let http_client = HttpClientBuilder::default().build(&http_url_str).unwrap(); + http_client + .get_identity_transition_proof(Some(0), Some(false)) + .await + .unwrap() + }) + .await + .unwrap(); + + 
assert!( + !response_epoch0.identity.is_empty(), + "Identity should be present even at epoch 0" + ); + assert!( + response_epoch0.transitions.is_empty(), + "Should have no transitions when querying from epoch 0" + ); + assert_eq!( + response_epoch0.identity, old_identity, + "Identity at epoch 0 should be the old public key (before full DKG)" + ); + }); +} diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/common.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/common.rs new file mode 100644 index 0000000000..62edc35bcd --- /dev/null +++ b/crates/e2e/src/tests/v2_at_genesis/dkg/common.rs @@ -0,0 +1,140 @@ +//! Common helpers for DKG tests. + +use std::time::Duration; + +use commonware_codec::ReadExt as _; +use commonware_consensus::types::{Epoch, Epocher as _, FixedEpocher, Height}; +use commonware_runtime::{Clock as _, Metrics as _, deterministic::Context}; +use commonware_utils::NZU64; +use reth_ethereum::provider::BlockReader as _; +use tempo_dkg_onchain_artifacts::OnchainDkgOutcome; + +use crate::{CONSENSUS_NODE_PREFIX, TestingNode}; + +/// Reads the DKG outcome from a block, returns None if block doesn't exist or has no outcome. +pub(crate) fn read_outcome_from_validator( + validator: &TestingNode, + block_num: Height, +) -> Option { + let provider = validator.execution_provider(); + let block = provider.block_by_number(block_num.get()).ok()??; + let extra_data = &block.header.inner.extra_data; + + if extra_data.is_empty() { + return None; + } + + Some(OnchainDkgOutcome::read(&mut extra_data.as_ref()).expect("valid DKG outcome")) +} + +/// Parses a metric line, returning (metric_name, value) if valid. +pub(crate) fn parse_metric_line(line: &str) -> Option<(&str, u64)> { + if !line.starts_with(CONSENSUS_NODE_PREFIX) { + return None; + } + + let mut parts = line.split_whitespace(); + let metric = parts.next()?; + let value = parts.next()?.parse().ok()?; + + Some((metric, value)) +} + +/// Waits for and reads the DKG outcome from the last block of the given epoch. 
+pub(crate) async fn wait_for_outcome(
+    context: &Context,
+    validators: &[TestingNode],
+    epoch: u64,
+    epoch_length: u64,
+) -> OnchainDkgOutcome {
+    // The outcome is looked up in the last block of `epoch`.
+    let epocher = FixedEpocher::new(NZU64!(epoch_length));
+    let height = epocher.last(Epoch::new(epoch)).expect("valid epoch");
+
+    tracing::info!(epoch, %height, "Waiting for DKG outcome");
+
+    loop {
+        context.sleep(Duration::from_secs(1)).await;
+
+        // Only the first validator is consulted; keep polling while it has no
+        // block (or no outcome in the block) at `height`.
+        let Some(outcome) = read_outcome_from_validator(&validators[0], height) else {
+            continue;
+        };
+
+        tracing::info!(
+            epoch,
+            %height,
+            outcome_epoch = %outcome.epoch,
+            is_next_full_dkg = outcome.is_next_full_dkg,
+            "Read DKG outcome"
+        );
+        return outcome;
+    }
+}
+
+/// Returns how many consensus-node metric lines named
+/// `*_epoch_manager_latest_epoch` report a value of at least `target_epoch`.
+pub(crate) fn count_validators_at_epoch(context: &Context, target_epoch: u64) -> u32 {
+    let encoded = context.encode();
+    let reached: u32 = encoded
+        .lines()
+        .filter_map(parse_metric_line)
+        .filter(|(name, epoch)| {
+            name.ends_with("_epoch_manager_latest_epoch") && *epoch >= target_epoch
+        })
+        .map(|_| 1u32)
+        .sum();
+
+    reached
+}
+
+/// Sleeps in one-second steps until `count_validators_at_epoch` reports at
+/// least `min_validators` for `target_epoch`.
+pub(crate) async fn wait_for_validators_to_reach_epoch(
+    context: &Context,
+    target_epoch: u64,
+    min_validators: u32,
+) {
+    tracing::info!(target_epoch, min_validators, "Waiting for epoch");
+
+    // Every check, including the very first one, is preceded by a sleep.
+    context.sleep(Duration::from_secs(1)).await;
+    while count_validators_at_epoch(context, target_epoch) < min_validators {
+        context.sleep(Duration::from_secs(1)).await;
+    }
+
+    tracing::info!(target_epoch, "Validators reached epoch");
+}
+
+/// Asserts that no DKG ceremony failures have occurred.
+///
+/// Scans every consensus-node metric line and panics if a metric whose name
+/// ends in `_dkg_manager_ceremony_failures_total` is non-zero.
+#[track_caller]
+pub(crate) fn assert_no_dkg_failures(context: &Context) {
+    let metrics = context.encode();
+
+    for line in metrics.lines() {
+        // Lines that are not `<consensus-node metric> <u64 value>` are ignored.
+        let Some((metric, value)) = parse_metric_line(line) else {
+            continue;
+        };
+
+        if metric.ends_with("_dkg_manager_ceremony_failures_total") {
+            assert_eq!(0, value, "DKG ceremony failed: {metric}");
+        }
+    }
+}
+
+/// Asserts that at least one validator has skipped rounds (indicating sync occurred).
+///
+/// Returns as soon as any `*_rounds_skipped_total` metric is greater than zero
+/// and panics if no such metric is found.
+#[track_caller]
+pub(crate) fn assert_skipped_rounds(context: &Context) {
+    let metrics = context.encode();
+
+    for line in metrics.lines() {
+        let Some((metric, value)) = parse_metric_line(line) else {
+            continue;
+        };
+
+        if metric.ends_with("_rounds_skipped_total") && value > 0 {
+            return;
+        }
+    }
+
+    panic!("Expected at least one validator to have skipped rounds during sync");
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/dynamic.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/dynamic.rs
new file mode 100644
index 0000000000..e7c007beea
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/dynamic.rs
@@ -0,0 +1,302 @@
+use std::time::Duration;
+
+use alloy::transports::http::reqwest::Url;
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Clock as _, Metrics as _, Runner as _,
+    deterministic::{Config, Runner},
+};
+use futures::future::join_all;
+
+use crate::{
+    CONSENSUS_NODE_PREFIX, Setup, setup_validators,
+    tests::{
+        dkg::common::target_epoch,
+        v2_at_genesis::{assert_no_dkg_failure, assert_no_v1},
+    },
+};
+
+// NOTE(review): the type arguments of the `parse::()` calls in this file appear
+// to have been lost in this copy of the patch - confirm against the original.
+
+/// Adds one validator to a set of one initial signer (epoch length 10).
+#[test_traced]
+fn validator_is_added_to_a_set_of_one() {
+    AssertValidatorIsAdded {
+        how_many_initial: 1,
+        epoch_length: 10,
+    }
+    .run();
+}
+
+/// Adds one validator to a set of three initial signers (epoch length 30).
+#[test_traced]
+fn validator_is_added_to_a_set_of_three() {
+    AssertValidatorIsAdded {
+        how_many_initial: 3,
+        epoch_length: 30,
+    }
+    .run();
+}
+
+/// Deactivates one validator of a set of two initial signers (epoch length 20).
+#[test_traced]
+fn validator_is_removed_from_set_of_two() {
+    AssertValidatorIsRemoved {
+        how_many_initial: 2,
+        epoch_length: 20,
+    }
+    .run();
+}
+
+/// Deactivates one validator of a set of four initial signers (epoch length 40).
+#[test_traced]
+fn validator_is_removed_from_set_of_four() {
+    AssertValidatorIsRemoved {
+        how_many_initial: 4,
+        epoch_length: 40,
+    }
+    .run();
+}
+
+/// Scenario: a verifier is registered through `add_validator_v2` and must show
+/// up as DKG player, then as dealer, within the expected epochs.
+struct AssertValidatorIsAdded {
+    // Number of signers the network starts with.
+    how_many_initial: u32,
+    // Epoch length passed to `Setup::epoch_length`.
+    epoch_length: u64,
+}
+
+impl AssertValidatorIsAdded {
+    /// Runs the scenario on the deterministic runtime; every expectation is an
+    /// assertion on the encoded metrics, polled once per (runtime) second.
+    fn run(self) {
+        let Self {
+            how_many_initial,
+            epoch_length,
+        } = self;
+        let _ = tempo_eyre::install();
+        let setup = Setup::new()
+            .how_many_signers(how_many_initial)
+            .how_many_verifiers(1)
+            .t2_time(0)
+            .epoch_length(epoch_length);
+
+        let cfg = Config::default().with_seed(setup.seed);
+        let executor = Runner::from(cfg);
+
+        executor.start(|mut context| async move {
+            let (mut validators, execution_runtime) = setup_validators(&mut context, setup).await;
+
+            // The single verifier is the node that will be added on chain.
+            let added_uid = validators
+                .iter()
+                .find(|v| v.is_verifier())
+                .unwrap()
+                .uid
+                .clone();
+            join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+            // We will send an arbitrary node of the initial validator set the smart
+            // contract call.
+            let http_url = validators
+                .iter()
+                .find(|v| v.is_signer())
+                .unwrap()
+                .execution()
+                .rpc_server_handle()
+                .http_url()
+                .unwrap()
+                .parse::()
+                .unwrap();
+
+            let receipt = execution_runtime
+                .add_validator_v2(
+                    http_url.clone(),
+                    validators.iter().find(|v| v.is_verifier()).unwrap(),
+                )
+                .await
+                .unwrap();
+
+            tracing::debug!(
+                block.number = receipt.block_number,
+                "addValidator call returned receipt"
+            );
+
+            // `target_epoch` is defined elsewhere; presumably it maps the receipt's
+            // block to the epoch in which the new validator becomes a player - confirm.
+            let player_epoch = target_epoch(epoch_length, receipt.block_number.unwrap());
+            let dealer_epoch = player_epoch.next();
+
+            'becomes_signer: loop {
+                context.sleep(Duration::from_secs(1)).await;
+
+                // Both flags are recomputed on every poll and only change when the
+                // added validator's own epoch metric is encountered, so the checks
+                // below depend on the order of the metric lines.
+                let mut entered_player_epoch = false;
+                let mut entered_dealer_epoch = false;
+                for line in context.encode().lines() {
+                    if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                        continue;
+                    }
+
+                    let mut parts = line.split_whitespace();
+                    let key = parts.next().unwrap();
+                    let value = parts.next().unwrap();
+
+                    assert_no_v1(key, value);
+                    assert_no_dkg_failure(key, value);
+
+                    if key.ends_with("peer_manager_peers") {
+                        assert_eq!(
+                            how_many_initial + 1,
+                            value.parse::().unwrap(),
+                            "peers are registered on the next finalized block; this should have happened almost immediately",
+                        );
+                    }
+
+                    if key.ends_with("_epoch_manager_latest_epoch") {
+                        let epoch = value.parse::().unwrap();
+
+                        if key.contains(&added_uid) {
+                            entered_player_epoch |= epoch >= player_epoch.get();
+                            entered_dealer_epoch |= epoch >= dealer_epoch.get();
+                        }
+
+                        // No node may get past the dealer epoch before the test ends.
+                        assert!(
+                            epoch < dealer_epoch.next().get(),
+                            "network reached epoch `{}` without added validator getting a share",
+                            dealer_epoch.next(),
+                        );
+                    }
+
+                    // Player epoch: one more player than dealers.
+                    if entered_player_epoch && !entered_dealer_epoch {
+                        if key.ends_with("_dkg_manager_ceremony_players") {
+                            assert_eq!(how_many_initial + 1, value.parse::().unwrap(),)
+                        }
+                        if key.ends_with("_dkg_manager_ceremony_dealers") {
+                            assert_eq!(how_many_initial, value.parse::().unwrap(),)
+                        }
+                    }
+
+                    // Dealer epoch: the test ends on the first signer counter seen
+                    // after the flag is set, which must be positive.
+                    if entered_dealer_epoch {
+                        if key.ends_with("_dkg_manager_ceremony_dealers") {
+                            assert_eq!(how_many_initial + 1, value.parse::().unwrap(),)
+                        }
+
+                        if key.ends_with("_epoch_manager_how_often_signer_total") {
+                            assert!(value.parse::().unwrap() > 0,);
+                            break 'becomes_signer;
+                        }
+                    }
+                }
+            }
+        })
+    }
+}
+
+/// Scenario: one validator is deactivated through `deactivate_validator_v2` and
+/// must drop out of the DKG players, then the dealers and the peer set.
+struct AssertValidatorIsRemoved {
+    // Number of signers the network starts with.
+    how_many_initial: u32,
+    // Epoch length passed to `Setup::epoch_length`.
+    epoch_length: u64,
+}
+
+impl AssertValidatorIsRemoved {
+    /// Runs the scenario on the deterministic runtime; every expectation is an
+    /// assertion on the encoded metrics, polled once per (runtime) second.
+    fn run(self) {
+        let Self {
+            how_many_initial,
+            epoch_length,
+        } = self;
+        let _ = tempo_eyre::install();
+        let setup = Setup::new()
+            .how_many_signers(how_many_initial)
+            .t2_time(0)
+            .epoch_length(epoch_length);
+
+        let cfg = Config::default().with_seed(setup.seed);
+        let executor = Runner::from(cfg);
+
+        executor.start(|mut context| async move {
+            let (mut validators, execution_runtime) = setup_validators(&mut context, setup).await;
+
+            join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+            // We will send an arbitrary node of the initial validator set the smart
+            // contract call.
+            let http_url = validators
+                .iter()
+                .find(|v| v.is_signer())
+                .unwrap()
+                .execution()
+                .rpc_server_handle()
+                .http_url()
+                .unwrap()
+                .parse::()
+                .unwrap();
+
+            // The last validator is taken out of `validators` (after it was started)
+            // and is the one that gets deactivated.
+            let removed_validator = validators.pop().unwrap();
+
+            let receipt = execution_runtime
+                .deactivate_validator_v2(http_url, &removed_validator)
+                .await
+                .unwrap();
+
+            tracing::debug!(
+                block.number = receipt.block_number,
+                "deactivateValidator call returned receipt"
+            );
+
+            let removal_epoch = target_epoch(epoch_length, receipt.block_number.unwrap());
+            let removed_epoch = removal_epoch.next();
+
+            'is_removed: loop {
+                context.sleep(Duration::from_secs(1)).await;
+
+                // Recomputed on every poll; see the order-dependence note in
+                // `AssertValidatorIsAdded::run`-style loops: flags only flip once
+                // the relevant epoch metric line has been visited.
+                let mut entered_removal_epoch = false;
+                let mut entered_removed_epoch = false;
+                for line in context.encode().lines() {
+                    if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                        continue;
+                    }
+
+                    let mut parts = line.split_whitespace();
+                    let key = parts.next().unwrap();
+                    let value = parts.next().unwrap();
+
+                    assert_no_v1(key, value);
+                    assert_no_dkg_failure(key, value);
+
+                    // Broader suffix than the check inside `assert_no_dkg_failure`.
+                    if key.ends_with("ceremony_failures_total") {
+                        assert_eq!(0, value.parse::().unwrap(),);
+                    }
+
+                    if key.ends_with("_epoch_manager_latest_epoch") {
+                        let epoch = value.parse::().unwrap();
+
+                        assert!(
+                            epoch < removed_epoch.next().get(),
+                            "validator removal should have happened by epoch \
+                            `{removed_epoch}`, but network is already in epoch \
+                            {}",
+                            removed_epoch.next(),
+                        );
+
+                        // Removal epoch is tracked on the removed node only ...
+                        if key.contains(&removed_validator.uid) {
+                            entered_removal_epoch |= epoch >= removal_epoch.get();
+                        }
+
+                        // ... while any node reaching `removed_epoch` sets this flag.
+                        entered_removed_epoch |= epoch >= removed_epoch.get();
+                    }
+
+                    // Removal epoch: one player fewer, dealers unchanged.
+                    if entered_removal_epoch && !entered_removed_epoch {
+                        if key.ends_with("_dkg_manager_ceremony_players") {
+                            assert_eq!(how_many_initial - 1, value.parse::().unwrap(),)
+                        }
+                        if key.ends_with("_dkg_manager_ceremony_dealers") {
+                            assert_eq!(how_many_initial, value.parse::().unwrap(),)
+                        }
+                    }
+
+                    // Afterwards only metrics of the remaining nodes are checked.
+                    if entered_removed_epoch && !key.contains(&removed_validator.uid) {
+                        if key.ends_with("peer_manager_peers") {
+                            assert_eq!(
+                                how_many_initial - 1,
+                                value.parse::().unwrap(),
+                                "once the peer is deactivated and no longer a \
+                                dealer, it should be removed from the list of \
+                                peers immediately"
+                            );
+                        }
+
+                        if key.ends_with("_dkg_manager_ceremony_dealers") {
+                            assert_eq!(how_many_initial - 1, value.parse::().unwrap(),);
+                            break 'is_removed;
+                        }
+                    }
+                }
+            }
+        })
+    }
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/fast_sync_after_full_dkg.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/fast_sync_after_full_dkg.rs
new file mode 100644
index 0000000000..134ceb19ee
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/fast_sync_after_full_dkg.rs
@@ -0,0 +1,133 @@
+//! Tests for fast sync after a full DKG ceremony.
+
+use alloy::transports::http::reqwest::Url;
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Clock as _, Runner as _,
+    deterministic::{Config, Runner},
+};
+use futures::future::join_all;
+use reth_ethereum::storage::BlockNumReader as _;
+use std::time::Duration;
+use tracing::info;
+
+use super::common::{
+    assert_no_dkg_failures, assert_skipped_rounds, wait_for_outcome,
+    wait_for_validators_to_reach_epoch,
+};
+use crate::{Setup, setup_validators};
+
+/// Tests that a late-joining validator can sync and participate after a full DKG ceremony.
+///
+/// This verifies:
+/// 1. A full DKG ceremony completes successfully (new polynomial, different public key)
+/// 2. A validator that joins late (after full DKG) can sync the chain
+/// 3. The late validator uses fast-sync to jump epoch boundaries (including the full DKG epoch)
+/// 4. The late validator continues progressing after sync
+#[test_traced]
+fn validator_can_fast_sync_after_full_dkg() {
+    let _ = tempo_eyre::install();
+
+    let how_many_signers = 4;
+    let epoch_length = 20;
+    let full_dkg_epoch = 1;
+    // The late validator is started once the chain is past block `3 * epoch_length`.
+    let blocks_before_late_join = 3 * epoch_length + 1;
+
+    let setup = Setup::new()
+        .how_many_signers(how_many_signers)
+        .epoch_length(epoch_length)
+        .t2_time(0)
+        .connect_execution_layer_nodes(true);
+
+    let cfg = Config::default().with_seed(setup.seed);
+    let executor = Runner::from(cfg);
+
+    executor.start(|mut context| async move {
+        let (mut validators, execution_runtime) = setup_validators(&mut context, setup).await;
+
+        // Hold back the last validator; only the remaining ones are started now.
+        let mut late_validator = validators.pop().unwrap();
+        join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+        let http_url: Url = validators[0]
+            .execution()
+            .rpc_server_handle()
+            .http_url()
+            .unwrap()
+            .parse()
+            .unwrap();
+
+        execution_runtime
+            .set_next_full_dkg_ceremony_v2(http_url, full_dkg_epoch)
+            .await
+            .unwrap();
+
+        // The outcome of the epoch before the full DKG must announce it.
+        let outcome_before =
+            wait_for_outcome(&context, &validators, full_dkg_epoch - 1, epoch_length).await;
+        assert!(
+            outcome_before.is_next_full_dkg,
+            "outcome.is_next_full_dkg should be `true`"
+        );
+
+        // wait for full DKG completion (-1 because late validator not started yet)
+        wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 1, how_many_signers - 1)
+            .await;
+
+        let outcome_after =
+            wait_for_outcome(&context, &validators, full_dkg_epoch, epoch_length).await;
+        assert_ne!(
+            outcome_before.sharing().public(),
+            outcome_after.sharing().public(),
+            "full DKG must create different public key"
+        );
+
+        // wait for chain to advance
+        while validators[0]
+            .execution_provider()
+            .last_block_number()
+            .unwrap()
+            < blocks_before_late_join
+        {
+            context.sleep(Duration::from_secs(1)).await;
+        }
+
+        // start late validator
+        late_validator.start(&context).await;
+        info!(id = late_validator.uid, "started late validator",);
+        assert_eq!(
+            late_validator
+                .execution_provider()
+                .last_block_number()
+                .unwrap(),
+            0,
+            "Late validator should start at block 0"
+        );
+
+        // wait for late validator to catch up
+        while late_validator
+            .execution_provider()
+            .last_block_number()
+            .unwrap()
+            < blocks_before_late_join
+        {
+            context.sleep(Duration::from_millis(100)).await;
+        }
+        // ensure fast-sync was used to jump epoch boundaries (including from old to new sharing)
+        assert_skipped_rounds(&context);
+
+        // verify continued progress
+        let block_after_sync = late_validator
+            .execution_provider()
+            .last_block_number()
+            .unwrap();
+        context.sleep(Duration::from_secs(2)).await;
+        let block_later = late_validator
+            .execution_provider()
+            .last_block_number()
+            .unwrap();
+        assert!(
+            block_later > block_after_sync,
+            "Late validator should keep progressing after sync"
+        );
+        assert_no_dkg_failures(&context);
+    })
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/full_ceremony.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/full_ceremony.rs
new file mode 100644
index 0000000000..cf67c40f64
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/full_ceremony.rs
@@ -0,0 +1,140 @@
+//! Tests for full DKG ceremonies triggered by `setNextFullDkgCeremony`.
+
+use alloy::transports::http::reqwest::Url;
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Runner as _,
+    deterministic::{Config, Runner},
+};
+use futures::future::join_all;
+
+use super::common::{assert_no_dkg_failures, wait_for_outcome, wait_for_validators_to_reach_epoch};
+use crate::{Setup, setup_validators};
+
+/// Single signer, epoch length 10, full DKG scheduled for epoch 1.
+#[test_traced]
+fn full_dkg_ceremony() {
+    let scenario = FullDkgTest {
+        how_many_signers: 1,
+        epoch_length: 10,
+        full_dkg_epoch: 1,
+    };
+    scenario.run();
+}
+
+/// Parameters of one full-DKG scenario.
+struct FullDkgTest {
+    // Number of signers the network is started with.
+    how_many_signers: u32,
+    // Epoch length passed to `Setup::epoch_length`.
+    epoch_length: u64,
+    // Epoch handed to `set_next_full_dkg_ceremony_v2`.
+    full_dkg_epoch: u64,
+}
+
+impl FullDkgTest {
+    /// Schedules a full DKG and checks the group public key before the
+    /// ceremony, after it, and after the following reshare.
+    fn run(self) {
+        let Self {
+            how_many_signers,
+            epoch_length,
+            full_dkg_epoch,
+        } = self;
+        let _ = tempo_eyre::install();
+
+        let setup = Setup::new()
+            .how_many_signers(how_many_signers)
+            .t2_time(0)
+            .epoch_length(epoch_length);
+
+        let runner = Runner::from(Config::default().with_seed(setup.seed));
+
+        runner.start(|mut context| async move {
+            let (mut nodes, execution_runtime) = setup_validators(&mut context, setup).await;
+
+            join_all(nodes.iter_mut().map(|node| node.start(&context))).await;
+
+            // The scheduling call goes through the first node's RPC endpoint.
+            let rpc_url = nodes[0]
+                .execution()
+                .rpc_server_handle()
+                .http_url()
+                .unwrap()
+                .parse::<Url>()
+                .unwrap();
+
+            execution_runtime
+                .set_next_full_dkg_ceremony_v2(rpc_url, full_dkg_epoch)
+                .await
+                .unwrap();
+
+            tracing::info!(full_dkg_epoch, "Scheduled full DKG");
+
+            // 1) The outcome of epoch N-1 must announce the full DKG.
+            let epoch_before = full_dkg_epoch - 1;
+            let outcome_before =
+                wait_for_outcome(&context, &nodes, epoch_before, epoch_length).await;
+
+            assert!(
+                outcome_before.is_next_full_dkg,
+                "Epoch {} outcome should have is_next_full_dkg=true",
+                epoch_before
+            );
+            let pubkey_before = *outcome_before.sharing().public();
+            tracing::info!(?pubkey_before, "Group public key BEFORE full DKG");
+
+            // 2) All signers must reach epoch N+1 without ceremony failures.
+            wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 1, how_many_signers)
+                .await;
+            assert_no_dkg_failures(&context);
+
+            // 3) The outcome of epoch N must carry a different group public key.
+            let outcome_after_full =
+                wait_for_outcome(&context, &nodes, full_dkg_epoch, epoch_length).await;
+
+            let pubkey_after_full = *outcome_after_full.sharing().public();
+            tracing::info!(?pubkey_after_full, "Group public key AFTER full DKG");
+
+            assert_ne!(
+                pubkey_before, pubkey_after_full,
+                "Full DKG must produce a DIFFERENT group public key"
+            );
+            tracing::info!("Verified: full DKG created independent polynomial");
+
+            // 4) After epoch N+2 is reached, the outcome of epoch N+1 must keep
+            //    the key and must not request another full DKG.
+            wait_for_validators_to_reach_epoch(&context, full_dkg_epoch + 2, how_many_signers)
+                .await;
+            assert_no_dkg_failures(&context);
+
+            let epoch_after = full_dkg_epoch + 1;
+            let outcome_after_reshare =
+                wait_for_outcome(&context, &nodes, epoch_after, epoch_length).await;
+
+            assert!(
+                !outcome_after_reshare.is_next_full_dkg,
+                "Epoch {} should NOT have is_next_full_dkg flag",
+                epoch_after
+            );
+
+            let pubkey_after_reshare = *outcome_after_reshare.sharing().public();
+            tracing::info!(?pubkey_after_reshare, "Group public key AFTER reshare");
+
+            assert_eq!(
+                pubkey_after_full, pubkey_after_reshare,
+                "Reshare must PRESERVE the group public key"
+            );
+            tracing::info!("Verified: reshare preserved polynomial (full DKG only ran once)");
+        })
+    }
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/mod.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/mod.rs
new file mode 100644
index 0000000000..b0bfed2b84
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/mod.rs
@@ -0,0 +1,8 @@
+//!
Tests on chain DKG and epoch transition
+
+pub(crate) mod common;
+mod dynamic;
+mod fast_sync_after_full_dkg;
+mod full_ceremony;
+mod share_loss;
+mod static_transitions;
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/share_loss.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/share_loss.rs
new file mode 100644
index 0000000000..bb5e651683
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/share_loss.rs
@@ -0,0 +1,102 @@
+use std::time::Duration;
+
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Clock as _, Metrics as _, Runner as _,
+    deterministic::{Config, Runner},
+};
+use futures::future::join_all;
+
+use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators};
+
+/// Checks that a validator started without its share becomes a signer again.
+///
+/// The share is removed from the last validator's consensus config before any
+/// node is started. The metrics are then polled once per second until that node
+/// was first counted as a verifier and afterwards as a signer; the test fails if
+/// any node reports epoch 2 or a blocked peer before that happens.
+#[test_traced]
+fn validator_lost_share_but_gets_share_in_next_epoch() {
+    let _ = tempo_eyre::install();
+
+    let seed = 0;
+
+    let cfg = Config::default().with_seed(seed);
+    let executor = Runner::from(cfg);
+
+    executor.start(|mut context| async move {
+        let epoch_length = 20;
+        let setup = Setup::new()
+            .seed(seed)
+            .epoch_length(epoch_length)
+            .t2_time(0)
+            .connect_execution_layer_nodes(true);
+
+        let (mut validators, _execution_runtime) =
+            setup_validators(&mut context, setup.clone()).await;
+        let uid = {
+            let last_node = validators
+                .last_mut()
+                .expect("we just asked for a couple of validators");
+            last_node
+                .consensus_config_mut()
+                .share
+                .take()
+                .expect("the node must have had a share");
+            last_node.uid().to_string()
+        };
+
+        join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+        let mut node_forgot_share = false;
+
+        'acquire_share: loop {
+            context.sleep(Duration::from_secs(1)).await;
+
+            let metrics = context.encode();
+
+            'metrics: for line in metrics.lines() {
+                if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                    continue 'metrics;
+                }
+
+                let mut parts = line.split_whitespace();
+                let metric = parts.next().unwrap();
+                let value = parts.next().unwrap();
+
+                // Match on the name of *this* line (`metric`). Testing the whole
+                // dump (`metrics`) can never hit the suffix, which would silently
+                // skip the blocked-peer assertion.
+                if metric.ends_with("_peers_blocked") {
+                    let value = value.parse::<u64>().unwrap();
+                    assert_eq!(value, 0);
+                }
+
+                if metric.ends_with("_epoch_manager_latest_epoch") {
+                    let value = value.parse::<u64>().unwrap();
+                    assert!(value < 2, "reached 2nd epoch without recovering new share");
+                }
+
+                // Ensures that node has no share.
+                if !node_forgot_share
+                    && metric.contains(&uid)
+                    && metric.ends_with("_epoch_manager_how_often_verifier_total")
+                {
+                    let value = value.parse::<u64>().unwrap();
+                    node_forgot_share = value > 0;
+                }
+
+                // Ensure that the node gets a share by becoming a signer.
+                if node_forgot_share
+                    && metric.contains(&uid)
+                    && metric.ends_with("_epoch_manager_how_often_signer_total")
+                {
+                    let value = value.parse::<u64>().unwrap();
+                    if value > 0 {
+                        break 'acquire_share;
+                    }
+                }
+            }
+        }
+    });
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/dkg/static_transitions.rs b/crates/e2e/src/tests/v2_at_genesis/dkg/static_transitions.rs
new file mode 100644
index 0000000000..9d142b2695
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/dkg/static_transitions.rs
@@ -0,0 +1,121 @@
+//! Tests for successful DKG ceremonies with static sets of validators.
+//!
+//! Contains test for DKG transition logic
+//! at genesis.
+use commonware_macros::test_traced;
+
+use crate::{Setup, run, tests::v2_at_genesis::assert_no_v1};
+
+// NOTE(review): the type arguments of the `parse::()` calls in this file appear
+// to have been lost in this copy of the patch - confirm against the original.
+
+/// One validator, epoch length 5, one transition.
+#[test_traced]
+fn single_validator_can_transition_once() {
+    AssertStaticTransitions {
+        how_many: 1,
+        epoch_length: 5,
+        transitions: 1,
+    }
+    .run();
+}
+
+/// One validator, epoch length 5, two transitions.
+#[test_traced]
+fn single_validator_can_transition_twice() {
+    AssertStaticTransitions {
+        how_many: 1,
+        epoch_length: 5,
+        transitions: 2,
+    }
+    .run();
+}
+
+/// One validator, epoch length 5, four transitions.
+#[test_traced]
+fn single_validator_can_transition_four_times() {
+    AssertStaticTransitions {
+        how_many: 1,
+        epoch_length: 5,
+        transitions: 4,
+    }
+    .run();
+}
+
+/// Two validators, epoch length 20, one transition.
+#[test_traced]
+fn two_validators_can_transition_once() {
+    AssertStaticTransitions {
+        how_many: 2,
+        epoch_length: 20,
+        transitions: 1,
+    }
+    .run();
+}
+
+/// Two validators, epoch length 20, two transitions.
+#[test_traced]
+fn two_validators_can_transition_twice() {
+    AssertStaticTransitions {
+        how_many: 2,
+        epoch_length: 20,
+        transitions: 2,
+    }
+    .run();
+}
+
+/// Four validators, epoch length 20, one transition.
+#[test_traced]
+fn four_validators_can_transition_once() {
+    AssertStaticTransitions {
+        how_many: 4,
+        epoch_length: 20,
+        transitions: 1,
+    }
+    .run();
+}
+
+/// Four validators, epoch length 20, two transitions.
+#[test_traced]
+fn four_validators_can_transition_twice() {
+    AssertStaticTransitions {
+        how_many: 4,
+        epoch_length: 20,
+        transitions: 2,
+    }
+    .run();
+}
+
+/// Scenario: a fixed set of signers must get through `transitions` epochs and
+/// report at least as many successful DKG ceremonies.
+struct AssertStaticTransitions {
+    // Number of signers the network is started with.
+    how_many: u32,
+    // Epoch length passed to `Setup::epoch_length`.
+    epoch_length: u64,
+    // Threshold for both the latest-epoch and the ceremony-success metrics.
+    transitions: u64,
+}
+
+impl AssertStaticTransitions {
+    /// Builds the setup and hands a metric callback to `run`.
+    ///
+    /// `run` is defined outside this file; presumably it keeps feeding metric
+    /// name/value pairs to the callback until it returns `true` - confirm.
+    fn run(self) {
+        let Self {
+            how_many,
+            epoch_length,
+            transitions,
+        } = self;
+        let _ = tempo_eyre::install();
+
+        let setup = Setup::new()
+            .how_many_signers(how_many)
+            .epoch_length(epoch_length)
+            .t2_time(0);
+
+        // Both flags are sticky: once a metric met the threshold they stay set.
+        let mut epoch_reached = false;
+        let mut dkg_successful = false;
+        let _first = run(setup, move |metric, value| {
+            assert_no_v1(metric, value);
+
+            if metric.ends_with("_dkg_manager_ceremony_failures_total") {
+                let value = value.parse::().unwrap();
+                assert_eq!(0, value);
+            }
+
+            if metric.ends_with("_epoch_manager_latest_epoch") {
+                let value = value.parse::().unwrap();
+                epoch_reached |= value >= transitions;
+            }
+            if metric.ends_with("_dkg_manager_ceremony_successes_total") {
+                let value = value.parse::().unwrap();
+                dkg_successful |= value >= transitions;
+            }
+
+            epoch_reached && dkg_successful
+        });
+    }
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/mod.rs b/crates/e2e/src/tests/v2_at_genesis/mod.rs
new file mode 100644
index 0000000000..a603decfc0
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/mod.rs
@@ -0,0 +1,30 @@
+//! Tests on chain DKG and epoch transition
+
+mod backfill;
+mod consensus_rpc;
+mod dkg;
+mod restart;
+mod simple;
+mod snapshot;
+
+// FIXME: subblocks are currently flaky. Don't want to add extra flaky tests
+// right now.
+// mod subblocks;
+
+/// Panics if `metric` is one of the three V1-related DKG-manager metrics
+/// checked below and `value` does not parse to zero.
+fn assert_no_v1(metric: &str, value: &str) {
+    if metric.ends_with("_dkg_manager_read_players_from_v1_contract_total") {
+        assert_eq!(0, value.parse::().unwrap());
+    }
+    if metric.ends_with("_dkg_manager_syncing_players") {
+        assert_eq!(0, value.parse::().unwrap());
+    }
+    if metric.ends_with("_dkg_manager_read_re_dkg_epoch_from_v1_contract_total") {
+        assert_eq!(0, value.parse::().unwrap());
+    }
+}
+
+/// Panics if `metric` is a `*_dkg_manager_ceremony_failures_total` counter and
+/// `value` does not parse to zero.
+fn assert_no_dkg_failure(metric: &str, value: &str) {
+    if metric.ends_with("_dkg_manager_ceremony_failures_total") {
+        assert_eq!(0, value.parse::().unwrap(),);
+    }
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/restart.rs b/crates/e2e/src/tests/v2_at_genesis/restart.rs
new file mode 100644
index 0000000000..3fe221f8af
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/restart.rs
@@ -0,0 +1,544 @@
+//! Tests for validator restart/kill scenarios
+//!
+//! These tests verify that validators can be killed and restarted, and that they
+//! properly catch up to the rest of the network after restart.
+ +use std::time::Duration; + +use commonware_consensus::types::{Epocher, FixedEpocher, Height}; +use commonware_macros::test_traced; +use commonware_runtime::{ + Clock, Metrics as _, Runner as _, + deterministic::{self, Context, Runner}, +}; +use commonware_utils::NZU64; +use futures::future::join_all; +use rand_08::Rng; +use tracing::debug; + +use crate::{CONSENSUS_NODE_PREFIX, Setup, setup_validators}; + +#[test_traced("WARN")] +fn committee_of_one() { + SimpleRestart { + committee_size: 1, + epoch_length: 10, + restart_after: 5, + stop_at: 10, + connect_execution_layer: false, + } + .run() +} + +#[test_traced("WARN")] +fn committee_of_three() { + SimpleRestart { + committee_size: 3, + epoch_length: 10, + restart_after: 5, + stop_at: 10, + connect_execution_layer: false, + } + .run() +} + +struct SimpleRestart { + committee_size: u32, + epoch_length: u64, + restart_after: u64, + stop_at: u64, + connect_execution_layer: bool, +} + +impl SimpleRestart { + #[track_caller] + fn run(self) { + let Self { + committee_size, + epoch_length, + restart_after, + stop_at, + connect_execution_layer, + } = self; + let _ = tempo_eyre::install(); + + let setup = Setup::new() + .how_many_signers(committee_size) + .seed(0) + .epoch_length(epoch_length) + .t2_time(0) + .connect_execution_layer_nodes(connect_execution_layer); + + let cfg = deterministic::Config::default().with_seed(setup.seed); + let executor = Runner::from(cfg); + + executor.start(|mut context| async move { + let (mut validators, _execution_runtime) = + setup_validators(&mut context, setup.clone()).await; + + join_all(validators.iter_mut().map(|v| v.start(&context))).await; + + debug!( + height = restart_after, + "waiting for network to reach target height before stopping a validator", + ); + wait_for_height(&context, setup.how_many_signers, restart_after, false).await; + + validators[0].stop().await; + debug!(public_key = %validators[0].public_key(), "stopped validator"); + + // wait a bit to let the network 
settle; some finalizations come in later + context.sleep(Duration::from_secs(5)).await; + ensure_no_progress(&context, 5).await; + + validators[0].start(&context).await; + debug!( + public_key = %validators[0].public_key(), + "restarted validator", + ); + + debug!( + height = stop_at, + "waiting for reconstituted validators to reach target height to reach test success", + ); + wait_for_height(&context, validators.len() as u32, stop_at, false).await; + }) + } +} + +#[test_traced] +fn validator_catches_up_to_network_during_epoch() { + let _ = tempo_eyre::install(); + + RestartSetup { + epoch_length: 100, + shutdown_height: 5, + restart_height: 10, + final_height: 15, + assert_skips: false, + connect_execution_layer: false, + } + .run(); +} + +#[test_traced] +fn validator_catches_up_with_gap_of_one_epoch() { + let _ = tempo_eyre::install(); + + let epoch_length = 30; + RestartSetup { + epoch_length, + shutdown_height: epoch_length + 1, + restart_height: 2 * epoch_length + 1, + final_height: 3 * epoch_length + 1, + assert_skips: false, + connect_execution_layer: false, + } + .run(); +} + +#[test_traced] +fn validator_catches_up_with_gap_of_three_epochs() { + let _ = tempo_eyre::install(); + + let epoch_length = 30; + RestartSetup { + epoch_length, + shutdown_height: epoch_length + 1, + restart_height: 4 * epoch_length + 1, + final_height: 5 * epoch_length + 1, + assert_skips: true, + connect_execution_layer: true, + } + .run(); +} + +#[test_traced] +fn single_node_recovers_after_finalizing_ceremony() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 1, + epoch_length: 6, + shutdown_after_finalizing: ShutdownAfterFinalizing::Ceremony, + } + .run() +} + +#[test_traced] +fn node_recovers_after_finalizing_ceremony_four_validators() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 4, + epoch_length: 30, + shutdown_after_finalizing: ShutdownAfterFinalizing::Ceremony, + } + .run() +} + +#[test_traced] +fn 
node_recovers_after_finalizing_middle_of_epoch_four_validators() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 4, + epoch_length: 30, + shutdown_after_finalizing: ShutdownAfterFinalizing::MiddleOfEpoch, + } + .run() +} + +#[test_traced] +fn node_recovers_before_finalizing_middle_of_epoch_four_validators() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 4, + epoch_length: 30, + shutdown_after_finalizing: ShutdownAfterFinalizing::BeforeMiddleOfEpoch, + } + .run() +} + +#[test_traced] +fn single_node_recovers_after_finalizing_boundary() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 1, + epoch_length: 10, + shutdown_after_finalizing: ShutdownAfterFinalizing::Boundary, + } + .run() +} + +#[test_traced] +fn node_recovers_after_finalizing_boundary_four_validators() { + AssertNodeRecoversAfterFinalizingBlock { + n_validators: 4, + epoch_length: 30, + shutdown_after_finalizing: ShutdownAfterFinalizing::Boundary, + } + .run() +} + +/// Test configuration for restart scenarios +#[derive(Clone)] +struct RestartSetup { + // The epoch length to use. + epoch_length: u64, + /// Height at which to shutdown a validator + shutdown_height: u64, + /// Height at which to restart the validator + restart_height: u64, + /// Final height that all validators (including restarted) must reach + final_height: u64, + /// Whether to assert that DKG rounds were skipped + assert_skips: bool, + /// Whether to connect the execution layer. 
+    connect_execution_layer: bool,
+}
+// `run` starts every validator, stops a randomly chosen one, restarts it later and waits for `final_height`.
+impl RestartSetup {
+    #[track_caller]
+    fn run(self) {
+        let Self {
+            epoch_length,
+            shutdown_height,
+            restart_height,
+            final_height,
+            assert_skips,
+            connect_execution_layer,
+        } = self;
+        let _ = tempo_eyre::install();
+
+        let setup = Setup::new()
+            .epoch_length(epoch_length)
+            .t2_time(0)
+            .connect_execution_layer_nodes(connect_execution_layer);
+        let cfg = deterministic::Config::default().with_seed(setup.seed);
+        let executor = Runner::from(cfg);
+
+        executor.start(|mut context| async move {
+            let (mut validators, _execution_runtime) =
+                setup_validators(&mut context, setup.clone()).await;
+
+            join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+            // Phase 1: all `how_many_signers` validators must report `shutdown_height`.
+            debug!(
+                height = shutdown_height,
+                "waiting for network to reach target height before stopping a validator",
+            );
+            wait_for_height(
+                &context,
+                setup.how_many_signers,
+                shutdown_height,
+                false,
+            )
+            .await;
+
+            // Randomly select a validator to kill
+            let idx = context.gen_range(0..validators.len());
+            validators[idx].stop().await;
+
+            debug!(public_key = %validators[idx].public_key(), "stopped a random validator");
+            // Phase 2: the remaining validators (one fewer) must report `restart_height`.
+            debug!(
+                height = restart_height,
+                "waiting for remaining validators to reach target height before restarting validator",
+            );
+            wait_for_height(
+                &context,
+                setup.how_many_signers - 1,
+                restart_height,
+                false,
+            )
+            .await;
+
+            debug!("target height reached, restarting stopped validator");
+            validators[idx].start(&context).await;
+            debug!(
+                public_key = %validators[idx].public_key(),
+                "restarted validator",
+            );
+            // Phase 3: all validators, including the restarted one, must report `final_height`.
+            debug!(
+                height = final_height,
+                "waiting for reconstituted validators to reach target height to reach test success",
+            );
+            wait_for_height(
+                &context,
+                setup.how_many_signers,
+                final_height,
+                assert_skips,
+            )
+            .await;
+        })
+    }
+}
+/// Polls the encoded metrics once per second until exactly `expected_validators` nodes report a
+/// processed height >= `target_height`; with `assert_skips`, also requires a non-zero skipped-rounds count.
+async fn wait_for_height(
+    context: &Context,
+    expected_validators: u32,
+    target_height: u64,
+    assert_skips: bool,
+) {
+    let mut skips_observed = false;
+    loop {
+        let metrics = context.encode();
+        let mut validators_at_height = 0;
+
+        for line in metrics.lines() {
+            if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                continue;
+            }
+            // Each metrics line is read as `<metric name> <value>`, separated by whitespace.
+            let mut parts = line.split_whitespace();
+            let metric = parts.next().unwrap();
+            let value = parts.next().unwrap();
+
+            // Check if this is a height metric
+            if metric.ends_with("_marshal_processed_height") {
+                let height = value.parse::().unwrap(); // NOTE(review): turbofish type looks stripped in this copy; presumably `u64` - confirm
+                if height >= target_height {
+                    validators_at_height += 1;
+                }
+            }
+            if metric.ends_with("_rounds_skipped_total") {
+                let count = value.parse::().unwrap();
+                skips_observed |= count > 0;
+            }
+        }
+        if validators_at_height == expected_validators { // NOTE(review): `==`, not `>=` - keeps looping if more nodes than expected report the height; confirm intended
+            assert!(!assert_skips || skips_observed);
+            break;
+        }
+        context.sleep(Duration::from_secs(1)).await;
+    }
+}
+/// Ensures that no more finalizations happen: records the highest processed height as a baseline,
+/// then re-samples it `tries + 1` times, one second apart, and panics if it ever differs.
+async fn ensure_no_progress(context: &Context, tries: u32) {
+    let baseline = {
+        let metrics = context.encode();
+        let mut height = None;
+        for line in metrics.lines() {
+            if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                continue;
+            }
+            let mut parts = line.split_whitespace();
+            let metrics = parts.next().unwrap();
+            let value = parts.next().unwrap();
+            if metrics.ends_with("_marshal_processed_height") {
+                let value = value.parse::().unwrap();
+                if Some(value) > height {
+                    height.replace(value);
+                }
+            }
+        }
+        height.expect("processed height is a metric")
+    };
+    for _ in 0..=tries {
+        context.sleep(Duration::from_secs(1)).await;
+        // Same maximum-height scan as for the baseline above.
+        let metrics = context.encode();
+        let mut height = None;
+        for line in metrics.lines() {
+            if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                continue;
+            }
+            let mut parts = line.split_whitespace();
+            let metrics = parts.next().unwrap();
+            let value = parts.next().unwrap();
+            if metrics.ends_with("_marshal_processed_height") {
+                let value = value.parse::().unwrap();
+                if Some(value) > height {
+                    height.replace(value);
+                }
+            }
+        }
+        let height = height.expect("processed height is a metric");
+        if height != baseline {
+            panic!(
+                "height has changed, progress was made while the network was \
+                 stopped: baseline = `{baseline}`, progressed_to = `{height}`"
+            );
+        }
+    }
+}
+enum ShutdownAfterFinalizing { // selects which processed height triggers the restart; see `is_target_height`
+    Boundary,
+    Ceremony,
+    BeforeMiddleOfEpoch,
+    MiddleOfEpoch,
+}
+// `is_target_height` tells whether `block_height` is this variant's trigger height for the given `epoch_length`.
+impl ShutdownAfterFinalizing {
+    fn is_target_height(&self, epoch_length: u64, block_height: Height) -> bool {
+        let epoch_strategy = FixedEpocher::new(NZU64!(epoch_length));
+        match self {
+            // NOTE: ceremonies are finalized on the pre-to-last block, so
+            // block + 1 needs to be the boundary / last block.
+            Self::Ceremony => {
+                block_height.next()
+                    == epoch_strategy
+                        .containing(block_height.next())
+                        .unwrap()
+                        .last()
+            }
+            Self::Boundary => {
+                block_height == epoch_strategy.containing(block_height).unwrap().last()
+            }
+            Self::BeforeMiddleOfEpoch => {
+                block_height.next().get().rem_euclid(epoch_length) == epoch_length / 2
+            }
+            Self::MiddleOfEpoch => block_height.get().rem_euclid(epoch_length) == epoch_length / 2,
+        }
+    }
+}
+// Human-readable variant names; used as the `target` field of the debug log in `run` below.
+impl std::fmt::Display for ShutdownAfterFinalizing {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let msg = match self {
+            Self::Boundary => "boundary",
+            Self::Ceremony => "ceremony",
+            Self::BeforeMiddleOfEpoch => "before-middle-of-epoch",
+            Self::MiddleOfEpoch => "middle-of-epoch",
+        };
+        f.write_str(msg)
+    }
+}
+/// Parameters of the restart-recovery scenario executed by `run`.
+struct AssertNodeRecoversAfterFinalizingBlock {
+    n_validators: u32,
+    epoch_length: u64,
+    shutdown_after_finalizing: ShutdownAfterFinalizing,
+}
+// `run` restarts the first node seen at the trigger height and asserts that it keeps making progress.
+impl AssertNodeRecoversAfterFinalizingBlock {
+    fn run(self) {
+        let _ = tempo_eyre::install();
+
+        let Self {
+            n_validators,
+            epoch_length,
+            shutdown_after_finalizing,
+        } = self;
+
+        let setup = Setup::new()
+            .how_many_signers(n_validators)
+            .t2_time(0)
+            .epoch_length(epoch_length);
+
+        let cfg = deterministic::Config::default().with_seed(setup.seed);
+        let executor = Runner::from(cfg);
+
+        executor.start(|mut context| async move {
+            let (mut validators, _execution_runtime) =
+                setup_validators(&mut context, setup.clone()).await;
+
+            join_all(validators.iter_mut().map(|node| node.start(&context))).await;
+
+            // Catch a node right after it processed the pre-to-boundary height.
+            // Best-effort: we hot-loop in 100ms steps, but if processing is too
+            // fast we might miss the window and the test will succeed no matter
+            // what.
+            let (stopped_val_metric, height) = 'wait_to_boundary: loop {
+                let metrics = context.encode();
+                'lines: for line in metrics.lines() {
+                    if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                        continue 'lines;
+                    }
+                    let mut parts = line.split_whitespace();
+                    let metric = parts.next().unwrap();
+                    let value = parts.next().unwrap();
+                    // The first height metric that matches the trigger identifies the node to restart.
+                    if metric.ends_with("_marshal_processed_height") {
+                        let value = value.parse::().unwrap();
+                        if shutdown_after_finalizing
+                            .is_target_height(setup.epoch_length, Height::new(value))
+                        {
+                            break 'wait_to_boundary (metric.to_string(), value);
+                        }
+                    }
+                }
+                context.sleep(Duration::from_millis(100)).await;
+            };
+
+            tracing::debug!(
+                stopped_val_metric,
+                height,
+                target = %shutdown_after_finalizing,
+                "found a node that finalized the target height",
+            );
+            // Now restart the node for which we found the metric.
+            let idx = validators
+                .iter()
+                .position(|node| stopped_val_metric.contains(node.uid()))
+                .unwrap();
+            let uid = validators[idx].uid.clone();
+            validators[idx].stop().await;
+            validators[idx].start(&context).await;
+            // Poll once per second: succeed when the restarted node's processed height exceeds `height + 10`.
+            let mut iteration = 0;
+            'look_for_progress: loop {
+                context.sleep(Duration::from_secs(1)).await;
+                let metrics = context.encode();
+                'lines: for line in metrics.lines() {
+                    if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                        continue 'lines;
+                    }
+                    let mut parts = line.split_whitespace();
+                    let metric = parts.next().unwrap();
+                    let value = parts.next().unwrap();
+                    if metric.contains(&uid)
+                        && metric.ends_with("_marshal_processed_height")
+                        && value.parse::().unwrap() > height + 10
+                    {
+                        break 'look_for_progress;
+                    }
+                    if metric.ends_with("ceremony_bad_dealings") { // every such metric seen while polling must stay at 0
+                        assert_eq!(value.parse::().unwrap(), 0);
+                    }
+                }
+                iteration += 1;
+                assert!(
+                    iteration < 10,
+                    "node did not progress for 10 iterations; restart on boundary likely failed"
+                );
+            }
+        });
+    }
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/simple.rs b/crates/e2e/src/tests/v2_at_genesis/simple.rs
new file mode 100644
index 0000000000..46d55008b8
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/simple.rs
@@ -0,0 +1,96 @@
+//! Simple tests: just start and build a few blocks.
+use std::time::Duration;
+
+use crate::{Setup, run, tests::v2_at_genesis::assert_no_v1};
+use commonware_macros::test_traced;
+use commonware_p2p::simulated::Link;
+/// One signer, epoch length 100, seed 0; the closure returns true once a processed height >= 5 is seen.
+#[test_traced]
+fn single_node() {
+    let _ = tempo_eyre::install();
+
+    let setup = Setup::new()
+        .how_many_signers(1)
+        .epoch_length(100)
+        .t2_time(0)
+        .seed(0);
+    let _first = run(setup, |metric, value| {
+        assert_no_v1(metric, value);
+        if metric.ends_with("_marshal_processed_height") {
+            let value = value.parse::().unwrap(); // NOTE(review): turbofish type looks stripped in this copy; presumably `u64` - confirm
+            value >= 5
+        } else {
+            false
+        }
+    });
+}
+/// Default signer count and no explicit link configuration, seed 42; same height >= 5 condition.
+#[test_traced]
+fn only_good_links() {
+    let _ = tempo_eyre::install();
+
+    let setup = Setup::new().epoch_length(100).t2_time(0).seed(42);
+    let _first = run(setup, |metric, value| {
+        assert_no_v1(metric, value);
+        if metric.ends_with("_marshal_processed_height") {
+            let value = value.parse::().unwrap();
+            value >= 5
+        } else {
+            false
+        }
+    });
+}
+/// Links with 200ms latency, 150ms jitter and a 0.75 success rate, seed 42; same height >= 5 condition.
+#[test_traced]
+fn many_bad_links() {
+    let _ = tempo_eyre::install();
+
+    let link = Link {
+        latency: Duration::from_millis(200),
+        jitter: Duration::from_millis(150),
+        success_rate: 0.75,
+    };
+
+    let setup = Setup::new()
+        .seed(42)
+        .epoch_length(100)
+        .t2_time(0)
+        .linkage(link);
+
+    let _first = run(setup, |metric, value| {
+        assert_no_v1(metric, value);
+        if metric.ends_with("_marshal_processed_height") {
+            let value = value.parse::().unwrap();
+            value >= 5
+        } else {
+            false
+        }
+    });
+}
+/// Ten signers over links with 80ms latency, 10ms jitter and a 0.98 success rate; condition is height >= 20.
+#[test_traced]
+fn reach_height_20_with_a_few_bad_links() {
+    let _ = tempo_eyre::install();
+
+    let link = Link {
+        latency: Duration::from_millis(80),
+        jitter: Duration::from_millis(10),
+        success_rate: 0.98,
+    };
+
+    let setup = Setup::new()
+        .how_many_signers(10)
+        .epoch_length(100)
+        .t2_time(0)
+        .linkage(link);
+
+    run(setup, |metric, value| {
+        assert_no_v1(metric, value);
+        if metric.ends_with("_marshal_processed_height") {
+            let value = value.parse::().unwrap();
+            value >= 20
+        } else {
+            false
+        }
+    });
+}
diff --git
a/crates/e2e/src/tests/v2_at_genesis/snapshot.rs b/crates/e2e/src/tests/v2_at_genesis/snapshot.rs
new file mode 100644
index 0000000000..451ba9dd2c
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/snapshot.rs
@@ -0,0 +1,510 @@
+//! Tests for syncing nodes from scratch.
+//!
+//! These tests are similar to the tests in [`crate::tests::restart`], but
+//! assume that the node has never been run but been given a synced execution
+//! layer database.
+
+use std::time::Duration;
+
+use alloy::transports::http::reqwest::Url;
+use commonware_consensus::types::{Epocher as _, FixedEpocher, Height};
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Clock as _, Metrics as _, Runner as _,
+    deterministic::{self, Context, Runner},
+};
+use commonware_utils::NZU64;
+use futures::future::join_all;
+use reth_ethereum::provider::BlockNumReader as _;
+use tracing::info;
+
+use crate::{
+    CONSENSUS_NODE_PREFIX, Setup, setup_validators,
+    tests::v2_at_genesis::dkg::common::wait_for_outcome,
+};
+
+/// This is a lengthy test. First, a validator needs to be run for a sufficiently
+/// long time to populate its database. Then, a new validator is rotated in
+/// by taking the replaced validator's database. This simulates starting from
+/// a snapshot.
+#[test_traced]
+fn joins_from_snapshot() {
+    let _ = tempo_eyre::install();
+
+    let epoch_length = 20;
+    // Create a verifier that we will never start. It just provides the private keys
+    // we desire.
+    let setup = Setup::new()
+        .how_many_signers(4)
+        .how_many_verifiers(1)
+        .t2_time(0)
+        .connect_execution_layer_nodes(true)
+        .epoch_length(epoch_length);
+    let cfg = deterministic::Config::default().with_seed(setup.seed);
+    let executor = Runner::from(cfg);
+
+    executor.start(|mut context| async move {
+        let (mut validators, execution_runtime) =
+            setup_validators(&mut context, setup.clone()).await;
+
+        // The replacement validator that will start later.
+        let mut replacement = {
+            let idx = validators
+                .iter()
+                .position(|node| node.consensus_config().share.is_none())
+                .expect("at least one node must be a verifier, i.e. not have a share");
+            validators.remove(idx)
+        };
+        join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+        // The validator that will donate its database to the replacement.
+        let mut donor = validators.pop().unwrap();
+        // RPC endpoint of one of the validators that stay in `validators`.
+        let http_url = validators[0]
+            .execution()
+            .rpc_server_handle()
+            .http_url()
+            .unwrap()
+            .parse::()
+            .unwrap();
+
+        // Validator setup generated 2 different addresses for both validators.
+        // Make them the same so that ValidatorConfigV2.rotateValidator knows
+        // which one to target.
+        replacement.chain_address = donor.chain_address;
+        let receipt = execution_runtime
+            .rotate_validator(http_url, &replacement)
+            .await
+            .unwrap();
+
+        let rotate_height = Height::new(receipt.block_number.unwrap());
+        tracing::debug!(
+            block.height = %rotate_height,
+            "validatorConfigV2.rotateValidator executed",
+        );
+
+        // Wait for the next DKG outcome - unless rotate_height is on a boundary.
+        // Then wait one more epoch.
+        let epoch_strat = FixedEpocher::new(NZU64!(epoch_length));
+        let info = epoch_strat.containing(rotate_height).unwrap();
+        let target_epoch = if info.last() == rotate_height {
+            info.epoch().next()
+        } else {
+            info.epoch()
+        };
+
+        let outcome_start_rotation =
+            wait_for_outcome(&context, &validators, target_epoch.get(), epoch_length).await;
+        // First outcome: donor is in `players` only, replacement is in `next_players` only.
+        assert!(
+            outcome_start_rotation
+                .players()
+                .position(&donor.public_key())
+                .is_some()
+        );
+        assert!(
+            outcome_start_rotation
+                .next_players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_start_rotation
+                .players()
+                .position(&replacement.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_start_rotation
+                .next_players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+
+        let outcome_finish_rotation = wait_for_outcome(
+            &context,
+            &validators,
+            target_epoch.next().get(),
+            epoch_length,
+        )
+        .await;
+        // Following outcome: donor is in neither set, replacement is in both.
+        assert!(
+            outcome_finish_rotation
+                .players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_finish_rotation
+                .next_players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_finish_rotation
+                .players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+        assert!(
+            outcome_finish_rotation
+                .next_players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+
+        info!("new validator was added to the committee, but not started");
+
+        donor.stop().await;
+        let last_epoch_before_stop = latest_epoch_of_validator(&context, &donor.uid);
+        info!(%last_epoch_before_stop, "stopped the original validator");
+
+        // Now the old validator donates its database to the new validator.
+        //
+        // This works by assigning the replacement validator's fields to the
+        // old validator's. This way, the old validator "donates" its database
+        // to the replacement. This is to simulate a snapshot.
+        donor.uid = replacement.uid;
+        donor.private_key = replacement.private_key;
+        { // NOTE(review): `peer_manager` is cloned from `replacement`'s config and written back onto that same config, so this looks equivalent to a plain assignment; presumably the donor's peer manager was meant to be kept - confirm
+            let peer_manager = replacement.consensus_config.peer_manager.clone();
+            donor.consensus_config = replacement.consensus_config;
+            donor.consensus_config.peer_manager = peer_manager;
+        }
+        donor.network_address = replacement.network_address;
+        donor.chain_address = replacement.chain_address;
+        donor.start(&context).await;
+
+        // Rename, so that it's less confusing below.
+        let replacement = donor;
+
+        info!(
+            uid = %replacement.uid,
+            "started the validator with a changed identity",
+        );
+        // Poll once per second until 4 latest-epoch metrics exceed `last_epoch_before_stop`.
+        loop {
+            context.sleep(Duration::from_secs(1)).await;
+
+            let metrics = context.encode();
+            let mut validators_at_epoch = 0;
+
+            for line in metrics.lines() {
+                if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                    continue;
+                }
+
+                let mut parts = line.split_whitespace();
+                let metric = parts.next().unwrap();
+                let value = parts.next().unwrap();
+
+                if metric.ends_with("_epoch_manager_latest_epoch") {
+                    let epoch = value.parse::().unwrap();
+
+                    assert!(
+                        epoch < last_epoch_before_stop + 4,
+                        "network advanced 4 epochs before without the new \
+                         validator catching up; there is likely a bug",
+                    );
+
+                    if metric.contains(&replacement.uid) {
+                        assert!(
+                            epoch >= last_epoch_before_stop,
+                            "the replacement validator should never enter epochs \
+                             older than what is in the snapshot"
+                        );
+                    }
+
+                    if epoch > last_epoch_before_stop {
+                        validators_at_epoch += 1;
+                    }
+
+                    if metric.contains(&replacement.uid) {
+                        // -1 to account for stopping on boundaries. NOTE(review): the stricter `epoch >= last_epoch_before_stop` assert above already implies this one - confirm which bound is intended.
+                        assert!(
+                            epoch >= last_epoch_before_stop.saturating_sub(1),
+                            "when starting from snapshot, older epochs must never \
+                             had consensus engines running"
+                        );
+                    }
+                }
+            }
+            if validators_at_epoch == 4 {
+                break;
+            }
+        }
+    });
+}
+
+/// This test is the same as `joins_from_snapshot`, but with the extra condition
+/// that the validator can restart (stop, start), after having booted from a
+/// snapshot.
+#[test_traced]
+fn can_restart_after_joining_from_snapshot() {
+    let _ = tempo_eyre::install();
+
+    let epoch_length = 20;
+    // Create a verifier that we will never start. It just provides the private keys
+    // we desire.
+    let setup = Setup::new()
+        .how_many_signers(4)
+        .how_many_verifiers(1)
+        .t2_time(0)
+        .connect_execution_layer_nodes(true)
+        .epoch_length(epoch_length);
+    let cfg = deterministic::Config::default().with_seed(setup.seed);
+    let executor = Runner::from(cfg);
+
+    executor.start(|mut context| async move {
+        let (mut validators, execution_runtime) =
+            setup_validators(&mut context, setup.clone()).await;
+
+        // The replacement validator that will start later.
+        let mut replacement = {
+            let idx = validators
+                .iter()
+                .position(|node| node.consensus_config().share.is_none())
+                .expect("at least one node must be a verifier, i.e. not have a share");
+            validators.remove(idx)
+        };
+        join_all(validators.iter_mut().map(|v| v.start(&context))).await;
+
+        // The validator that will donate its database to the replacement.
+        let mut donor = validators.pop().unwrap();
+        // RPC endpoint of one of the validators that stay in `validators`.
+        let http_url = validators[0]
+            .execution()
+            .rpc_server_handle()
+            .http_url()
+            .unwrap()
+            .parse::()
+            .unwrap();
+
+        // Validator setup generated 2 different addresses for both validators.
+        // Make them the same so that ValidatorConfigV2.rotateValidator knows
+        // which one to target.
+        replacement.chain_address = donor.chain_address;
+        let receipt = execution_runtime
+            .rotate_validator(http_url, &replacement)
+            .await
+            .unwrap();
+
+        let rotate_height = Height::new(receipt.block_number.unwrap());
+        tracing::debug!(
+            block.height = %rotate_height,
+            "validatorConfigV2.rotateValidator executed",
+        );
+
+        // Wait for the next DKG outcome - unless rotate_height is on a boundary.
+        // Then wait one more epoch.
+        let epoch_strat = FixedEpocher::new(NZU64!(epoch_length));
+        let info = epoch_strat.containing(rotate_height).unwrap();
+        let target_epoch = if info.last() == rotate_height {
+            info.epoch().next()
+        } else {
+            info.epoch()
+        };
+
+        let outcome_start_rotation =
+            wait_for_outcome(&context, &validators, target_epoch.get(), epoch_length).await;
+        // First outcome: donor is in `players` only, replacement is in `next_players` only.
+        assert!(
+            outcome_start_rotation
+                .players()
+                .position(&donor.public_key())
+                .is_some()
+        );
+        assert!(
+            outcome_start_rotation
+                .next_players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_start_rotation
+                .players()
+                .position(&replacement.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_start_rotation
+                .next_players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+
+        let outcome_finish_rotation = wait_for_outcome(
+            &context,
+            &validators,
+            target_epoch.next().get(),
+            epoch_length,
+        )
+        .await;
+        // Following outcome: donor is in neither set, replacement is in both.
+        assert!(
+            outcome_finish_rotation
+                .players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_finish_rotation
+                .next_players()
+                .position(&donor.public_key())
+                .is_none()
+        );
+        assert!(
+            outcome_finish_rotation
+                .players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+        assert!(
+            outcome_finish_rotation
+                .next_players()
+                .position(&replacement.public_key())
+                .is_some()
+        );
+
+        info!("new validator was added to the committee, but not started");
+
+        donor.stop().await;
+        let last_epoch_before_stop = latest_epoch_of_validator(&context, &donor.uid);
+        info!(%last_epoch_before_stop, "stopped the original validator");
+
+        // Now the old validator donates its database to the new validator.
+        //
+        // This works by assigning the replacement validator's fields to the
+        // old validator's. This way, the old validator "donates" its database
+        // to the replacement. This is to simulate a snapshot.
+        donor.uid = replacement.uid;
+        donor.private_key = replacement.private_key;
+        { // NOTE(review): `peer_manager` is cloned from `replacement`'s config and written back onto that same config, so this looks equivalent to a plain assignment; presumably the donor's peer manager was meant to be kept - confirm
+            let peer_manager = replacement.consensus_config.peer_manager.clone();
+            donor.consensus_config = replacement.consensus_config;
+            donor.consensus_config.peer_manager = peer_manager;
+        }
+        donor.network_address = replacement.network_address;
+        donor.chain_address = replacement.chain_address;
+        donor.start(&context).await;
+
+        // Rename, so that it's less confusing below.
+        let mut replacement = donor;
+
+        info!(
+            uid = %replacement.uid,
+            "started the validator with a changed identity",
+        );
+        // Poll once per second until 4 latest-epoch metrics exceed `last_epoch_before_stop`.
+        loop {
+            context.sleep(Duration::from_secs(1)).await;
+
+            let metrics = context.encode();
+            let mut validators_at_epoch = 0;
+
+            for line in metrics.lines() {
+                if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                    continue;
+                }
+
+                let mut parts = line.split_whitespace();
+                let metric = parts.next().unwrap();
+                let value = parts.next().unwrap();
+
+                if metric.ends_with("_epoch_manager_latest_epoch") {
+                    let epoch = value.parse::().unwrap();
+
+                    assert!(
+                        epoch < last_epoch_before_stop + 4,
+                        "network advanced 4 epochs before without the new \
+                         validator catching up; there is likely a bug",
+                    );
+
+                    if metric.contains(&replacement.uid) {
+                        assert!(
+                            epoch >= last_epoch_before_stop,
+                            "the replacement validator should never enter epochs \
+                             older than what is in the snapshot"
+                        );
+                    }
+
+                    if epoch > last_epoch_before_stop {
+                        validators_at_epoch += 1;
+                    }
+
+                    if metric.contains(&replacement.uid) {
+                        // -1 to account for stopping on boundaries. NOTE(review): the stricter `epoch >= last_epoch_before_stop` assert above already implies this one - confirm which bound is intended.
+                        assert!(
+                            epoch >= last_epoch_before_stop.saturating_sub(1),
+                            "when starting from snapshot, older epochs must never \
+                             had consensus engines running"
+                        );
+                    }
+                }
+            }
+            if validators_at_epoch == 4 {
+                break;
+            }
+        }
+
+        // Restart the node. This ensures that its state is still sound after
+        // doing a snapshot sync.
+        replacement.stop().await;
+        // Best block number of a validator that kept running; the restarted node must get past it.
+        let network_head = validators[0]
+            .execution_provider()
+            .best_block_number()
+            .unwrap();
+
+        replacement.start(&context).await;
+
+        info!(
+            network_head,
+            "restarting the node and waiting for it to catch up"
+        );
+        // Poll once per second until the restarted node's processed height exceeds `network_head`; there is no iteration limit here.
+        'progress: loop {
+            context.sleep(Duration::from_secs(1)).await;
+
+            let metrics = context.encode();
+
+            for line in metrics.lines() {
+                if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+                    continue;
+                }
+
+                let mut parts = line.split_whitespace();
+                let metric = parts.next().unwrap();
+                let value = parts.next().unwrap();
+
+                if metric.contains(&replacement.uid)
+                    && metric.ends_with("_marshal_processed_height")
+                    && value.parse::().unwrap() > network_head
+                {
+                    break 'progress;
+                }
+            }
+        }
+    });
+}
+/// Returns the value of the first `_epoch_manager_latest_epoch` metric whose name contains `id`; panics if there is none.
+fn latest_epoch_of_validator(context: &Context, id: &str) -> u64 {
+    let metrics = context.encode();
+
+    for line in metrics.lines() {
+        if !line.starts_with(CONSENSUS_NODE_PREFIX) {
+            continue;
+        }
+
+        let mut parts = line.split_whitespace();
+        let metric = parts.next().unwrap();
+        let value = parts.next().unwrap();
+
+        if metric.ends_with("_epoch_manager_latest_epoch") && metric.contains(id) {
+            return value.parse::().unwrap();
+        }
+    }
+
+    panic!("validator had no entry for latest epoch");
+}
diff --git a/crates/e2e/src/tests/v2_at_genesis/subblocks.rs b/crates/e2e/src/tests/v2_at_genesis/subblocks.rs
new file mode 100644
index 0000000000..6687e6c610
--- /dev/null
+++ b/crates/e2e/src/tests/v2_at_genesis/subblocks.rs
@@ -0,0 +1,442 @@
+use std::{collections::HashMap, time::Duration};
+
+use alloy::{
+    consensus::{Transaction, TxReceipt},
+    rlp::Decodable,
+    signers::local::PrivateKeySigner,
+};
+use alloy_network::{TxSignerSync, eip2718::Encodable2718};
+use alloy_primitives::{Address, TxHash, U256, b256};
+use commonware_macros::test_traced;
+use commonware_runtime::{
+    Runner as _,
+    deterministic::{Config, Runner},
+};
+use futures::{StreamExt, future::join_all};
+use reth_ethereum::{
+    chainspec::{ChainSpecProvider, EthChainSpec},
+    rpc::eth::EthApiServer,
+};
+use reth_node_builder::ConsensusEngineEvent;
+use reth_node_core::primitives::transaction::TxHashRef;
+use tempo_chainspec::spec::{SYSTEM_TX_COUNT, TEMPO_T1_BASE_FEE};
+use tempo_node::primitives::{
+    SubBlockMetadata, TempoTransaction, TempoTxEnvelope,
+    subblock::{PartialValidatorKey, TEMPO_SUBBLOCK_NONCE_KEY_PREFIX},
+    transaction::{Call, calc_gas_balance_spending},
+};
+use tempo_precompiles::{
+    DEFAULT_FEE_TOKEN, NONCE_PRECOMPILE_ADDRESS, nonce::NonceManager, tip20::TIP20Token,
+};
+
+use tempo_node::consensus::TEMPO_SHARED_GAS_DIVISOR;
+
+use crate::{Setup, TestingNode, setup_validators};
+/// Submits 5 subblock transactions per node after each canonical block and checks their inclusion, up to block 20.
+#[test_traced]
+fn subblocks_are_included() {
+    let _ = tempo_eyre::install();
+
+    Runner::from(Config::default().with_seed(0)).start(|mut context| async move {
+        let how_many_signers = 4;
+
+        let setup = Setup::new()
+            .how_many_signers(how_many_signers)
+            .epoch_length(10);
+
+        // Setup and start all nodes.
+        let (mut nodes, _execution_runtime) = setup_validators(&mut context, setup.clone()).await;
+
+        let mut fee_recipients = Vec::new();
+
+        for node in &mut nodes {
+            // Due to how Commonware deterministic runtime behaves in CI, we need to bump this timeout
+            // to ensure that payload builder has enough time to accumulate subblocks.
+            node.consensus_config_mut().new_payload_wait_time = Duration::from_millis(500);
+            // Give every node its own random fee recipient so payouts can be checked per validator.
+            let fee_recipient = Address::random();
+            node.consensus_config_mut().fee_recipient = fee_recipient;
+            fee_recipients.push(fee_recipient);
+        }
+
+        join_all(nodes.iter_mut().map(|node| node.start(&context))).await;
+        // Engine events are taken from the first node only.
+        let mut stream = nodes[0]
+            .execution()
+            .add_ons_handle
+            .engine_events
+            .new_listener();
+
+        let mut expected_transactions: Vec = Vec::new(); // NOTE(review): element type looks stripped in this copy; presumably `Vec<TxHash>` - confirm
+        while let Some(update) = stream.next().await {
+            let block = match update {
+                ConsensusEngineEvent::BlockReceived(_)
+                | ConsensusEngineEvent::ForkchoiceUpdated(_, _)
+                | ConsensusEngineEvent::CanonicalChainCommitted(_, _) => continue,
+                ConsensusEngineEvent::ForkBlockAdded(_, _) => unreachable!("unexpected reorg"),
+                ConsensusEngineEvent::InvalidBlock(_) => unreachable!("unexpected invalid block"),
+                ConsensusEngineEvent::CanonicalBlockAdded(block, _) => block,
+            };
+
+            let receipts = &block.execution_outcome().receipts;
+
+            // Assert that block only contains our subblock transactions and the system transactions
+            assert_eq!(
+                block.sealed_block().body().transactions.len(),
+                SYSTEM_TX_COUNT + expected_transactions.len()
+            );
+
+            // Assert that all expected transactions are included in the block.
+            for tx in expected_transactions.drain(..) {
+                if !block
+                    .sealed_block()
+                    .body()
+                    .transactions
+                    .iter()
+                    .any(|t| t.tx_hash() == *tx)
+                {
+                    panic!("transaction {tx} was not included");
+                }
+            }
+
+            // Assert that all transactions were successful
+            for receipt in receipts {
+                assert!(receipt.status());
+            }
+            // NOTE(review): `drain(..)` above leaves `expected_transactions` empty, so this condition is never true and the fee check below never runs - confirm and restructure if the check is wanted.
+            if !expected_transactions.is_empty() {
+                let fee_token_storage = &block
+                    .execution_outcome()
+                    .state
+                    .state
+                    .get(&DEFAULT_FEE_TOKEN)
+                    .unwrap()
+                    .storage;
+
+                // Assert that all validators were paid for their subblock transactions
+                for fee_recipient in &fee_recipients {
+                    let balance_slot = TIP20Token::from_address(DEFAULT_FEE_TOKEN)
+                        .unwrap()
+                        .balances[*fee_recipient]
+                        .slot();
+                    let slot = fee_token_storage.get(&balance_slot).unwrap();
+
+                    assert!(slot.present_value > slot.original_value());
+                }
+            }
+
+            // Exit once we reach height 20.
+            if block.block_number() == 20 {
+                break;
+            }
+
+            // Send subblock transactions to all nodes.
+            for node in nodes.iter() {
+                for _ in 0..5 {
+                    expected_transactions.push(submit_subblock_tx(node).await);
+                }
+            }
+        }
+    });
+}
+/// Like `subblocks_are_included`, but with 5 signers and a random share of transactions sent from unfunded signers.
+#[test_traced]
+fn subblocks_are_included_with_failing_txs() {
+    let _ = tempo_eyre::install();
+
+    Runner::from(Config::default().with_seed(0)).start(|mut context| async move {
+        let how_many_signers = 5;
+
+        let setup = Setup::new()
+            .how_many_signers(how_many_signers)
+            .epoch_length(10);
+
+        // Setup and start all nodes.
+        let (mut nodes, _execution_runtime) = setup_validators(&mut context, setup.clone()).await;
+
+        let mut fee_recipients = Vec::new();
+
+        for node in &mut nodes {
+            // Due to how Commonware deterministic runtime behaves in CI, we need to bump this timeout
+            // to ensure that payload builder has enough time to accumulate subblocks.
+            node.consensus_config_mut().new_payload_wait_time = Duration::from_millis(500);
+            // Give every node its own random fee recipient.
+            let fee_recipient = Address::random();
+            node.consensus_config_mut().fee_recipient = fee_recipient;
+            fee_recipients.push(fee_recipient);
+        }
+
+        join_all(nodes.iter_mut().map(|node| node.start(&context))).await;
+        // Engine events are taken from the first node only.
+        let mut stream = nodes[0]
+            .execution()
+            .add_ons_handle
+            .engine_events
+            .new_listener();
+
+        let mut expected_transactions: Vec = Vec::new(); // NOTE(review): element type looks stripped in this copy; presumably `Vec<TxHash>` - confirm
+        let mut failing_transactions: Vec = Vec::new(); // only ever pushed to below; never cleared between blocks
+        while let Some(update) = stream.next().await {
+            let block = match update {
+                ConsensusEngineEvent::BlockReceived(_)
+                | ConsensusEngineEvent::ForkchoiceUpdated(_, _)
+                | ConsensusEngineEvent::CanonicalChainCommitted(_, _) => continue,
+                ConsensusEngineEvent::ForkBlockAdded(_, _) => unreachable!("unexpected reorg"),
+                ConsensusEngineEvent::InvalidBlock(_) => unreachable!("unexpected invalid block"),
+                ConsensusEngineEvent::CanonicalBlockAdded(block, _) => block,
+            };
+            let receipts = &block.execution_outcome().receipts;
+
+            // Assert that block only contains our subblock transactions and system transactions
+            assert_eq!(
+                block.sealed_block().body().transactions.len(),
+                SYSTEM_TX_COUNT + expected_transactions.len()
+            );
+
+            // Assert that all expected transactions are included in the block.
+            for tx in expected_transactions.drain(..) {
+                if !block
+                    .sealed_block()
+                    .body()
+                    .transactions
+                    .iter()
+                    .any(|t| t.tx_hash() == *tx)
+                {
+                    panic!("transaction {tx} was not included");
+                }
+            }
+            // Decode the list carried in the input of the block's last transaction into a proposer-key -> fee-recipient map.
+            let fee_recipients = Vec::::decode(
+                &mut block
+                    .sealed_block()
+                    .body()
+                    .transactions
+                    .last()
+                    .unwrap()
+                    .input()
+                    .as_ref(),
+            )
+            .unwrap()
+            .into_iter()
+            .map(|metadata| {
+                (
+                    PartialValidatorKey::from_slice(&metadata.validator[..15]),
+                    metadata.fee_recipient,
+                )
+            })
+            .collect::>();
+            // NOTE(review): `drain(..)` above leaves `expected_transactions` empty, so the `contains` check below skips every receipt and `expected_fees` stays empty; the fee and failure assertions never run - confirm.
+            let mut expected_fees = HashMap::new();
+            let mut cumulative_gas_used = 0;
+
+            for (receipt, tx) in receipts
+                .iter()
+                .zip(block.recovered_block().transactions_recovered())
+            {
+                if !expected_transactions.contains(tx.tx_hash()) {
+                    continue;
+                }
+
+                let fee_recipient = fee_recipients
+                    .get(&tx.subblock_proposer().unwrap())
+                    .unwrap();
+                *expected_fees.entry(fee_recipient).or_insert(U256::ZERO) +=
+                    calc_gas_balance_spending(
+                        receipt.cumulative_gas_used - cumulative_gas_used,
+                        TEMPO_T1_BASE_FEE as u128,
+                    );
+                cumulative_gas_used = receipt.cumulative_gas_used;
+
+                if !failing_transactions.contains(tx.tx_hash()) {
+                    assert!(receipt.status());
+                    assert!(receipt.cumulative_gas_used > 0);
+                    continue;
+                }
+                // From here on the transaction is one recorded in `failing_transactions`.
+                let sender = tx.signer();
+                let nonce_key = tx.as_aa().unwrap().tx().nonce_key;
+                let nonce_slot = NonceManager::new().nonces[sender][nonce_key].slot();
+
+                let slot = block
+                    .execution_outcome()
+                    .state
+                    .state
+                    .get(&NONCE_PRECOMPILE_ADDRESS)
+                    .unwrap()
+                    .storage
+                    .get(&nonce_slot)
+                    .unwrap();
+
+                // Assert that all failing transactions have bumped the nonce and resulted in a failing receipt
+                assert!(slot.present_value == slot.original_value() + U256::ONE);
+                assert!(!receipt.status());
+                assert!(receipt.logs().is_empty());
+                assert_eq!(receipt.cumulative_gas_used, 0);
+            }
+
+            for (fee_recipient, expected_fee) in expected_fees {
+                let fee_token_storage = &block
+                    .execution_outcome()
+                    .state
+                    .state
+                    .get(&DEFAULT_FEE_TOKEN)
+                    .unwrap()
+                    .storage;
+
+                // Assert that all validators were paid for their subblock transactions
+                let balance_slot = TIP20Token::from_address(DEFAULT_FEE_TOKEN)
+                    .unwrap()
+                    .balances[*fee_recipient]
+                    .slot();
+                let slot = fee_token_storage.get(&balance_slot).unwrap();
+
+                assert_eq!(slot.present_value, slot.original_value() + expected_fee);
+            }
+
+            // Exit once we reach height 20.
+            if block.block_number() == 20 {
+                break;
+            }
+
+            // Send subblock transactions to all nodes.
+            // TIP-1000 charges 250k gas for new account creation, so txs from random signers
+            // need ~300k intrinsic gas. With 600k per-validator budget (5 validators), we fit 2 txs.
+            for node in nodes.iter() {
+                for _ in 0..5 {
+                    // Randomly submit some of the transactions from a new signer that doesn't have any funds
+                    if rand_08::random::() {
+                        let tx =
+                            submit_subblock_tx_from(node, &PrivateKeySigner::random(), 1_000_000)
+                                .await;
+                        failing_transactions.push(tx);
+                        expected_transactions.push(tx);
+                        tx
+                    } else {
+                        let tx = submit_subblock_tx(node).await;
+                        expected_transactions.push(tx);
+                        tx
+                    };
+                }
+            }
+        }
+    });
+}
+/// Submits one transaction whose gas limit is the computed per-validator budget plus one and asserts it never shows up in blocks 3..=10.
+#[test_traced]
+fn oversized_subblock_txs_are_removed() {
+    let _ = tempo_eyre::install();
+
+    Runner::from(Config::default().with_seed(42)).start(|mut context| async move {
+        let how_many_signers = 4;
+
+        let setup = Setup::new()
+            .how_many_signers(how_many_signers)
+            .epoch_length(10);
+
+        let (mut nodes, _execution_runtime) = setup_validators(&mut context, setup.clone()).await;
+
+        for node in &mut nodes {
+            node.consensus_config_mut().new_payload_wait_time = Duration::from_millis(500);
+        }
+
+        join_all(nodes.iter_mut().map(|node| node.start(&context))).await;
+
+        let mut stream = nodes[0]
+            .execution()
+            .add_ons_handle
+            .engine_events
+            .new_listener();
+
+        let (mut oversized_tx_hash, mut submitted) = (None, false);
+
+        while let Some(update) = stream.next().await {
+            let block = match update {
+                ConsensusEngineEvent::CanonicalBlockAdded(block, _) => block,
+                _ => continue,
+            };
+
+            // After first block, submit an oversized transaction
+            if !submitted && block.block_number() >= 1 {
+                let block_gas_limit = block.sealed_block().header().inner.gas_limit;
+                let gas_budget =
+                    block_gas_limit / TEMPO_SHARED_GAS_DIVISOR / how_many_signers as u64;
+                // One gas unit above the budget computed above.
+                oversized_tx_hash = Some(
+                    submit_subblock_tx_from(&nodes[0], &PrivateKeySigner::random(), gas_budget + 1)
+                        .await,
+                );
+
+                submitted = true;
+            }
+
+            // Check results after submission - verify oversized tx is never included
+            if submitted && block.block_number() >= 3 {
+                let txs = &block.sealed_block().body().transactions;
+
+                // Oversized tx should NOT be included in any block
+                if let Some(hash) = oversized_tx_hash {
+                    assert!(
+                        !txs.iter().any(|t| t.tx_hash() == *hash),
+                        "oversized transaction should not be included in block"
+                    );
+                }
+            }
+
+            if block.block_number() >= 10 {
+                break;
+            }
+        }
+    });
+}
+/// Submits a subblock transaction with a 300_000 gas limit, signed with the hard-coded test key below; returns its hash.
+async fn submit_subblock_tx(
+    node: &TestingNode,
+) -> TxHash {
+    // First signer of the test mnemonic
+    let wallet = PrivateKeySigner::from_bytes(&b256!(
+        "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
+    ))
+    .unwrap();
+
+    submit_subblock_tx_from(node, &wallet, 300_000).await
+}
+/// Builds, signs with `wallet` and submits through `node` a transaction with the given `gas_limit`; returns its hash.
+async fn submit_subblock_tx_from(
+    node: &TestingNode,
+    wallet: &PrivateKeySigner,
+    gas_limit: u64,
+) -> TxHash {
+    let mut nonce_bytes = rand_08::random::<[u8; 32]>();
+    nonce_bytes[0] = TEMPO_SUBBLOCK_NONCE_KEY_PREFIX;
+    nonce_bytes[1..16].copy_from_slice(&node.public_key().as_ref()[..15]);
+    // Nonce key layout: prefix byte, first 15 bytes of the node's public key, then 16 random bytes.
+    let provider = node.execution_provider();
+
+    let gas_price = TEMPO_T1_BASE_FEE as u128;
+
+    let mut tx = TempoTransaction {
+        chain_id: provider.chain_spec().chain_id(),
+        calls: vec![Call {
+            to: Address::ZERO.into(),
+            input: Default::default(),
+            value: Default::default(),
+        }],
+        gas_limit,
+        nonce_key: U256::from_be_bytes(nonce_bytes),
+        max_fee_per_gas: gas_price,
+        max_priority_fee_per_gas: gas_price,
+        ..Default::default()
+    };
+    assert!(tx.subblock_proposer().unwrap().matches(node.public_key()));
+    let signature = wallet.sign_transaction_sync(&mut tx).unwrap();
+
+    let tx = TempoTxEnvelope::AA(tx.into_signed(signature.into()));
+    let tx_hash = *tx.tx_hash();
+    node.execution()
+        .eth_api()
+        .send_raw_transaction(tx.encoded_2718().into())
+        .await
+        .unwrap();
+
+    tx_hash
+}
diff --git a/crates/precompiles/src/validator_config_v2/mod.rs b/crates/precompiles/src/validator_config_v2/mod.rs
index fcfd66c656..3e65fa2061 100644
--- a/crates/precompiles/src/validator_config_v2/mod.rs
+++ b/crates/precompiles/src/validator_config_v2/mod.rs
@@ -601,7 +601,6 @@ impl ValidatorConfigV2 {
         if deactivated_at_height == 0 {
             self.active_ingress_ips[ingress_hash].write(true)?;
         }
-
         Ok(())
     }