From 5f4c137608e773150ac67f83ae6784019bbf8924 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Mon, 16 Mar 2026 01:19:59 +0100 Subject: [PATCH 01/21] feat(core): add SailfishPlusPlus protocol variant and update README Add the SailfishPlusPlus consensus protocol variant based on the SFSailfish paper (signature-free optimistic RBC, CCS'25). This commit wires the new variant through all exhaustive match arms across the codebase and updates the README with a protocol comparison table. Type/enum foundations: - CertEcho, CertVote, CertReady signature-free RBC message types - Optimistic RBC thresholds on Committee (fast, vote, ready) - SailfishPlusPlus variant in ConsensusProtocol enum - CertEcho/CertVote/CertReady NetworkMessage variants - Wave length 2, pipeline enabled in UniversalCommitterBuilder SailfishPlusPlus follows the Mysticeti model: full blocks (no erasure coding), pull-based dissemination, no acknowledgment references, no BLS. --- README.md | 20 +++++++++-- crates/starfish-core/src/broadcaster.rs | 12 ++++--- crates/starfish-core/src/committee.rs | 33 +++++++++++++++++ .../starfish-core/src/consensus/linearizer.rs | 4 ++- .../src/consensus/universal_committer.rs | 3 +- crates/starfish-core/src/core.rs | 4 ++- crates/starfish-core/src/dag_state.rs | 17 +++++++-- crates/starfish-core/src/net_sync.rs | 9 ++++- crates/starfish-core/src/network.rs | 12 ++++++- crates/starfish-core/src/types.rs | 35 +++++++++++++++++-- 10 files changed, 131 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index bec5a4be..cf7c6d74 100644 --- a/README.md +++ b/README.md @@ -19,15 +19,18 @@ Three versions of Starfish are available in this repository: - **`starfish-speed`**: Strong-vote optimistic variant - Uses strong votes for optimistic transaction sequencing - - Lower latency when validators have same acknowledgments as a leader. 
Matches + - Lower latency when validators have same acknowledgments as a leader - **`starfish-bls`**: BLS-optimized variant - Uses BLS aggregate signatures to reduce communication complexity for header metadata - Embeds compact aggregate certificates (round, leader, data availability) in block headers - Async BLS verification service offloads signature processing from the critical path -The repository also supports other partially synchronous uncertified DAG-based consensus protocols: +The repository also supports other partially synchronous DAG-based consensus protocols: +- **`sailfish++`**: Implementation based on [SFSailfish](https://eprint.iacr.org/2025/535) ("Optimistic, Signature-Free Reliable Broadcast and Its Applications", CCS'25). +A certified DAG protocol using signature-free optimistic reliable broadcast (RBC) for vertex certification. +Achieves 2-round optimistic commit latency with authentication derived from TCP channels rather than cryptographic signatures. - **`mysticeti`**: Implementation of [Mysticeti](https://www.cs.cornell.edu/~babel/papers/mysticeti.pdf). Validators use a bandwidth efficient pull-based block dissemination strategy: they push their own blocks and request the peers about missing ancestors only. @@ -37,6 +40,17 @@ pushing all unknown history of blocks to their peers. Due to the push strategy, Cordial Miners can tolerate Byzantine attacks, but it is overall a less scalable solution. 
+### Protocol Comparison + +| Feature | Starfish | Starfish-Speed | Starfish-BLS | Sailfish++ | Mysticeti | Cordial Miners | +|---|---|---|---|---|---|---| +| DAG type | Uncertified | Uncertified | Uncertified | Certified (RBC) | Uncertified | Uncertified | +| Commit latency (rounds) | 3 | 3 (opt: 2) | 3 | 2 | 3 | 3 | +| Transaction encoding | Reed-Solomon | Reed-Solomon | Reed-Solomon | Full blocks | Full blocks | Full blocks | +| Dissemination default | Push-causal | Push-causal | Push-causal | Pull | Pull | Push-causal | +| Certification mechanism | None | None | BLS aggregate sigs | Signature-free RBC | None | None | +| Acknowledgment references | Yes | Yes | Yes (DAC) | No | No | No | + ## Key Features of Starfish - Starfish is a Byzantine Fault Tolerant protocol capable of tolerating up to 1/3 of Byzantine nodes in a partially synchronous network. @@ -182,7 +196,7 @@ NUM_VALIDATORS=10 DESIRED_TPS=1000 CONSENSUS=starfish-speed \ |---|---|---| | `NUM_VALIDATORS` | 10 | Number of validators (recommend < physical cores, max 128) | | `DESIRED_TPS` | 1000 | Target transactions per second | -| `CONSENSUS` | starfish-speed | Protocol: `starfish`, `starfish-speed`, `starfish-bls`, `cordial-miners`, `mysticeti` | +| `CONSENSUS` | starfish-speed | Protocol: `starfish`, `starfish-speed`, `starfish-bls`, `sailfish-pp`, `cordial-miners`, `mysticeti` | | `NUM_BYZANTINE_NODES` | 0 | Must be < `NUM_VALIDATORS / 3` | | `BYZANTINE_STRATEGY` | random-drop | See [Byzantine strategies](#byzantine-strategies) | | `TEST_TIME` | 3000 | Duration in seconds | diff --git a/crates/starfish-core/src/broadcaster.rs b/crates/starfish-core/src/broadcaster.rs index 4222a0b6..ac43cbef 100644 --- a/crates/starfish-core/src/broadcaster.rs +++ b/crates/starfish-core/src/broadcaster.rs @@ -71,7 +71,7 @@ impl BroadcasterParameters { causal_push_shard_round_lag: RoundNumber, ) -> Self { match consensus_protocol { - ConsensusProtocol::Mysticeti => Self { + ConsensusProtocol::Mysticeti | 
ConsensusProtocol::SailfishPlusPlus => Self { batch_own_block_size: committee_size, batch_other_block_size: 3 * committee_size, batch_shard_size: 3 * committee_size, @@ -412,7 +412,9 @@ where .await .ok()?; } - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { let all_blocks = self.inner.dag_state.get_storage_blocks(&block_references); let mut blocks = Vec::new(); @@ -814,9 +816,9 @@ fn push_transport_format(consensus_protocol: ConsensusProtocol) -> PushOtherBloc ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed | ConsensusProtocol::StarfishBls => PushOtherBlocksFormat::HeadersAndShards, - ConsensusProtocol::CordialMiners | ConsensusProtocol::Mysticeti => { - PushOtherBlocksFormat::FullBlocks - } + ConsensusProtocol::CordialMiners + | ConsensusProtocol::Mysticeti + | ConsensusProtocol::SailfishPlusPlus => PushOtherBlocksFormat::FullBlocks, } } diff --git a/crates/starfish-core/src/committee.rs b/crates/starfish-core/src/committee.rs index bd44d1c7..43e6f30a 100644 --- a/crates/starfish-core/src/committee.rs +++ b/crates/starfish-core/src/committee.rs @@ -21,6 +21,11 @@ pub struct Committee { validity_threshold: Stake, // The minimum stake required for validity quorum_threshold: Stake, // The minimum stake required for quorum info_length: usize, // info length used for encoding + // Optimistic RBC thresholds (SailfishPlusPlus). + // Precomputed from total_stake and validity_threshold. + optimistic_fast_threshold: Stake, + optimistic_vote_threshold: Stake, + optimistic_ready_threshold: Stake, } impl Committee { @@ -62,11 +67,24 @@ impl Committee { _ => f + 2, }; + // Optimistic RBC thresholds (N = total_stake, F = validity_threshold). 
+ // fast: ceil((N + 2F - 2) / 2) ≈ 5N/6 + // vote: ceil(N / 2) + // ready: ceil((N + F - 1) / 2) + let n = total_stake; + let f_stake = validity_threshold; + let optimistic_fast_threshold = (n + 2 * f_stake - 2 + 1) / 2; + let optimistic_vote_threshold = (n + 1) / 2; + let optimistic_ready_threshold = (n + f_stake - 1 + 1) / 2; + Arc::new(Committee { authorities, validity_threshold, quorum_threshold, info_length, + optimistic_fast_threshold, + optimistic_vote_threshold, + optimistic_ready_threshold, }) } @@ -84,6 +102,21 @@ impl Committee { self.quorum_threshold + 1 } + /// Optimistic fast delivery threshold: ceil((N + 2F - 2) / 2). + pub fn optimistic_fast_threshold(&self) -> Stake { + self.optimistic_fast_threshold + } + + /// Optimistic vote threshold: ceil(N / 2). + pub fn optimistic_vote_threshold(&self) -> Stake { + self.optimistic_vote_threshold + } + + /// Optimistic ready threshold: ceil((N + F - 1) / 2). + pub fn optimistic_ready_threshold(&self) -> Stake { + self.optimistic_ready_threshold + } + pub fn get_public_key(&self, authority: AuthorityIndex) -> Option<&PublicKey> { self.authorities .get(authority as usize) diff --git a/crates/starfish-core/src/consensus/linearizer.rs b/crates/starfish-core/src/consensus/linearizer.rs index 3cd875a1..c297704f 100644 --- a/crates/starfish-core/src/consensus/linearizer.rs +++ b/crates/starfish-core/src/consensus/linearizer.rs @@ -257,7 +257,9 @@ impl Linearizer { ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed => { self.collect_subdag_starfish(dag_state, leader_block, false) } - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { self.collect_subdag_mysticeti(dag_state, leader_block) } }; diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index bc38e397..2d43739d 100644 --- 
a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -236,7 +236,8 @@ impl UniversalCommitterBuilder { ConsensusProtocol::Mysticeti | ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed - | ConsensusProtocol::StarfishBls => Self { + | ConsensusProtocol::StarfishBls + | ConsensusProtocol::SailfishPlusPlus => Self { committee, dag_state, metrics, diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 5916b4dc..2749ba88 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -663,7 +663,9 @@ impl Core { info_length, parity_length, )), - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => None, + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => None, } } diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index ca304f9c..8766c103 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -99,6 +99,7 @@ pub enum ConsensusProtocol { Starfish, StarfishSpeed, StarfishBls, + SailfishPlusPlus, } impl ConsensusProtocol { @@ -109,6 +110,7 @@ impl ConsensusProtocol { "starfish" => ConsensusProtocol::Starfish, "starfish-bls" | "starfish-l" => ConsensusProtocol::StarfishBls, "starfish-speed" | "starfish-s" => ConsensusProtocol::StarfishSpeed, + "sailfish++" | "sailfish-pp" => ConsensusProtocol::SailfishPlusPlus, _ => ConsensusProtocol::Starfish, } } @@ -122,9 +124,15 @@ impl ConsensusProtocol { ) } + pub fn is_sailfish_pp(self) -> bool { + matches!(self, ConsensusProtocol::SailfishPlusPlus) + } + pub fn default_dissemination_mode(self) -> DisseminationMode { match self { - ConsensusProtocol::Mysticeti => DisseminationMode::Pull, + ConsensusProtocol::Mysticeti | ConsensusProtocol::SailfishPlusPlus => { + DisseminationMode::Pull + } ConsensusProtocol::CordialMiners => 
DisseminationMode::PushCausal, ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed @@ -525,6 +533,9 @@ impl DagState { ConsensusProtocol::StarfishBls => tracing::info!("Starting Starfish-BLS protocol"), ConsensusProtocol::StarfishSpeed => tracing::info!("Starting Starfish-Speed protocol"), ConsensusProtocol::CordialMiners => tracing::info!("Starting Cordial Miners protocol"), + ConsensusProtocol::SailfishPlusPlus => { + tracing::info!("Starting Sailfish++ protocol") + } } let dag_state = Self { store: store.clone(), @@ -2770,7 +2781,9 @@ mod tests { }; let empty_transactions = Vec::new(); let merkle_root = match consensus_protocol { - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { TransactionsCommitment::new_from_transactions(&empty_transactions) } ConsensusProtocol::Starfish diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index 9eefe8fa..23a3dae0 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -372,6 +372,10 @@ impl ConnectionHandler {} } true } @@ -461,7 +465,9 @@ impl ConnectionHandler { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { blocks_with_transactions.push(block); } ConsensusProtocol::Starfish @@ -865,6 +871,7 @@ impl ConnectionHandler "missing_parents", Self::MissingTxDataRequest(_) => "missing_tx_data", Self::PartialSig(..) 
=> "partial_sig", + Self::CertEcho(_) => "cert_echo", + Self::CertVote(_) => "cert_vote", + Self::CertReady(_) => "cert_ready", } } } diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index b6877a09..818eeae6 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -171,6 +171,29 @@ pub struct PartialSig { pub signature: BlsSignatureBytes, } +// --------------------------------------------------------------------------- +// Signature-free RBC messages (SailfishPlusPlus). +// Authentication relies on the underlying authenticated TCP channels. +// --------------------------------------------------------------------------- + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct CertEcho { + pub block_ref: BlockReference, + pub sender: AuthorityIndex, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct CertVote { + pub block_ref: BlockReference, + pub sender: AuthorityIndex, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct CertReady { + pub block_ref: BlockReference, + pub sender: AuthorityIndex, +} + // --------------------------------------------------------------------------- // BlockHeader — signed, content-addressed block identity. // Contains exactly the fields that feed into BlockDigest::new() and @@ -717,7 +740,9 @@ impl VerifiedBlock { TransactionsCommitment::default() } } - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { TransactionsCommitment::new_from_transactions(&transactions) } }; @@ -993,7 +1018,9 @@ impl VerifiedBlock { // Header-only blocks: shard sidecars are verified and carried // externally via `process_standalone_shards`. 
} - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { let empty_transactions = Vec::new(); let empty_transactions_commitment = TransactionsCommitment::new_from_transactions(&empty_transactions); @@ -1179,7 +1206,9 @@ impl VerifiedBlock { "Only StarfishBls blocks may carry BLS fields" ); } - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { ensure!( acknowledgments.is_empty(), "{consensus_protocol:?} blocks must not carry acknowledgments" From dd46278ca56d56cac0ea5af4d8ea1b5949654aec Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Mon, 16 Mar 2026 14:22:50 +0100 Subject: [PATCH 02/21] Implement Sailfish live certification pipeline --- crates/starfish-core/src/cert_aggregator.rs | 335 ++++++++++++++++++ crates/starfish-core/src/committee.rs | 6 +- .../src/consensus/base_committer.rs | 8 + .../src/consensus/universal_committer.rs | 136 ++++++- crates/starfish-core/src/core.rs | 35 +- .../starfish-core/src/core_thread/spawned.rs | 21 ++ crates/starfish-core/src/dag_state.rs | 135 ++++++- crates/starfish-core/src/lib.rs | 2 + crates/starfish-core/src/net_sync.rs | 119 ++++++- crates/starfish-core/src/network.rs | 18 +- crates/starfish-core/src/sailfish_service.rs | 325 +++++++++++++++++ crates/starfish-core/src/syncer.rs | 36 ++ crates/starfish-core/src/types.rs | 18 +- crates/starfish-core/src/validator.rs | 4 + 14 files changed, 1163 insertions(+), 35 deletions(-) create mode 100644 crates/starfish-core/src/cert_aggregator.rs create mode 100644 crates/starfish-core/src/sailfish_service.rs diff --git a/crates/starfish-core/src/cert_aggregator.rs b/crates/starfish-core/src/cert_aggregator.rs new file mode 100644 index 00000000..4746a5ff --- /dev/null +++ b/crates/starfish-core/src/cert_aggregator.rs @@ 
-0,0 +1,335 @@ +// Copyright (c) 2025 IOTA Stiftung +// SPDX-License-Identifier: Apache-2.0 + +//! Signature-free RBC aggregation for SailfishPlusPlus. +//! +//! Tracks Echo, Vote, and Ready messages per block and emits certification +//! events when the optimistic thresholds are reached. + +use std::{collections::BTreeMap, sync::Arc}; + +use ahash::AHashMap; + +use crate::{ + committee::Committee, + types::{BlockReference, CertMessage, CertMessageKind, RoundNumber, Stake}, +}; + +/// Events emitted by the aggregator when thresholds are crossed. +#[derive(Debug, Clone)] +pub enum CertEvent { + /// Block certified via fast path (enough echoes). + FastDelivery(BlockReference), + /// Enough echoes to trigger a Vote broadcast. + SendVote(BlockReference), + /// Enough echoes/votes/readys to trigger a Ready broadcast. + SendReady(BlockReference), + /// Block certified via slow path (enough readys). + SlowDelivery(BlockReference), +} + +/// Per-block RBC aggregation state. +struct BlockCertState { + echo_stake: Stake, + echo_seen: u128, + vote_sent: bool, + fast_delivered: bool, + vote_stake: Stake, + vote_seen: u128, + ready_stake: Stake, + ready_seen: u128, + ready_sent: bool, + certified: bool, +} + +impl BlockCertState { + fn new() -> Self { + Self { + echo_stake: 0, + echo_seen: 0, + vote_sent: false, + fast_delivered: false, + vote_stake: 0, + vote_seen: 0, + ready_stake: 0, + ready_seen: 0, + ready_sent: false, + certified: false, + } + } +} + +pub struct CertificationAggregator { + committee: Arc, + rounds: BTreeMap>, +} + +impl CertificationAggregator { + pub fn new(committee: Arc) -> Self { + Self { + committee, + rounds: BTreeMap::new(), + } + } + + pub fn add_message(&mut self, message: &CertMessage) -> Vec { + match message.kind { + CertMessageKind::Echo => self.add_echo(message), + CertMessageKind::Vote => self.add_vote(message), + CertMessageKind::Ready => self.add_ready(message), + } + } + + fn add_echo(&mut self, message: &CertMessage) -> Vec { + let 
state = self + .rounds + .entry(message.block_ref.round) + .or_default() + .entry(message.block_ref) + .or_insert_with(BlockCertState::new); + let sender = message.sender; + let mask = 1u128 << sender; + if state.echo_seen & mask != 0 { + return Vec::new(); + } + state.echo_seen |= mask; + let stake = self.committee.get_stake(sender).unwrap_or(0); + state.echo_stake += stake; + + let mut events = Vec::new(); + + // Fast delivery: ceil((N + 2F - 2) / 2) echoes + if !state.fast_delivered && state.echo_stake >= self.committee.optimistic_fast_threshold() { + state.fast_delivered = true; + state.certified = true; + events.push(CertEvent::FastDelivery(message.block_ref)); + } + + // Vote trigger: ceil(N / 2) echoes + if !state.vote_sent && state.echo_stake >= self.committee.optimistic_vote_threshold() { + state.vote_sent = true; + events.push(CertEvent::SendVote(message.block_ref)); + } + + events + } + + fn add_vote(&mut self, message: &CertMessage) -> Vec { + let state = self + .rounds + .entry(message.block_ref.round) + .or_default() + .entry(message.block_ref) + .or_insert_with(BlockCertState::new); + let sender = message.sender; + let mask = 1u128 << sender; + if state.vote_seen & mask != 0 { + return Vec::new(); + } + state.vote_seen |= mask; + let stake = self.committee.get_stake(sender).unwrap_or(0); + state.vote_stake += stake; + + let mut events = Vec::new(); + + // Ready trigger from votes: ceil((N + F - 1) / 2) votes + if !state.ready_sent + && state.vote_stake >= self.committee.optimistic_ready_threshold() + { + state.ready_sent = true; + events.push(CertEvent::SendReady(message.block_ref)); + } + + events + } + + fn add_ready(&mut self, message: &CertMessage) -> Vec { + let state = self + .rounds + .entry(message.block_ref.round) + .or_default() + .entry(message.block_ref) + .or_insert_with(BlockCertState::new); + let sender = message.sender; + let mask = 1u128 << sender; + if state.ready_seen & mask != 0 { + return Vec::new(); + } + state.ready_seen |= 
mask; + let stake = self.committee.get_stake(sender).unwrap_or(0); + state.ready_stake += stake; + + let mut events = Vec::new(); + + // Ready amplification: F+1 readys trigger a Ready broadcast + if !state.ready_sent && state.ready_stake >= self.committee.validity_threshold() { + state.ready_sent = true; + events.push(CertEvent::SendReady(message.block_ref)); + } + + // Slow delivery: 2F+1 readys + if !state.certified && state.ready_stake >= self.committee.quorum_threshold() { + state.certified = true; + events.push(CertEvent::SlowDelivery(message.block_ref)); + } + + events + } + + #[allow(dead_code)] + pub fn is_certified(&self, block_ref: &BlockReference) -> bool { + self.rounds + .get(&block_ref.round) + .and_then(|m| m.get(block_ref)) + .is_some_and(|s| s.certified) + } + + pub fn cleanup_below_round(&mut self, round: RoundNumber) { + self.rounds = self.rounds.split_off(&round); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::AuthorityIndex; + + fn make_committee(n: usize) -> Arc { + Committee::new_test(vec![1; n]) + } + + fn message( + block_ref: BlockReference, + sender: AuthorityIndex, + kind: CertMessageKind, + ) -> CertMessage { + CertMessage { + block_ref, + sender, + kind, + } + } + + fn echo(block_ref: BlockReference, sender: AuthorityIndex) -> CertMessage { + message(block_ref, sender, CertMessageKind::Echo) + } + + fn vote(block_ref: BlockReference, sender: AuthorityIndex) -> CertMessage { + message(block_ref, sender, CertMessageKind::Vote) + } + + fn ready(block_ref: BlockReference, sender: AuthorityIndex) -> CertMessage { + message(block_ref, sender, CertMessageKind::Ready) + } + + #[test] + fn fast_delivery_with_enough_echoes() { + // N=4, F=1. 
fast_threshold = ceil((4 + 2 - 2) / 2) = 2 + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + let events = agg.add_message(&echo(block, 0)); + assert!(!agg.is_certified(&block)); + // With N=4, F=1: fast = ceil((4+2-2)/2) = 2, vote = ceil(4/2) = 2 + // So 1 echo is not enough + assert!( + events + .iter() + .all(|e| !matches!(e, CertEvent::FastDelivery(_))) + ); + + let events = agg.add_message(&echo(block, 1)); + assert!( + events + .iter() + .any(|e| matches!(e, CertEvent::FastDelivery(_))) + ); + assert!(agg.is_certified(&block)); + } + + #[test] + fn vote_trigger() { + // N=4, F=1. vote_threshold = ceil(4/2) = 2 + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&echo(block, 0)); + let events = agg.add_message(&echo(block, 1)); + assert!(events.iter().any(|e| matches!(e, CertEvent::SendVote(_)))); + } + + #[test] + fn ready_trigger_from_votes() { + // N=4, F=1. ready_threshold = ceil((4+1-1)/2) = 2 + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&vote(block, 0)); + let events = agg.add_message(&vote(block, 1)); + assert!(events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + } + + #[test] + fn echoes_do_not_trigger_ready_directly() { + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + let events = agg.add_message(&echo(block, 0)); + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + + let events = agg.add_message(&echo(block, 1)); + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + } + + #[test] + fn slow_delivery_from_readys() { + // N=4, F=1. 
validity = 2, quorum = 3 + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&ready(block, 0)); + // F+1 = 2 readys triggers Ready amplification + let events = agg.add_message(&ready(block, 1)); + assert!(events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + + // 2F+1 = 3 readys triggers slow delivery + let events = agg.add_message(&ready(block, 2)); + assert!( + events + .iter() + .any(|e| matches!(e, CertEvent::SlowDelivery(_))) + ); + assert!(agg.is_certified(&block)); + } + + #[test] + fn duplicate_messages_ignored() { + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&echo(block, 0)); + let events = agg.add_message(&echo(block, 0)); + assert!(events.is_empty()); + } + + #[test] + fn cleanup_via_split_off() { + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block_r1 = BlockReference::new_test(0, 1); + let block_r5 = BlockReference::new_test(0, 5); + + agg.add_message(&echo(block_r1, 0)); + agg.add_message(&echo(block_r5, 0)); + + agg.cleanup_below_round(3); + assert!(!agg.rounds.contains_key(&1)); + assert!(agg.rounds.contains_key(&5)); + } +} diff --git a/crates/starfish-core/src/committee.rs b/crates/starfish-core/src/committee.rs index 43e6f30a..7f4e3718 100644 --- a/crates/starfish-core/src/committee.rs +++ b/crates/starfish-core/src/committee.rs @@ -73,9 +73,9 @@ impl Committee { // ready: ceil((N + F - 1) / 2) let n = total_stake; let f_stake = validity_threshold; - let optimistic_fast_threshold = (n + 2 * f_stake - 2 + 1) / 2; - let optimistic_vote_threshold = (n + 1) / 2; - let optimistic_ready_threshold = (n + f_stake - 1 + 1) / 2; + let optimistic_fast_threshold = (n + 2 * f_stake - 2).div_ceil(2); + let optimistic_vote_threshold = n.div_ceil(2); + let 
optimistic_ready_threshold = (n + f_stake - 1).div_ceil(2); Arc::new(Committee { authorities, diff --git a/crates/starfish-core/src/consensus/base_committer.rs b/crates/starfish-core/src/consensus/base_committer.rs index 4b73801a..0d3eba3e 100644 --- a/crates/starfish-core/src/consensus/base_committer.rs +++ b/crates/starfish-core/src/consensus/base_committer.rs @@ -166,6 +166,11 @@ impl BaseCommitter { }) .collect(); + if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + certified_leader_blocks + .retain(|l| self.dag_state.has_vertex_certificate(l.reference())); + } + // There can be at most one certified leader, otherwise // it means the BFT assumption is broken. if certified_leader_blocks.len() > 1 { @@ -424,6 +429,9 @@ impl BaseCommitter { if self.dag_state.consensus_protocol == ConsensusProtocol::StarfishBls { // StarfishBls: require BLS leader certificate instead of DAG-edge quorum. self.dag_state.has_leader_certificate(l.reference()) + } else if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + self.dag_state.has_vertex_certificate(l.reference()) + && self.enough_leader_support(certifying_round, l, voter_info) } else { self.enough_leader_support(certifying_round, l, voter_info) } diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index 2d43739d..211f7968 100644 --- a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -8,11 +8,11 @@ use ahash::{AHashMap, AHashSet}; use super::{CommitMetastate, LeaderStatus, VoterInfo, WAVE_LENGTH, base_committer::BaseCommitter}; use crate::{ - committee::Committee, + committee::{Committee, QuorumThreshold, StakeAggregator}, consensus::base_committer::BaseCommitterOptions, dag_state::{ConsensusProtocol, DagState}, metrics::{Metrics, UtilizationTimerVecExt}, - types::{AuthorityIndex, BlockReference, RoundNumber, 
format_authority_round}, + types::{AuthorityIndex, BlockReference, RoundNumber, Stake, format_authority_round}, }; /// A universal committer uses a collection of committers to commit a sequence @@ -40,6 +40,10 @@ impl UniversalCommitter { /// list of ordered decided leaders. #[tracing::instrument(skip_all, fields(last_decided = %last_decided))] pub fn try_commit(&mut self, last_decided: BlockReference) -> Vec { + if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + return self.try_commit_sailfish(last_decided); + } + let highest_known_round = self.dag_state.highest_round(); let last_decided_round = last_decided.round(); let last_decided_round_authority = (last_decided.round(), last_decided.authority); @@ -176,6 +180,124 @@ impl UniversalCommitter { .collect() } + fn try_commit_sailfish(&mut self, last_decided: BlockReference) -> Vec { + let highest_known_round = self.dag_state.highest_round(); + let last_decided_round = last_decided.round(); + let highest_possible_leader_to_decide_round = highest_known_round.saturating_sub(1); + let mut committed = Vec::new(); + let mut newly_committed = AHashSet::new(); + + for round in last_decided_round + 1..=highest_possible_leader_to_decide_round { + let leader = self.committee.elect_leader(round); + let Some(anchor) = self.try_direct_commit_block_sailfish(leader, round) else { + continue; + }; + + let mut chain = vec![anchor.clone()]; + let mut current = anchor; + for prev_round in (last_decided_round + 1..current.round()).rev() { + let prev_leader = self.committee.elect_leader(prev_round); + let mut linked_leaders: Vec<_> = self + .dag_state + .get_blocks_at_authority_round(prev_leader, prev_round) + .into_iter() + .filter(|block| self.dag_state.has_vertex_certificate(block.reference())) + .filter(|block| self.dag_state.linked(¤t, block)) + .collect(); + + if linked_leaders.len() > 1 { + panic!( + "[Sailfish] More than one linked leader for {}", + format_authority_round(prev_leader, prev_round) + 
); + } + + if let Some(prev) = linked_leaders.pop() { + current = prev.clone(); + chain.push(prev); + } + } + + chain.reverse(); + for leader_block in chain { + let key = (leader_block.authority(), leader_block.round()); + if !newly_committed.insert(key) { + continue; + } + let direct_decide = leader_block.round() == round; + let status = LeaderStatus::Commit(leader_block, None); + self.decided.insert(key, status.clone()); + if self.metrics_emitted.insert(key) { + tracing::debug!("Decided {status}"); + self.update_metrics(&status, direct_decide); + } + committed.push(status); + } + } + + committed.sort(); + committed + } + + fn try_direct_commit_block_sailfish( + &self, + leader: AuthorityIndex, + leader_round: RoundNumber, + ) -> Option> { + let support_round = leader_round + 1; + let leader_blocks = self + .dag_state + .get_blocks_at_authority_round(leader, leader_round); + + let mut committed_leaders: Vec<_> = leader_blocks + .into_iter() + .filter(|leader_block| { + let leader_certified = self.dag_state.has_vertex_certificate(leader_block.reference()); + let support_stake = + self.supporting_stake_for_sailfish(leader_block.reference(), support_round); + if std::env::var_os("SAILFISH_DEBUG_COMMIT").is_some() { + eprintln!( + "sailfish round={} leader={} certified={} support_stake={} quorum={}", + leader_round, + leader_block.reference(), + leader_certified, + support_stake, + self.committee.quorum_threshold() + ); + } + leader_certified + }) + .filter(|leader_block| { + self.supporting_stake_for_sailfish(leader_block.reference(), support_round) + >= self.committee.quorum_threshold() + }) + .collect(); + + if committed_leaders.len() > 1 { + panic!( + "[Sailfish] More than one certified block for {}", + format_authority_round(leader, leader_round) + ) + } + + committed_leaders.pop() + } + + fn supporting_stake_for_sailfish( + &self, + leader_ref: &BlockReference, + support_round: RoundNumber, + ) -> Stake { + let supporting_blocks = 
self.dag_state.get_blocks_by_round_cached(support_round); + let mut aggregator = StakeAggregator::::new(); + for block in supporting_blocks.iter() { + if block.block_references().iter().any(|reference| reference == leader_ref) { + aggregator.add(block.authority(), &self.committee); + } + } + aggregator.get_stake() + } + /// Return list of leaders for the round. Syncer may give those leaders some /// extra time. To preserve (theoretical) liveness, we should wait /// `Delta` time for at least the first leader. @@ -236,14 +358,20 @@ impl UniversalCommitterBuilder { ConsensusProtocol::Mysticeti | ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed - | ConsensusProtocol::StarfishBls - | ConsensusProtocol::SailfishPlusPlus => Self { + | ConsensusProtocol::StarfishBls => Self { committee, dag_state, metrics, wave_length: WAVE_LENGTH, pipeline: true, }, + ConsensusProtocol::SailfishPlusPlus => Self { + committee, + dag_state, + metrics, + wave_length: WAVE_LENGTH, + pipeline: false, + }, ConsensusProtocol::CordialMiners => Self { committee, dag_state, diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 2749ba88..2d7ba9a1 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -479,6 +479,21 @@ impl Core { return None; } + // SailfishPlusPlus: require certified parent quorum before creating a block. 
+ if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && clock_round > 1 + && !self.dag_state.certified_parent_quorum(clock_round - 1) + { + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!( + "sailfish blocked new block round={} missing certified quorum at prev_round={}", + clock_round, + clock_round - 1 + ); + } + return None; + } + let voted_leader_ref = if self.dag_state.consensus_protocol == ConsensusProtocol::StarfishBls { self.select_starfish_bls_voted_leader(clock_round) @@ -591,6 +606,15 @@ impl Core { ) ); tracing::debug!("Created block {:?}", block_data); + if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() + { + eprintln!( + "sailfish created block round={} refs={:?}", + block_data.round(), + block_data.block_references() + ); + } if first_block.is_none() { first_block = Some(block_data.clone()); } @@ -641,7 +665,16 @@ impl Core { MetaTransaction::Include(include) => pending_refs.push(include), } } - let block_references = self.compress_pending_block_references(&pending_refs, block_round); + let mut block_references = + self.compress_pending_block_references(&pending_refs, block_round); + + // SailfishPlusPlus: filter parents to only include certified blocks. + if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + block_references.retain(|r| { + r.round == 0 || self.dag_state.has_vertex_certificate(r) + }); + } + (transactions, block_references) } diff --git a/crates/starfish-core/src/core_thread/spawned.rs b/crates/starfish-core/src/core_thread/spawned.rs index 138195e9..a474ae7c 100644 --- a/crates/starfish-core/src/core_thread/spawned.rs +++ b/crates/starfish-core/src/core_thread/spawned.rs @@ -65,6 +65,8 @@ enum CoreThreadCommand { PeerSubscribed(AuthorityIndex, oneshot::Sender<()>), /// Apply BLS certificate events from the BLS verification service. 
ApplyCertificateEvents(Vec, oneshot::Sender<()>), + /// Apply Sailfish RBC-certified vertices on the core thread. + ApplySailfishCertificates(Vec, oneshot::Sender<()>), } impl @@ -167,6 +169,17 @@ impl) { + let (sender, receiver) = oneshot::channel(); + self.send(CoreThreadCommand::ApplySailfishCertificates( + certified_refs, + sender, + )) + .await; + receiver.await.expect("core thread is not expected to stop"); + } + /// Apply BLS certificate events from the BLS verification service. pub async fn apply_certificate_events(&self, events: Vec) { let (sender, receiver) = oneshot::channel(); @@ -318,6 +331,14 @@ impl CoreThread { self.syncer.apply_certificate_events(events); sender.send(()).ok(); } + CoreThreadCommand::ApplySailfishCertificates(certified_refs, sender) => { + metrics + .core_thread_tasks_total + .with_label_values(&["apply_sailfish_certificates"]) + .inc(); + self.syncer.apply_sailfish_certificates(certified_refs); + sender.send(()).ok(); + } } } self.syncer diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 8766c103..d19e194c 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -32,7 +32,7 @@ use crate::{ threshold_clock::ThresholdClockAggregator, types::{ AuthorityIndex, BlockDigest, BlockReference, BlsAggregateCertificate, ProvableShard, - RoundNumber, TransactionData, VerifiedBlock, + RoundNumber, Stake, TransactionData, VerifiedBlock, }, }; @@ -307,6 +307,9 @@ struct DagStateInner { consensus_protocol: ConsensusProtocol, precomputed_round_sigs: BTreeMap, precomputed_leader_sigs: BTreeMap, + /// Per-authority RBC vertex certificates (SailfishPlusPlus). + /// Stored as BTreeSet per authority for split_off cleanup. 
+ vertex_certificates: Vec>, } impl DagState { @@ -373,6 +376,7 @@ impl DagState { consensus_protocol, precomputed_round_sigs: BTreeMap::new(), precomputed_leader_sigs: BTreeMap::new(), + vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), }; let mut builder = RecoveredStateBuilder::new(); let replay_started = Instant::now(); @@ -939,6 +943,52 @@ impl DagState { .contains(block_ref) } + /// Mark a batch of vertices as RBC-certified (SailfishPlusPlus). + /// Only the explicitly provided references are certified; parent + /// certificates must come from their own RBC evidence. + pub fn mark_vertices_certified(&self, block_refs: &[BlockReference]) -> bool { + if block_refs.is_empty() { + return false; + } + let mut inner = self.dag_state_inner.write(); + let mut changed = false; + for &block_ref in block_refs { + changed |= inner.vertex_certificates[block_ref.authority as usize].insert(block_ref); + } + changed + } + + /// Mark a vertex as RBC-certified (SailfishPlusPlus). + pub fn mark_vertex_certified(&self, block_ref: BlockReference) -> bool { + self.mark_vertices_certified(&[block_ref]) + } + + /// Check whether a vertex has been RBC-certified (SailfishPlusPlus). + pub fn has_vertex_certificate(&self, block_ref: &BlockReference) -> bool { + self.dag_state_inner.read().vertex_certificates[block_ref.authority as usize] + .contains(block_ref) + } + + /// Check whether 2f+1 stake is certified at the given round + /// (SailfishPlusPlus certified parent quorum gate). + pub fn certified_parent_quorum(&self, round: RoundNumber) -> bool { + let inner = self.dag_state_inner.read(); + let mut stake: Stake = 0; + for auth in 0..inner.committee_size { + // Check if this authority has any certified block at the round. 
+ if inner.vertex_certificates[auth] + .iter() + .any(|r| r.round == round) + { + stake += self + .committee + .get_stake(auth as AuthorityIndex) + .unwrap_or(0); + } + } + self.committee.is_quorum(stake) + } + pub fn dac_certificate_state( &self, block_ref: &BlockReference, @@ -1230,6 +1280,13 @@ impl DagState { } } + if self.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && quorum_round > 1 + && !self.certified_parent_quorum(quorum_round - 1) + { + return false; + } + true } @@ -2089,6 +2146,15 @@ impl DagStateInner { digest: BlockDigest::default(), }; self.precomputed_leader_sigs = self.precomputed_leader_sigs.split_off(&leader_split_ref); + // Prune SailfishPlusPlus vertex certificates. + for auth in 0..self.committee_size { + let split_ref = BlockReference { + authority: auth as AuthorityIndex, + round: self.evicted_rounds[auth], + digest: BlockDigest::default(), + }; + self.vertex_certificates[auth] = self.vertex_certificates[auth].split_off(&split_ref); + } } pub fn add_block( @@ -2806,6 +2872,39 @@ mod tests { Data::new(block) } + fn make_full_block( + authority: AuthorityIndex, + round: RoundNumber, + parents: Vec, + consensus_protocol: ConsensusProtocol, + ) -> Data { + let empty_transactions = Vec::new(); + let merkle_root = match consensus_protocol { + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus => { + TransactionsCommitment::new_from_transactions(&empty_transactions) + } + ConsensusProtocol::Starfish + | ConsensusProtocol::StarfishSpeed + | ConsensusProtocol::StarfishBls => TransactionsCommitment::default(), + }; + let mut block = VerifiedBlock::new( + authority, + round, + parents, + Vec::new(), + 0, + SignatureBytes::default(), + empty_transactions, + merkle_root, + None, + None, + ); + block.preserialize(); + Data::new(block) + } + #[test] fn acknowledgments_are_only_enabled_for_starfish_variants() { assert!(!ConsensusProtocol::Mysticeti.supports_acknowledgments()); @@ 
-2837,6 +2936,40 @@ mod tests { assert!(dag_state.is_data_available(&reference)); } + #[test] + fn batch_vertex_certification_marks_only_explicit_vertices() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let parent_ref = BlockReference::new_test(1, 1); + let child = make_full_block( + 2, + 2, + vec![parent_ref], + ConsensusProtocol::SailfishPlusPlus, + ); + let child_ref = *child.reference(); + + dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); + dag_state.mark_vertices_certified(&[child_ref]); + + assert!(dag_state.has_vertex_certificate(&child_ref)); + assert!(!dag_state.has_vertex_certificate(&parent_ref)); + } + + #[test] + fn batch_vertex_certification_updates_quorum_view() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let certified = vec![ + BlockReference::new_test(0, 4), + BlockReference::new_test(1, 4), + BlockReference::new_test(2, 4), + ]; + + dag_state.mark_vertices_certified(&certified); + + assert!(dag_state.certified_parent_quorum(4)); + assert!(!dag_state.certified_parent_quorum(5)); + } + #[test] fn starfish_speed_adaptive_acknowledgments_only_uses_local_leader_history() { let dag_state = open_test_dag_state_for_with_feature("starfish-speed", 0, true); diff --git a/crates/starfish-core/src/lib.rs b/crates/starfish-core/src/lib.rs index a4e0eac0..ea6e6142 100644 --- a/crates/starfish-core/src/lib.rs +++ b/crates/starfish-core/src/lib.rs @@ -7,6 +7,7 @@ mod block_manager; pub mod bls_batch_verifier; mod bls_certificate_aggregator; mod bls_service; +mod cert_aggregator; pub mod committee; pub mod config; pub mod consensus; @@ -15,6 +16,7 @@ pub mod core; mod core_thread; mod crypto; mod dag_state; +mod sailfish_service; pub use dag_state::ByzantineStrategy; mod broadcaster; mod data; diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index 23a3dae0..cff1641f 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ 
-36,6 +36,9 @@ use crate::{ metrics::{Metrics, UtilizationTimerVecExt}, network::{BlockBatch, Connection, Network, NetworkMessage, ShardPayload}, runtime::{Handle, JoinError, JoinHandle, sleep}, + sailfish_service::{ + SailfishCertEvent, SailfishServiceHandle, SailfishServiceMessage, start_sailfish_service, + }, shard_reconstructor::{DecodedBlocks, ShardMessage, start_shard_reconstructor}, syncer::{CommitObserver, Syncer, SyncerSignals}, types::{ @@ -260,6 +263,7 @@ struct ConnectionHandler own_id: AuthorityIndex, sender: mpsc::Sender, bls_service: Option, + sailfish_service: Option, } impl ConnectionHandler { @@ -271,6 +275,7 @@ impl ConnectionHandler, filter_for_shards: Arc, bls_service: Option, + sailfish_service: Option, ) -> Self { let consensus_protocol = inner.dag_state.consensus_protocol; let committee_size = inner.dag_state.committee_size; @@ -316,6 +321,7 @@ impl ConnectionHandler ConnectionHandler {} + NetworkMessage::CertMessage(message) => { + if message.sender != self.peer_id { + return true; // reject: sender must match peer + } + if let Some(ref sf) = self.sailfish_service { + sf.send(SailfishServiceMessage::CertMessage(message)); + } + } } true } @@ -662,6 +672,11 @@ impl ConnectionHandler = new_data_blocks.iter().map(|b| *b.reference()).collect(); + sf.send(SailfishServiceMessage::ProcessBlocks(block_refs)); + } // Notify CordialKnowledge about all new headers in one batch. let header_refs = new_data_blocks .iter() @@ -812,6 +827,14 @@ impl ConnectionHandler = verified_data_blocks + .iter() + .map(|b| *b.reference()) + .collect(); + sf.send(SailfishServiceMessage::ProcessBlocks(block_refs)); + } // Notify CordialKnowledge about all new headers and shards in one batch. 
let header_refs: Vec<_> = verified_data_blocks .iter() @@ -971,6 +994,7 @@ pub struct NetworkSyncer { partial_sig_routing_task: Option>, bls_event_task: Option>, bls_broadcast_task: Option>, + sf_event_task: Option>, cordial_knowledge_task: JoinHandle<()>, } @@ -1029,6 +1053,14 @@ impl NetworkSyncer } else { (None, None) }; + // Create Sailfish service channel for SailfishPlusPlus protocol. + let is_sailfish_pp = dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus; + let (sf_msg_tx, sf_msg_rx) = if is_sailfish_pp { + let (tx, rx) = mpsc::unbounded_channel::(); + (Some(tx), Some(rx)) + } else { + (None, None) + }; let mut syncer = Syncer::new( core, NetworkSyncSignals { @@ -1038,6 +1070,7 @@ impl NetworkSyncer commit_observer, metrics.clone(), bls_msg_tx.clone(), + sf_msg_tx.clone(), ); syncer.force_new_block(0); let syncer = CoreThreadDispatcher::start(syncer); @@ -1210,6 +1243,61 @@ impl NetworkSyncer (None, None) }; + // Start Sailfish++ RBC certification service. + let (sf_service, sf_event_task) = + if let (Some(sf_tx), Some(sf_rx)) = (sf_msg_tx, sf_msg_rx) { + let (event_tx, mut event_rx) = mpsc::unbounded_channel::>(); + start_sailfish_service( + inner.committee.clone(), + own_authority, + sf_rx, + event_tx, + metrics.clone(), + ); + let sf_handle = SailfishServiceHandle::new(sf_tx); + // Event bridge: certification events -> core thread + network broadcast + let event_inner = inner.clone(); + let event_task = handle.spawn(async move { + while let Some(events) = event_rx.recv().await { + let certified_refs: Vec<_> = events + .iter() + .filter_map(|event| match event { + SailfishCertEvent::Certified(block_ref) => Some(*block_ref), + SailfishCertEvent::Broadcast(_) => None, + }) + .collect(); + if !certified_refs.is_empty() { + event_inner + .syncer + .apply_sailfish_certificates(certified_refs) + .await; + } + // Broadcast Vote/Ready messages + { + let senders: Vec<_> = + event_inner.peer_senders.read().values().cloned().collect(); + for event 
in &events { + match event { + SailfishCertEvent::Broadcast(message) => { + for sender in &senders { + send_network_message_reliably( + sender, + NetworkMessage::CertMessage(message.clone()), + ) + .await; + } + } + SailfishCertEvent::Certified(_) => {} + } + } + } + } + }); + (Some(sf_handle), Some(event_task)) + } else { + (None, None) + }; + let block_fetcher = Arc::new(BlockFetcher::start()); let main_task = handle.spawn(Self::run( network, @@ -1218,6 +1306,7 @@ impl NetworkSyncer block_fetcher, metrics.clone(), bls_service.clone(), + sf_service.clone(), )); Self { inner, @@ -1227,6 +1316,7 @@ impl NetworkSyncer partial_sig_routing_task, bls_event_task, bls_broadcast_task, + sf_event_task, cordial_knowledge_task, } } @@ -1259,6 +1349,11 @@ impl NetworkSyncer bls_broadcast.abort(); bls_broadcast.await.ok(); } + // Abort Sailfish event bridge task. + if let Some(sf_task) = self.sf_event_task { + sf_task.abort(); + sf_task.await.ok(); + } // Stop the cordial knowledge actor. self.cordial_knowledge_task.abort(); self.cordial_knowledge_task.await.ok(); @@ -1275,6 +1370,7 @@ impl NetworkSyncer block_fetcher: Arc, metrics: Arc, bls_service: Option, + sf_service: Option, ) { let mut connections: HashMap>> = HashMap::new(); let handle = Handle::current(); @@ -1287,7 +1383,11 @@ impl NetworkSyncer }; let commit_timeout_task = handle.spawn(Self::commit_timeout_task(inner.clone())); - let cleanup_task = handle.spawn(Self::cleanup_task(inner.clone(), bls_service.clone())); + let cleanup_task = handle.spawn(Self::cleanup_task( + inner.clone(), + bls_service.clone(), + sf_service.clone(), + )); let filter_for_blocks = Arc::new(FilterForBlocks::new()); let filter_for_shards = Arc::new(FilterForShards::new(inner.committee.info_length())); while let Some(connection) = inner.recv_or_stopped(network.connection_receiver()).await { @@ -1310,6 +1410,7 @@ impl NetworkSyncer filter_for_blocks.clone(), filter_for_shards.clone(), bls_service.clone(), + sf_service.clone(), )); 
connections.insert(peer_id, task); } @@ -1337,6 +1438,7 @@ impl NetworkSyncer filter_for_blocks: Arc, filter_for_shards: Arc, bls_service: Option, + sf_service: Option, ) -> Option<()> { let gc_round = inner.dag_state.gc_round(); connection @@ -1353,6 +1455,7 @@ impl NetworkSyncer filter_for_blocks, filter_for_shards, bls_service, + sf_service, ); handler.start().await; @@ -1498,6 +1601,7 @@ impl NetworkSyncer async fn cleanup_task( inner: Arc>, bls_service: Option, + sf_service: Option, ) -> Option<()> { let cleanup_interval = Duration::from_secs(10); loop { @@ -1512,6 +1616,11 @@ impl NetworkSyncer bls.send(BlsServiceMessage::Cleanup(gc_round)); } + // Notify Sailfish service to clean up old aggregator state. + if let Some(ref sf) = sf_service { + sf.send(SailfishServiceMessage::Cleanup(gc_round)); + } + // Evict stale entries from CordialKnowledge // using per-authority eviction rounds. let eviction_rounds = inner.dag_state.evicted_rounds(); diff --git a/crates/starfish-core/src/network.rs b/crates/starfish-core/src/network.rs index f89acf11..22204d2b 100644 --- a/crates/starfish-core/src/network.rs +++ b/crates/starfish-core/src/network.rs @@ -30,7 +30,7 @@ use crate::{ runtime::JoinHandle, stat::HistogramSender, types::{ - AuthorityIndex, BlockReference, CertEcho, CertReady, CertVote, PartialSig, ProvableShard, + AuthorityIndex, BlockReference, CertMessage, CertMessageKind, PartialSig, ProvableShard, RoundNumber, VerifiedBlock, }, }; @@ -156,12 +156,8 @@ pub enum NetworkMessage { /// Standalone partial BLS signature (DAC, round pre-sign, or leader /// pre-sign). PartialSig(PartialSig), - /// SailfishPlusPlus: Optimistic RBC Echo message - CertEcho(CertEcho), - /// SailfishPlusPlus: Optimistic RBC Vote message - CertVote(CertVote), - /// SailfishPlusPlus: Optimistic RBC Ready message - CertReady(CertReady), + /// SailfishPlusPlus: Optimistic RBC message with phase metadata. 
+ CertMessage(CertMessage), } impl NetworkMessage { @@ -172,9 +168,11 @@ impl NetworkMessage { Self::MissingParentsRequest(_) => "missing_parents", Self::MissingTxDataRequest(_) => "missing_tx_data", Self::PartialSig(..) => "partial_sig", - Self::CertEcho(_) => "cert_echo", - Self::CertVote(_) => "cert_vote", - Self::CertReady(_) => "cert_ready", + Self::CertMessage(message) => match message.kind { + CertMessageKind::Echo => "cert_echo", + CertMessageKind::Vote => "cert_vote", + CertMessageKind::Ready => "cert_ready", + }, } } } diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs new file mode 100644 index 00000000..be8967db --- /dev/null +++ b/crates/starfish-core/src/sailfish_service.rs @@ -0,0 +1,325 @@ +// Copyright (c) 2025 IOTA Stiftung +// SPDX-License-Identifier: Apache-2.0 + +//! Async Sailfish++ RBC certification service. +//! +//! Receives blocks and signature-free RBC phase messages, runs the +//! [`CertificationAggregator`], and emits certification events back to the +//! syncer thread. + +use std::{collections::VecDeque, sync::Arc}; + +use prometheus::Registry; +use tokio::sync::mpsc; + +use crate::{ + cert_aggregator::{CertEvent, CertificationAggregator}, + committee::Committee, + metrics::Metrics, + types::{AuthorityIndex, BlockReference, CertMessage, CertMessageKind, RoundNumber}, +}; + +/// Messages sent to the Sailfish service. +pub enum SailfishServiceMessage { + /// New blocks arrived — generate self-echoes. + ProcessBlocks(Vec), + /// Incoming RBC message from a peer. + CertMessage(CertMessage), + /// Cleanup aggregator state below round. + Cleanup(RoundNumber), +} + +/// Events sent back to the syncer/core thread. +#[derive(Debug, Clone)] +pub enum SailfishCertEvent { + /// Block certified (fast or slow path). + Certified(BlockReference), + /// Broadcast an RBC phase message to all peers. + Broadcast(CertMessage), +} + +/// Handle for sending messages to the Sailfish service. 
+#[derive(Clone)] +pub struct SailfishServiceHandle { + sender: mpsc::UnboundedSender, +} + +impl SailfishServiceHandle { + pub fn new(sender: mpsc::UnboundedSender) -> Self { + Self { sender } + } + + pub fn send(&self, msg: SailfishServiceMessage) { + let _ = self.sender.send(msg); + } +} + +/// Start the Sailfish RBC certification service as a tokio task. +pub fn start_sailfish_service( + committee: Arc, + own_authority: AuthorityIndex, + receiver: mpsc::UnboundedReceiver, + event_tx: mpsc::UnboundedSender>, + _metrics: Arc, +) { + tokio::spawn(run_sailfish_service( + committee, + own_authority, + receiver, + event_tx, + )); +} + +async fn run_sailfish_service( + committee: Arc, + own_authority: AuthorityIndex, + mut receiver: mpsc::UnboundedReceiver, + event_tx: mpsc::UnboundedSender>, +) { + let mut aggregator = CertificationAggregator::new(committee); + + while let Some(msg) = receiver.recv().await { + let mut all_events = Vec::new(); + + process_message(msg, &mut aggregator, own_authority, &mut all_events); + + while let Ok(msg) = receiver.try_recv() { + process_message(msg, &mut aggregator, own_authority, &mut all_events); + } + + if !all_events.is_empty() { + let _ = event_tx.send(all_events); + } + } +} + +fn process_message( + msg: SailfishServiceMessage, + aggregator: &mut CertificationAggregator, + own_authority: AuthorityIndex, + events: &mut Vec, +) { + match msg { + SailfishServiceMessage::ProcessBlocks(block_refs) => { + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!("sailfish service process blocks {:?}", block_refs); + } + for block_ref in block_refs { + let echo = CertMessage { + block_ref, + sender: own_authority, + kind: CertMessageKind::Echo, + }; + let cert_events = aggregator.add_message(&echo); + dispatch_cert_events(aggregator, cert_events, own_authority, events); + events.push(SailfishCertEvent::Broadcast(echo)); + } + } + SailfishServiceMessage::CertMessage(message) => { + if 
std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!( + "sailfish service inbound kind={:?} block={} sender={}", + message.kind, message.block_ref, message.sender + ); + } + let cert_events = aggregator.add_message(&message); + dispatch_cert_events(aggregator, cert_events, own_authority, events); + } + SailfishServiceMessage::Cleanup(round) => { + aggregator.cleanup_below_round(round); + } + } +} + +fn dispatch_cert_events( + aggregator: &mut CertificationAggregator, + cert_events: Vec, + own_authority: AuthorityIndex, + out: &mut Vec, +) { + let mut pending = VecDeque::from(cert_events); + while let Some(event) = pending.pop_front() { + match event { + CertEvent::FastDelivery(block_ref) | CertEvent::SlowDelivery(block_ref) => { + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!("sailfish service certified {}", block_ref); + } + out.push(SailfishCertEvent::Certified(block_ref)); + } + CertEvent::SendVote(block_ref) => { + let vote = CertMessage { + block_ref, + sender: own_authority, + kind: CertMessageKind::Vote, + }; + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!("sailfish service local vote {}", block_ref); + } + pending.extend(aggregator.add_message(&vote)); + out.push(SailfishCertEvent::Broadcast(vote)); + } + CertEvent::SendReady(block_ref) => { + let ready = CertMessage { + block_ref, + sender: own_authority, + kind: CertMessageKind::Ready, + }; + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!("sailfish service local ready {}", block_ref); + } + pending.extend(aggregator.add_message(&ready)); + out.push(SailfishCertEvent::Broadcast(ready)); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_committee(n: usize) -> Arc { + Committee::new_test(vec![1; n]) + } + + fn test_metrics() -> Arc { + Metrics::new(&Registry::new(), None, None, None).0 + } + + #[tokio::test] + async fn local_vote_is_counted_before_broadcast() { + let committee = make_committee(4); + let 
own_authority = 1; + let block_ref = BlockReference::new_test(0, 7); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx.recv().await.expect("expected local echo broadcast"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) + if *received == block_ref + && *sender == own_authority + && *kind == CertMessageKind::Echo + ) + })); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + let events = event_rx.recv().await.expect("expected certification events"); + assert!(events.iter().any(|event| { + matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) + })); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) + if *received == block_ref + && *sender == own_authority + && *kind == CertMessageKind::Vote + ) + })); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 2, + kind: CertMessageKind::Vote, + })) + .unwrap(); + let events = event_rx.recv().await.expect("expected ready broadcast"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) + if *received == block_ref + && *sender == own_authority + && *kind == CertMessageKind::Ready + ) + })); + } + + #[tokio::test] + async fn local_ready_is_counted_before_broadcast() { + let committee = make_committee(7); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 9); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let 
(event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let _ = event_rx.recv().await.expect("expected local echo broadcast"); + + for sender in [2, 3, 4] { + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender, + kind: CertMessageKind::Echo, + })) + .unwrap(); + } + let events = event_rx.recv().await.expect("expected vote event"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) + if *received == block_ref + && *sender == own_authority + && *kind == CertMessageKind::Vote + ) + })); + + for sender in [2, 3, 4] { + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender, + kind: CertMessageKind::Vote, + })) + .unwrap(); + } + let events = event_rx.recv().await.expect("expected ready event"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) + if *received == block_ref + && *sender == own_authority + && *kind == CertMessageKind::Ready + ) + })); + + for sender in [2, 3, 4, 5] { + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender, + kind: CertMessageKind::Ready, + })) + .unwrap(); + } + let events = event_rx + .recv() + .await + .expect("expected slow-path certification event"); + assert!(events.iter().any(|event| { + matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) + })); + } +} diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index 04957c4c..e5716d4e 100644 --- a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -18,6 +18,7 @@ use crate::{ data::Data, metrics::{Metrics, UtilizationTimerVecExt}, 
runtime::timestamp_utc, + sailfish_service::SailfishServiceMessage, types::{ AuthorityIndex, BlockReference, PartialSig, PartialSigKind, ProvableShard, ReconstructedTransactionData, RoundNumber, Stake, VerifiedBlock, @@ -61,6 +62,7 @@ pub struct Syncer { subscriber_stake: Stake, pub(crate) metrics: Arc, bls_tx: Option>, + sailfish_tx: Option>, } pub trait SyncerSignals: Send + Sync { @@ -91,6 +93,7 @@ impl Syncer { commit_observer: C, metrics: Arc, bls_tx: Option>, + sailfish_tx: Option>, ) -> Self { let committee_size = core.committee().len(); let own_stake = core @@ -109,6 +112,7 @@ impl Syncer { subscriber_stake: own_stake, metrics, bls_tx, + sailfish_tx, } } @@ -174,6 +178,28 @@ impl Syncer { self.try_new_block(BlockCreationReason::TransactionData); } + /// Called after Sailfish RBC certification events have been applied to + /// DagState on the core thread. Retries block creation and sequencing + /// when any certificate is new. + pub fn apply_sailfish_certificates(&mut self, certified_refs: Vec) { + if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { + eprintln!( + "sailfish cert apply count={} refs={:?}", + certified_refs.len(), + certified_refs + ); + } + if self + .core + .dag_state() + .mark_vertices_certified(&certified_refs) + { + self.maybe_update_proposal_wait(); + self.try_new_block(BlockCreationReason::CertificateEvent); + self.try_new_commit(); + } + } + /// Apply BLS certificate events from the BLS verification service. /// Fresh certificates can unblock both block production and sequencing, so /// retry both paths immediately when DAG state changed. @@ -260,6 +286,10 @@ impl Syncer { self.proposal_wait_round = None; // Send own block and DAC partial sig to BLS service. self.send_bls_message(BlsServiceMessage::ProcessBlocks(vec![block.clone()])); + // Send own block reference to Sailfish certification service. 
+ self.send_sailfish_message(SailfishServiceMessage::ProcessBlocks(vec![ + *block.reference(), + ])); if let Some((block_ref, auth, sig)) = self.core.generate_own_dac_partial_sig(block) { self.send_bls_message(BlsServiceMessage::PartialSig(PartialSig { kind: PartialSigKind::Dac(block_ref), @@ -280,6 +310,12 @@ impl Syncer { } } + fn send_sailfish_message(&self, message: SailfishServiceMessage) { + if let Some(ref sender) = self.sailfish_tx { + let _ = sender.send(message); + } + } + fn maybe_update_proposal_wait(&mut self) { let threshold_round = self.core.dag_state().threshold_clock_round(); if threshold_round <= self.core.last_proposed() { diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index 818eeae6..1a667c2f 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -176,22 +176,18 @@ pub struct PartialSig { // Authentication relies on the underlying authenticated TCP channels. // --------------------------------------------------------------------------- -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct CertEcho { - pub block_ref: BlockReference, - pub sender: AuthorityIndex, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct CertVote { - pub block_ref: BlockReference, - pub sender: AuthorityIndex, +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum CertMessageKind { + Echo, + Vote, + Ready, } #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct CertReady { +pub struct CertMessage { pub block_ref: BlockReference, pub sender: AuthorityIndex, + pub kind: CertMessageKind, } // --------------------------------------------------------------------------- diff --git a/crates/starfish-core/src/validator.rs b/crates/starfish-core/src/validator.rs index 160d18f7..9e2d76dc 100644 --- a/crates/starfish-core/src/validator.rs +++ b/crates/starfish-core/src/validator.rs @@ -309,6 +309,7 @@ mod smoke_tests { #[test_case("starfish", 60)] 
#[test_case("starfish-speed", 80)] #[test_case("starfish-bls", 100)] + #[test_case("sailfish++", 120)] #[tokio::test] async fn validator_commit(consensus: &str, port_offset: u16) { run_commit_test(consensus, port_offset).await; @@ -400,6 +401,7 @@ mod smoke_tests { #[test_case("starfish", 160)] #[test_case("starfish-speed", 180)] #[test_case("starfish-bls", 200)] + #[test_case("sailfish++", 220)] #[tokio::test] async fn validator_sync(consensus: &str, port_offset: u16) { run_sync_test(consensus, port_offset).await; @@ -462,6 +464,7 @@ mod smoke_tests { #[test_case("starfish", 260)] #[test_case("starfish-speed", 280)] #[test_case("starfish-bls", 300)] + #[test_case("sailfish++", 320)] #[tokio::test] async fn validator_crash_faults(consensus: &str, port_offset: u16) { run_crash_faults_test(consensus, port_offset).await; @@ -662,6 +665,7 @@ mod smoke_tests { #[test_case("starfish-speed", 580)] #[test_case("starfish-bls", 600)] #[tokio::test(flavor = "multi_thread")] + #[test_case("sailfish++", 620)] async fn validator_lifecycle_and_recovery(consensus: &str, port_offset: u16) { run_lifecycle_test(consensus, port_offset).await; } From 822eb12f768f077f7eccd10d240c9df0158b3d61 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Mon, 16 Mar 2026 16:42:10 +0100 Subject: [PATCH 03/21] refactor(core): clean up SailfishPlusPlus implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add echo→ready threshold trigger in cert_aggregator (per SFSailfish paper: Ready from ceil((N+F-1)/2) echoes, votes, or F+1 readys) - Fix UniversalCommitterBuilder: wave_length=2, pipeline=true for SailfishPlusPlus (was wave_length=3, pipeline=false) - Remove all debug eprintln blocks guarded by SAILFISH_DEBUG_FLOW and SAILFISH_DEBUG_COMMIT environment variables - Move prometheus::Registry import to #[cfg(test)] in sailfish_service - Rename collect_subdag_mysticeti → collect_subdag_ancestors and collect_subdag_starfish → 
collect_subdag_acknowledgments - Update sailfish_service and cert_aggregator tests to match new echo→ready event flow --- crates/starfish-core/src/cert_aggregator.rs | 15 ++- .../starfish-core/src/consensus/linearizer.rs | 10 +- .../src/consensus/universal_committer.rs | 22 ++-- crates/starfish-core/src/core.rs | 20 +--- .../starfish-core/src/core_thread/spawned.rs | 2 +- crates/starfish-core/src/dag_state.rs | 7 +- crates/starfish-core/src/net_sync.rs | 96 ++++++++-------- crates/starfish-core/src/sailfish_service.rs | 103 +++++++----------- crates/starfish-core/src/syncer.rs | 7 -- 9 files changed, 111 insertions(+), 171 deletions(-) diff --git a/crates/starfish-core/src/cert_aggregator.rs b/crates/starfish-core/src/cert_aggregator.rs index 4746a5ff..a7d2071b 100644 --- a/crates/starfish-core/src/cert_aggregator.rs +++ b/crates/starfish-core/src/cert_aggregator.rs @@ -111,6 +111,12 @@ impl CertificationAggregator { events.push(CertEvent::SendVote(message.block_ref)); } + // Ready trigger from echoes: ceil((N + F - 1) / 2) echoes + if !state.ready_sent && state.echo_stake >= self.committee.optimistic_ready_threshold() { + state.ready_sent = true; + events.push(CertEvent::SendReady(message.block_ref)); + } + events } @@ -133,9 +139,7 @@ impl CertificationAggregator { let mut events = Vec::new(); // Ready trigger from votes: ceil((N + F - 1) / 2) votes - if !state.ready_sent - && state.vote_stake >= self.committee.optimistic_ready_threshold() - { + if !state.ready_sent && state.vote_stake >= self.committee.optimistic_ready_threshold() { state.ready_sent = true; events.push(CertEvent::SendReady(message.block_ref)); } @@ -273,7 +277,8 @@ mod tests { } #[test] - fn echoes_do_not_trigger_ready_directly() { + fn ready_trigger_from_echoes() { + // N=4, F=1. 
ready_threshold = ceil((4+1-1)/2) = 2 let committee = make_committee(4); let mut agg = CertificationAggregator::new(committee); let block = BlockReference::new_test(0, 1); @@ -282,7 +287,7 @@ mod tests { assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); let events = agg.add_message(&echo(block, 1)); - assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + assert!(events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); } #[test] diff --git a/crates/starfish-core/src/consensus/linearizer.rs b/crates/starfish-core/src/consensus/linearizer.rs index c297704f..1c7c40f6 100644 --- a/crates/starfish-core/src/consensus/linearizer.rs +++ b/crates/starfish-core/src/consensus/linearizer.rs @@ -92,7 +92,7 @@ impl Linearizer { /// Collect the sub-dag from a specific anchor excluding any duplicates or /// blocks that have already been committed (within previous sub-dags). /// Uses BFS with per-level batch fetching to minimize lock acquisitions. - fn collect_subdag_mysticeti( + fn collect_subdag_ancestors( &mut self, dag_state: &DagState, leader_block: Data, @@ -136,7 +136,7 @@ impl Linearizer { /// votes per ack_ref, commit when quorum is reached. /// When `direct_ack` is true (StarfishBls): only self-acks count — the DAC /// certificate provides the availability guarantee directly. 
- fn collect_subdag_starfish( + fn collect_subdag_acknowledgments( &mut self, dag_state: &DagState, leader_block: Data, @@ -252,15 +252,15 @@ impl Linearizer { .then(|| self.collect_strong_vote_holders(dag_state, leader_block.round() + 1)); let mut sub_dag = match consensus_protocol { ConsensusProtocol::StarfishBls => { - self.collect_subdag_starfish(dag_state, leader_block, true) + self.collect_subdag_acknowledgments(dag_state, leader_block, true) } ConsensusProtocol::Starfish | ConsensusProtocol::StarfishSpeed => { - self.collect_subdag_starfish(dag_state, leader_block, false) + self.collect_subdag_acknowledgments(dag_state, leader_block, false) } ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners | ConsensusProtocol::SailfishPlusPlus => { - self.collect_subdag_mysticeti(dag_state, leader_block) + self.collect_subdag_ancestors(dag_state, leader_block) } }; diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index 211f7968..5fd2c5f0 100644 --- a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -252,20 +252,8 @@ impl UniversalCommitter { let mut committed_leaders: Vec<_> = leader_blocks .into_iter() .filter(|leader_block| { - let leader_certified = self.dag_state.has_vertex_certificate(leader_block.reference()); - let support_stake = - self.supporting_stake_for_sailfish(leader_block.reference(), support_round); - if std::env::var_os("SAILFISH_DEBUG_COMMIT").is_some() { - eprintln!( - "sailfish round={} leader={} certified={} support_stake={} quorum={}", - leader_round, - leader_block.reference(), - leader_certified, - support_stake, - self.committee.quorum_threshold() - ); - } - leader_certified + self.dag_state + .has_vertex_certificate(leader_block.reference()) }) .filter(|leader_block| { self.supporting_stake_for_sailfish(leader_block.reference(), support_round) @@ -291,7 +279,11 @@ impl 
UniversalCommitter { let supporting_blocks = self.dag_state.get_blocks_by_round_cached(support_round); let mut aggregator = StakeAggregator::::new(); for block in supporting_blocks.iter() { - if block.block_references().iter().any(|reference| reference == leader_ref) { + if block + .block_references() + .iter() + .any(|reference| reference == leader_ref) + { aggregator.add(block.authority(), &self.committee); } } diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 2d7ba9a1..39719e92 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -484,13 +484,6 @@ impl Core { && clock_round > 1 && !self.dag_state.certified_parent_quorum(clock_round - 1) { - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!( - "sailfish blocked new block round={} missing certified quorum at prev_round={}", - clock_round, - clock_round - 1 - ); - } return None; } @@ -606,15 +599,6 @@ impl Core { ) ); tracing::debug!("Created block {:?}", block_data); - if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus - && std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() - { - eprintln!( - "sailfish created block round={} refs={:?}", - block_data.round(), - block_data.block_references() - ); - } if first_block.is_none() { first_block = Some(block_data.clone()); } @@ -670,9 +654,7 @@ impl Core { // SailfishPlusPlus: filter parents to only include certified blocks. 
if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { - block_references.retain(|r| { - r.round == 0 || self.dag_state.has_vertex_certificate(r) - }); + block_references.retain(|r| r.round == 0 || self.dag_state.has_vertex_certificate(r)); } (transactions, block_references) diff --git a/crates/starfish-core/src/core_thread/spawned.rs b/crates/starfish-core/src/core_thread/spawned.rs index a474ae7c..c666d750 100644 --- a/crates/starfish-core/src/core_thread/spawned.rs +++ b/crates/starfish-core/src/core_thread/spawned.rs @@ -176,7 +176,7 @@ impl NetworkSyncer }; // Start Sailfish++ RBC certification service. - let (sf_service, sf_event_task) = - if let (Some(sf_tx), Some(sf_rx)) = (sf_msg_tx, sf_msg_rx) { - let (event_tx, mut event_rx) = mpsc::unbounded_channel::>(); - start_sailfish_service( - inner.committee.clone(), - own_authority, - sf_rx, - event_tx, - metrics.clone(), - ); - let sf_handle = SailfishServiceHandle::new(sf_tx); - // Event bridge: certification events -> core thread + network broadcast - let event_inner = inner.clone(); - let event_task = handle.spawn(async move { - while let Some(events) = event_rx.recv().await { - let certified_refs: Vec<_> = events - .iter() - .filter_map(|event| match event { - SailfishCertEvent::Certified(block_ref) => Some(*block_ref), - SailfishCertEvent::Broadcast(_) => None, - }) - .collect(); - if !certified_refs.is_empty() { - event_inner - .syncer - .apply_sailfish_certificates(certified_refs) - .await; - } - // Broadcast Vote/Ready messages - { - let senders: Vec<_> = - event_inner.peer_senders.read().values().cloned().collect(); - for event in &events { - match event { - SailfishCertEvent::Broadcast(message) => { - for sender in &senders { - send_network_message_reliably( - sender, - NetworkMessage::CertMessage(message.clone()), - ) - .await; - } + let (sf_service, sf_event_task) = if let (Some(sf_tx), Some(sf_rx)) = (sf_msg_tx, sf_msg_rx) + { + let (event_tx, mut event_rx) = 
mpsc::unbounded_channel::>(); + start_sailfish_service( + inner.committee.clone(), + own_authority, + sf_rx, + event_tx, + metrics.clone(), + ); + let sf_handle = SailfishServiceHandle::new(sf_tx); + // Event bridge: certification events -> core thread + network broadcast + let event_inner = inner.clone(); + let event_task = handle.spawn(async move { + while let Some(events) = event_rx.recv().await { + let certified_refs: Vec<_> = events + .iter() + .filter_map(|event| match event { + SailfishCertEvent::Certified(block_ref) => Some(*block_ref), + SailfishCertEvent::Broadcast(_) => None, + }) + .collect(); + if !certified_refs.is_empty() { + event_inner + .syncer + .apply_sailfish_certificates(certified_refs) + .await; + } + // Broadcast Vote/Ready messages + { + let senders: Vec<_> = + event_inner.peer_senders.read().values().cloned().collect(); + for event in &events { + match event { + SailfishCertEvent::Broadcast(message) => { + for sender in &senders { + send_network_message_reliably( + sender, + NetworkMessage::CertMessage(message.clone()), + ) + .await; } - SailfishCertEvent::Certified(_) => {} } + SailfishCertEvent::Certified(_) => {} } } } - }); - (Some(sf_handle), Some(event_task)) - } else { - (None, None) - }; + } + }); + (Some(sf_handle), Some(event_task)) + } else { + (None, None) + }; let block_fetcher = Arc::new(BlockFetcher::start()); let main_task = handle.spawn(Self::run( diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index be8967db..5a710ef4 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -9,7 +9,6 @@ use std::{collections::VecDeque, sync::Arc}; -use prometheus::Registry; use tokio::sync::mpsc; use crate::{ @@ -101,9 +100,6 @@ fn process_message( ) { match msg { SailfishServiceMessage::ProcessBlocks(block_refs) => { - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!("sailfish service process blocks {:?}", 
block_refs); - } for block_ref in block_refs { let echo = CertMessage { block_ref, @@ -116,12 +112,6 @@ fn process_message( } } SailfishServiceMessage::CertMessage(message) => { - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!( - "sailfish service inbound kind={:?} block={} sender={}", - message.kind, message.block_ref, message.sender - ); - } let cert_events = aggregator.add_message(&message); dispatch_cert_events(aggregator, cert_events, own_authority, events); } @@ -141,9 +131,6 @@ fn dispatch_cert_events( while let Some(event) = pending.pop_front() { match event { CertEvent::FastDelivery(block_ref) | CertEvent::SlowDelivery(block_ref) => { - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!("sailfish service certified {}", block_ref); - } out.push(SailfishCertEvent::Certified(block_ref)); } CertEvent::SendVote(block_ref) => { @@ -152,9 +139,6 @@ fn dispatch_cert_events( sender: own_authority, kind: CertMessageKind::Vote, }; - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!("sailfish service local vote {}", block_ref); - } pending.extend(aggregator.add_message(&vote)); out.push(SailfishCertEvent::Broadcast(vote)); } @@ -164,9 +148,6 @@ fn dispatch_cert_events( sender: own_authority, kind: CertMessageKind::Ready, }; - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!("sailfish service local ready {}", block_ref); - } pending.extend(aggregator.add_message(&ready)); out.push(SailfishCertEvent::Broadcast(ready)); } @@ -176,6 +157,8 @@ fn dispatch_cert_events( #[cfg(test)] mod tests { + use prometheus::Registry; + use super::*; fn make_committee(n: usize) -> Arc { @@ -186,8 +169,11 @@ mod tests { Metrics::new(&Registry::new(), None, None, None).0 } + /// N=4, F=1: 2 echoes triggers FastDelivery + SendVote + SendReady. + /// Verifies that the local vote and ready are counted (via add_message) + /// before being broadcast. 
#[tokio::test] - async fn local_vote_is_counted_before_broadcast() { + async fn echoes_trigger_fast_delivery_and_vote_and_ready() { let committee = make_committee(4); let own_authority = 1; let block_ref = BlockReference::new_test(0, 7); @@ -196,20 +182,23 @@ mod tests { start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + // Own echo broadcast. msg_tx .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) .unwrap(); - let events = event_rx.recv().await.expect("expected local echo broadcast"); + let events = event_rx + .recv() + .await + .expect("expected local echo broadcast"); assert!(events.iter().any(|event| { matches!( event, - SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) - if *received == block_ref - && *sender == own_authority - && *kind == CertMessageKind::Echo + SailfishCertEvent::Broadcast(CertMessage { kind, .. }) + if *kind == CertMessageKind::Echo ) })); + // Second echo crosses all three thresholds at once. msg_tx .send(SailfishServiceMessage::CertMessage(CertMessage { block_ref, @@ -217,41 +206,34 @@ mod tests { kind: CertMessageKind::Echo, })) .unwrap(); - let events = event_rx.recv().await.expect("expected certification events"); + let events = event_rx + .recv() + .await + .expect("expected certification + vote + ready events"); assert!(events.iter().any(|event| { matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) })); assert!(events.iter().any(|event| { matches!( event, - SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) - if *received == block_ref - && *sender == own_authority - && *kind == CertMessageKind::Vote + SailfishCertEvent::Broadcast(CertMessage { sender, kind, .. 
}) + if *sender == own_authority && *kind == CertMessageKind::Vote ) })); - - msg_tx - .send(SailfishServiceMessage::CertMessage(CertMessage { - block_ref, - sender: 2, - kind: CertMessageKind::Vote, - })) - .unwrap(); - let events = event_rx.recv().await.expect("expected ready broadcast"); assert!(events.iter().any(|event| { matches!( event, - SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) - if *received == block_ref - && *sender == own_authority - && *kind == CertMessageKind::Ready + SailfishCertEvent::Broadcast(CertMessage { sender, kind, .. }) + if *sender == own_authority && *kind == CertMessageKind::Ready ) })); } + /// N=7, F=2: 4 echoes triggers SendVote + SendReady (both at threshold 4). + /// Then 4 peer readys (+ own ready already counted) reaches quorum for + /// SlowDelivery. #[tokio::test] - async fn local_ready_is_counted_before_broadcast() { + async fn echoes_trigger_vote_and_ready_then_slow_delivery() { let committee = make_committee(7); let own_authority = 1; let block_ref = BlockReference::new_test(0, 9); @@ -260,11 +242,16 @@ mod tests { start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + // Own echo. msg_tx .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) .unwrap(); - let _ = event_rx.recv().await.expect("expected local echo broadcast"); + let _ = event_rx + .recv() + .await + .expect("expected local echo broadcast"); + // 3 more echoes → 4 total → SendVote + SendReady. 
for sender in [2, 3, 4] { msg_tx .send(SailfishServiceMessage::CertMessage(CertMessage { @@ -274,37 +261,23 @@ mod tests { })) .unwrap(); } - let events = event_rx.recv().await.expect("expected vote event"); + let events = event_rx.recv().await.expect("expected vote + ready events"); assert!(events.iter().any(|event| { matches!( event, - SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) - if *received == block_ref - && *sender == own_authority - && *kind == CertMessageKind::Vote + SailfishCertEvent::Broadcast(CertMessage { sender, kind, .. }) + if *sender == own_authority && *kind == CertMessageKind::Vote ) })); - - for sender in [2, 3, 4] { - msg_tx - .send(SailfishServiceMessage::CertMessage(CertMessage { - block_ref, - sender, - kind: CertMessageKind::Vote, - })) - .unwrap(); - } - let events = event_rx.recv().await.expect("expected ready event"); assert!(events.iter().any(|event| { matches!( event, - SailfishCertEvent::Broadcast(CertMessage { block_ref: received, sender, kind }) - if *received == block_ref - && *sender == own_authority - && *kind == CertMessageKind::Ready + SailfishCertEvent::Broadcast(CertMessage { sender, kind, .. }) + if *sender == own_authority && *kind == CertMessageKind::Ready ) })); + // 4 peer readys → ready_stake = 1 (own) + 4 = 5 ≥ quorum (5) → SlowDelivery. for sender in [2, 3, 4, 5] { msg_tx .send(SailfishServiceMessage::CertMessage(CertMessage { diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index e5716d4e..d275b64c 100644 --- a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -182,13 +182,6 @@ impl Syncer { /// DagState on the core thread. Retries block creation and sequencing /// when any certificate is new. 
pub fn apply_sailfish_certificates(&mut self, certified_refs: Vec) { - if std::env::var_os("SAILFISH_DEBUG_FLOW").is_some() { - eprintln!( - "sailfish cert apply count={} refs={:?}", - certified_refs.len(), - certified_refs - ); - } if self .core .dag_state() From b0852c717a14cd3fd4c8299efe09d59d8743d8ed Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 01:25:27 +0100 Subject: [PATCH 04/21] feat(core): add Sailfish++ timeout/no-vote control plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the SFSailfish control-plane messages (timeout certificates and no-vote certificates) that enable liveness under faults and direct skip in the commit rule. Types: SailfishTimeoutMsg/Cert, SailfishNoVoteMsg/Cert, SailfishFields embedded in BlockHeader. Ed25519 signed with domain-separated digests. Crypto: Signer::sign_digest, PublicKey::verify_digest_signature, sailfish_timeout_digest, sailfish_novote_digest helpers. Network: SailfishTimeout and SailfishNoVote message variants. Service: SignedQuorumAggregator shared by timeout and no-vote paths. Aggregates signed messages until 2f+1 quorum, emits TimeoutReady and NoVoteReady events. DagState: Stores timeout/no-vote certs with BTreeMap, accessible via add/get/has methods, cleaned up via split_off. Core: Block creation gated by sailfish_control_ready — requires TC when lacking parent to previous leader, NVC additionally for round leader. SailfishFields computed and embedded in block header. Committer: Direct skip from NVC in try_commit_sailfish. Backward walk also uses NVC for skip resolution. Validation: verify_signed_quorum helper checks signer uniqueness, quorum stake, and Ed25519 signatures for both TC and NVC. Fixes incorrect SFSailfish paper link in README (eprint → arxiv). Adds sailfish-pp to dryrun.sh options comment. 
--- README.md | 2 +- .../src/bls_certificate_aggregator.rs | 1 + .../src/consensus/universal_committer.rs | 40 ++ crates/starfish-core/src/core.rs | 99 +++- .../starfish-core/src/core_thread/spawned.rs | 38 +- crates/starfish-core/src/crypto.rs | 43 ++ crates/starfish-core/src/dag_state.rs | 71 ++- crates/starfish-core/src/decoder.rs | 1 + crates/starfish-core/src/net_sync.rs | 57 ++- crates/starfish-core/src/network.rs | 8 +- crates/starfish-core/src/sailfish_service.rs | 455 +++++++++++++++--- .../starfish-core/src/shard_reconstructor.rs | 1 + crates/starfish-core/src/syncer.rs | 20 +- crates/starfish-core/src/threshold_clock.rs | 1 + crates/starfish-core/src/types.rs | 203 +++++++- scripts/dryrun.sh | 2 +- 16 files changed, 970 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index cf7c6d74..7f92bd4b 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Three versions of Starfish are available in this repository: The repository also supports other partially synchronous DAG-based consensus protocols: -- **`sailfish++`**: Implementation based on [SFSailfish](https://eprint.iacr.org/2025/535) ("Optimistic, Signature-Free Reliable Broadcast and Its Applications", CCS'25). +- **`sailfish++`**: Implementation based on [SFSailfish](https://arxiv.org/abs/2505.02761) ("Optimistic, Signature-Free Reliable Broadcast and Its Applications", CCS'25). A certified DAG protocol using signature-free optimistic reliable broadcast (RBC) for vertex certification. Achieves 2-round optimistic commit latency with authentication derived from TCP channels rather than cryptographic signatures. - **`mysticeti`**: Implementation of [Mysticeti](https://www.cs.cornell.edu/~babel/papers/mysticeti.pdf). 
diff --git a/crates/starfish-core/src/bls_certificate_aggregator.rs b/crates/starfish-core/src/bls_certificate_aggregator.rs index 2b7d2b3d..b896c0a5 100644 --- a/crates/starfish-core/src/bls_certificate_aggregator.rs +++ b/crates/starfish-core/src/bls_certificate_aggregator.rs @@ -1100,6 +1100,7 @@ mod tests { None, None, None, + None, ) } diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index 5fd2c5f0..46be348f 100644 --- a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -186,17 +186,57 @@ impl UniversalCommitter { let highest_possible_leader_to_decide_round = highest_known_round.saturating_sub(1); let mut committed = Vec::new(); let mut newly_committed = AHashSet::new(); + let mut skipped = AHashSet::new(); for round in last_decided_round + 1..=highest_possible_leader_to_decide_round { let leader = self.committee.elect_leader(round); + + // Direct skip: if a no-vote certificate exists for this slot, + // the leader provably cannot be committed. + if self.dag_state.has_novote_cert(round, leader) { + skipped.insert((leader, round)); + let key = (leader, round); + if !self.decided.contains_key(&key) { + let status = LeaderStatus::Skip(leader, round); + self.decided.insert(key, status.clone()); + if self.metrics_emitted.insert(key) { + tracing::debug!("Decided {status}"); + self.update_metrics(&status, true); + } + committed.push(status); + } + continue; + } + let Some(anchor) = self.try_direct_commit_block_sailfish(leader, round) else { continue; }; + // Backward walk: resolve older slots between last_decided and + // this anchor. If anchor has a causal path to a certified leader + // at slot s, commit it. If an NVC exists for slot s, skip it. 
let mut chain = vec![anchor.clone()]; let mut current = anchor; for prev_round in (last_decided_round + 1..current.round()).rev() { let prev_leader = self.committee.elect_leader(prev_round); + let prev_key = (prev_leader, prev_round); + if newly_committed.contains(&prev_key) || skipped.contains(&prev_key) { + continue; + } + + // Check for NVC-based skip. + if self.dag_state.has_novote_cert(prev_round, prev_leader) { + skipped.insert(prev_key); + let status = LeaderStatus::Skip(prev_leader, prev_round); + self.decided.insert(prev_key, status.clone()); + if self.metrics_emitted.insert(prev_key) { + tracing::debug!("Decided {status}"); + self.update_metrics(&status, false); + } + committed.push(status); + continue; + } + let mut linked_leaders: Vec<_> = self .dag_state .get_blocks_at_authority_round(prev_leader, prev_round) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 39719e92..f370cbf4 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -34,7 +34,7 @@ use crate::{ types::{ AuthorityIndex, BaseTransaction, BlockReference, BlsAggregateCertificate, Encoder, PartialSig, PartialSigKind, ProvableShard, ReconstructedTransactionData, RoundNumber, - Shard, VerifiedBlock, + SailfishFields, Shard, VerifiedBlock, }, }; @@ -518,6 +518,16 @@ impl Core { "Core::new_block::collect_transactions_and_references", self.collect_transactions_and_references(pending_transactions, clock_round) ); + + // SailfishPlusPlus: gate block creation on control-plane certs when + // we lack a parent link to the previous leader. 
+ if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && !self.sailfish_control_ready(clock_round, &block_references) + { + self.requeue_transactions(std::mem::take(&mut transactions)); + return None; + } + let starfish_speed_excluded_authors = self.starfish_speed_excluded_ack_authors(clock_round); if starfish_speed_excluded_authors & (1u128 << self.authority) != 0 { self.requeue_transactions(std::mem::take(&mut transactions)); @@ -837,6 +847,16 @@ impl Core { None }; + // SailfishPlusPlus: compute control-plane fields (TC / NVC). + let sailfish_fields = if self.dag_state.consensus_protocol + == ConsensusProtocol::SailfishPlusPlus + && clock_round > 1 + { + self.compute_sailfish_fields(clock_round, &block_references) + } else { + None + }; + let mut block = VerifiedBlock::new_with_signer( self.authority, clock_round, @@ -856,6 +876,7 @@ impl Core { certified_leader, precomputed_round_sig, precomputed_leader_sig, + sailfish_fields, ); self.metrics @@ -869,6 +890,82 @@ impl Core { Data::new(block) } + /// Compute SailfishPlusPlus control fields for a new block at + /// `clock_round`. + /// + /// Returns `Some(fields)` with the control certs to embed, or `None` if no + /// control fields are needed (block has a path to the previous leader). + /// + /// The caller must ensure that when this returns `None` the block actually + /// has a parent link to the previous leader. The gating logic that blocks + /// block creation when certs are missing lives in `try_new_block`. + fn compute_sailfish_fields( + &self, + clock_round: RoundNumber, + block_references: &[BlockReference], + ) -> Option { + let prev_round = clock_round - 1; + let prev_leader = self.committee.elect_leader(prev_round); + + // If we have a direct parent to the previous leader, no control + // certs are needed. 
+ let has_path_to_prev_leader = block_references + .iter() + .any(|r| r.round == prev_round && r.authority == prev_leader); + + if has_path_to_prev_leader { + return None; + } + + // We lack a path — collect the control certs. + let timeout_cert = self.dag_state.get_timeout_cert(prev_round); + let is_leader = self.committee.elect_leader(clock_round) == self.authority; + let no_vote_cert = if is_leader { + self.dag_state.get_novote_cert(prev_round, prev_leader) + } else { + None + }; + + Some(SailfishFields { + timeout_cert, + no_vote_cert, + }) + } + + /// Check whether Sailfish++ control-plane prerequisites are met for + /// creating a block in `clock_round`. Returns true if block creation can + /// proceed. + fn sailfish_control_ready( + &self, + clock_round: RoundNumber, + block_references: &[BlockReference], + ) -> bool { + if clock_round <= 1 { + return true; + } + let prev_round = clock_round - 1; + let prev_leader = self.committee.elect_leader(prev_round); + + let has_path = block_references + .iter() + .any(|r| r.round == prev_round && r.authority == prev_leader); + if has_path { + return true; + } + + // Must have a TC for the previous round. + if !self.dag_state.has_timeout_cert(prev_round) { + return false; + } + // Leader must additionally have a NVC. 
+ if self.committee.elect_leader(clock_round) == self.authority + && !self.dag_state.has_novote_cert(prev_round, prev_leader) + { + return false; + } + true + } + fn prepare_last_blocks(&mut self) { let target = match self.dag_state.byzantine_strategy { Some( diff --git a/crates/starfish-core/src/core_thread/spawned.rs b/crates/starfish-core/src/core_thread/spawned.rs index c666d750..b5a9fef9 100644 --- a/crates/starfish-core/src/core_thread/spawned.rs +++ b/crates/starfish-core/src/core_thread/spawned.rs @@ -16,7 +16,7 @@ use crate::{ syncer::{CommitObserver, Syncer, SyncerSignals}, types::{ AuthorityIndex, BlockReference, ProvableShard, ReconstructedTransactionData, RoundNumber, - VerifiedBlock, + SailfishNoVoteCert, SailfishTimeoutCert, VerifiedBlock, }, }; @@ -67,6 +67,10 @@ enum CoreThreadCommand { ApplyCertificateEvents(Vec, oneshot::Sender<()>), /// Apply Sailfish RBC-certified vertices on the core thread. ApplySailfishCertificates(Vec, oneshot::Sender<()>), + /// Store a Sailfish++ timeout certificate in DagState. + ApplyTimeoutCert(SailfishTimeoutCert, oneshot::Sender<()>), + /// Store a Sailfish++ no-vote certificate in DagState. 
+ ApplyNoVoteCert(SailfishNoVoteCert, oneshot::Sender<()>), } impl @@ -180,6 +184,22 @@ impl) { let (sender, receiver) = oneshot::channel(); @@ -339,6 +359,22 @@ impl CoreThread { self.syncer.apply_sailfish_certificates(certified_refs); sender.send(()).ok(); } + CoreThreadCommand::ApplyTimeoutCert(cert, sender) => { + metrics + .core_thread_tasks_total + .with_label_values(&["apply_timeout_cert"]) + .inc(); + self.syncer.apply_timeout_cert(cert); + sender.send(()).ok(); + } + CoreThreadCommand::ApplyNoVoteCert(cert, sender) => { + metrics + .core_thread_tasks_total + .with_label_values(&["apply_novote_cert"]) + .inc(); + self.syncer.apply_novote_cert(cert); + sender.send(()).ok(); + } } } self.syncer diff --git a/crates/starfish-core/src/crypto.rs b/crates/starfish-core/src/crypto.rs index aa50be76..0b507bd0 100644 --- a/crates/starfish-core/src/crypto.rs +++ b/crates/starfish-core/src/crypto.rs @@ -52,6 +52,25 @@ pub fn bls_dac_message(ack_ref: &BlockReference) -> [u8; 32] { hasher.finalize().into() } +/// Build the 32-byte digest for a Sailfish++ timeout message. +/// Domain separation: `b"sf_timeout" || round`. +pub fn sailfish_timeout_digest(round: RoundNumber) -> [u8; 32] { + let mut hasher = Blake3Hasher::new(); + hasher.update(b"sf_timeout"); + round.crypto_hash(&mut hasher); + hasher.finalize().into() +} + +/// Build the 32-byte digest for a Sailfish++ no-vote message. +/// Domain separation: `b"sf_novote" || round || leader`. 
+pub fn sailfish_novote_digest(round: RoundNumber, leader: AuthorityIndex) -> [u8; 32] { + let mut hasher = Blake3Hasher::new(); + hasher.update(b"sf_novote"); + round.crypto_hash(&mut hasher); + leader.crypto_hash(&mut hasher); + hasher.finalize().into() +} + pub const SIGNATURE_SIZE: usize = 64; pub const BLOCK_DIGEST_SIZE: usize = 32; @@ -399,6 +418,17 @@ impl PublicKey { pub fn from_bytes(bytes: &[u8; 32]) -> Result { ed25519_consensus::VerificationKey::try_from(*bytes).map(Self) } + + /// Verify an Ed25519 signature over a 32-byte digest. Used for + /// Sailfish++ timeout and no-vote message verification. + pub fn verify_digest_signature( + &self, + digest: &[u8; 32], + signature: &SignatureBytes, + ) -> Result<(), ed25519_consensus::Error> { + let sig = ed25519_consensus::Signature::from(signature.0); + self.0.verify(&sig, digest.as_ref()) + } } impl Signer { @@ -435,6 +465,13 @@ impl Signer { SignatureBytes(signature.to_bytes()) } + /// Sign a pre-computed 32-byte digest. Used for Sailfish++ control + /// messages (timeout, no-vote) that don't fit the block-signing schema. 
+ pub fn sign_digest(&self, digest: &[u8; 32]) -> SignatureBytes { + let signature = self.0.sign(digest.as_ref()); + SignatureBytes(signature.to_bytes()) + } + pub fn public_key(&self) -> PublicKey { PublicKey(self.0.verification_key()) } @@ -522,6 +559,12 @@ impl Default for SignatureBytes { } } +impl fmt::Debug for SignatureBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Sig({})", &hex::encode(&self.0[..4])) + } +} + impl Serialize for SignatureBytes { #[inline] fn serialize(&self, serializer: S) -> Result { diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 82f5f074..c44d121d 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -32,7 +32,8 @@ use crate::{ threshold_clock::ThresholdClockAggregator, types::{ AuthorityIndex, BlockDigest, BlockReference, BlsAggregateCertificate, ProvableShard, - RoundNumber, Stake, TransactionData, VerifiedBlock, + RoundNumber, SailfishNoVoteCert, SailfishTimeoutCert, Stake, TransactionData, + VerifiedBlock, }, }; @@ -310,6 +311,10 @@ struct DagStateInner { /// Per-authority RBC vertex certificates (SailfishPlusPlus). /// Stored as BTreeSet per authority for split_off cleanup. vertex_certificates: Vec>, + /// Sailfish++ timeout certificates, indexed by round. + sailfish_timeout_certs: BTreeMap, + /// Sailfish++ no-vote certificates, indexed by (round, leader). 
+ sailfish_novote_certs: BTreeMap<(RoundNumber, AuthorityIndex), SailfishNoVoteCert>, } impl DagState { @@ -377,6 +382,8 @@ impl DagState { precomputed_round_sigs: BTreeMap::new(), precomputed_leader_sigs: BTreeMap::new(), vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), + sailfish_timeout_certs: BTreeMap::new(), + sailfish_novote_certs: BTreeMap::new(), }; let mut builder = RecoveredStateBuilder::new(); let replay_started = Instant::now(); @@ -989,6 +996,62 @@ impl DagState { self.committee.is_quorum(stake) } + /// Store a Sailfish++ timeout certificate for a round. + pub fn add_timeout_cert(&self, cert: SailfishTimeoutCert) { + let mut inner = self.dag_state_inner.write(); + inner + .sailfish_timeout_certs + .entry(cert.round) + .or_insert(cert); + } + + /// Retrieve a stored timeout certificate for a round. + pub fn get_timeout_cert(&self, round: RoundNumber) -> Option { + self.dag_state_inner + .read() + .sailfish_timeout_certs + .get(&round) + .cloned() + } + + /// Check whether a timeout certificate exists for a round. + pub fn has_timeout_cert(&self, round: RoundNumber) -> bool { + self.dag_state_inner + .read() + .sailfish_timeout_certs + .contains_key(&round) + } + + /// Store a Sailfish++ no-vote certificate for a (round, leader) slot. + pub fn add_novote_cert(&self, cert: SailfishNoVoteCert) { + let mut inner = self.dag_state_inner.write(); + inner + .sailfish_novote_certs + .entry((cert.round, cert.leader)) + .or_insert(cert); + } + + /// Retrieve a stored no-vote certificate for (round, leader). + pub fn get_novote_cert( + &self, + round: RoundNumber, + leader: AuthorityIndex, + ) -> Option { + self.dag_state_inner + .read() + .sailfish_novote_certs + .get(&(round, leader)) + .cloned() + } + + /// Check whether a no-vote certificate exists for a (round, leader) slot. 
+ pub fn has_novote_cert(&self, round: RoundNumber, leader: AuthorityIndex) -> bool { + self.dag_state_inner + .read() + .sailfish_novote_certs + .contains_key(&(round, leader)) + } + pub fn dac_certificate_state( &self, block_ref: &BlockReference, @@ -2155,6 +2218,9 @@ impl DagStateInner { }; self.vertex_certificates[auth] = self.vertex_certificates[auth].split_off(&split_ref); } + // Prune Sailfish++ timeout and no-vote certificates. + self.sailfish_timeout_certs = self.sailfish_timeout_certs.split_off(&min_evicted); + self.sailfish_novote_certs = self.sailfish_novote_certs.split_off(&(min_evicted, 0)); } pub fn add_block( @@ -2830,6 +2896,7 @@ mod tests { TransactionsCommitment::default(), Some(strong_vote_mask), None, + None, ); block.preserialize(); Data::new(block) @@ -2867,6 +2934,7 @@ mod tests { merkle_root, None, None, + None, ); block.preserialize(); Data::new(block) @@ -2900,6 +2968,7 @@ mod tests { merkle_root, None, None, + None, ); block.preserialize(); Data::new(block) diff --git a/crates/starfish-core/src/decoder.rs b/crates/starfish-core/src/decoder.rs index 6ab319e9..d10940ed 100644 --- a/crates/starfish-core/src/decoder.rs +++ b/crates/starfish-core/src/decoder.rs @@ -170,6 +170,7 @@ mod tests { None, None, None, + None, ); (block, encoded) } diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index 3125769f..21c9e2dc 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -386,6 +386,22 @@ impl ConnectionHandler { + if msg.sender != self.peer_id { + return true; + } + if let Some(ref sf) = self.sailfish_service { + sf.send(SailfishServiceMessage::TimeoutMsg(msg)); + } + } + NetworkMessage::SailfishNoVote(msg) => { + if msg.sender != self.peer_id { + return true; + } + if let Some(ref sf) = self.sailfish_service { + sf.send(SailfishServiceMessage::NoVoteMsg(msg)); + } + } } true } @@ -1263,7 +1279,7 @@ impl NetworkSyncer .iter() .filter_map(|event| match event { 
SailfishCertEvent::Certified(block_ref) => Some(*block_ref), - SailfishCertEvent::Broadcast(_) => None, + _ => None, }) .collect(); if !certified_refs.is_empty() { @@ -1272,7 +1288,19 @@ impl NetworkSyncer .apply_sailfish_certificates(certified_refs) .await; } - // Broadcast Vote/Ready messages + // Apply timeout/novote certs to dag state + for event in &events { + match event { + SailfishCertEvent::TimeoutReady(cert) => { + event_inner.syncer.apply_timeout_cert(cert.clone()).await; + } + SailfishCertEvent::NoVoteReady(cert) => { + event_inner.syncer.apply_novote_cert(cert.clone()).await; + } + _ => {} + } + } + // Broadcast Vote/Ready/Timeout/NoVote messages { let senders: Vec<_> = event_inner.peer_senders.read().values().cloned().collect(); @@ -1287,7 +1315,29 @@ impl NetworkSyncer .await; } } - SailfishCertEvent::Certified(_) => {} + SailfishCertEvent::BroadcastTimeout(msg) => { + for sender in &senders { + send_network_message_reliably( + sender, + NetworkMessage::SailfishTimeout(msg.clone()), + ) + .await; + } + } + SailfishCertEvent::SendNoVote(msg) => { + // No-vote messages are sent only to the + // next-round leader. For now broadcast. 
+ for sender in &senders { + send_network_message_reliably( + sender, + NetworkMessage::SailfishNoVote(msg.clone()), + ) + .await; + } + } + SailfishCertEvent::Certified(_) + | SailfishCertEvent::TimeoutReady(_) + | SailfishCertEvent::NoVoteReady(_) => {} } } } @@ -1708,6 +1758,7 @@ mod tests { TransactionsCommitment::default(), None, None, + None, )); let mut ck = ConnectionKnowledge::new(1, 4); diff --git a/crates/starfish-core/src/network.rs b/crates/starfish-core/src/network.rs index 22204d2b..7db4f795 100644 --- a/crates/starfish-core/src/network.rs +++ b/crates/starfish-core/src/network.rs @@ -31,7 +31,7 @@ use crate::{ stat::HistogramSender, types::{ AuthorityIndex, BlockReference, CertMessage, CertMessageKind, PartialSig, ProvableShard, - RoundNumber, VerifiedBlock, + RoundNumber, SailfishNoVoteMsg, SailfishTimeoutMsg, VerifiedBlock, }, }; @@ -158,6 +158,10 @@ pub enum NetworkMessage { PartialSig(PartialSig), /// SailfishPlusPlus: Optimistic RBC message with phase metadata. CertMessage(CertMessage), + /// SailfishPlusPlus: Signed timeout message for round advancement. + SailfishTimeout(SailfishTimeoutMsg), + /// SailfishPlusPlus: Signed no-vote message for leader skip proof. + SailfishNoVote(SailfishNoVoteMsg), } impl NetworkMessage { @@ -173,6 +177,8 @@ impl NetworkMessage { CertMessageKind::Vote => "cert_vote", CertMessageKind::Ready => "cert_ready", }, + Self::SailfishTimeout(_) => "sailfish_timeout", + Self::SailfishNoVote(_) => "sailfish_no_vote", } } } diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index 5a710ef4..f35db1cf 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -1,42 +1,81 @@ // Copyright (c) 2025 IOTA Stiftung // SPDX-License-Identifier: Apache-2.0 -//! Async Sailfish++ RBC certification service. +//! Async Sailfish++ certification and control service. //! //! 
Receives blocks and signature-free RBC phase messages, runs the -//! [`CertificationAggregator`], and emits certification events back to the -//! syncer thread. +//! [`CertificationAggregator`], aggregates timeout and no-vote messages, +//! and emits certification/control events back to the syncer thread. use std::{collections::VecDeque, sync::Arc}; +use ahash::AHashMap; use tokio::sync::mpsc; use crate::{ cert_aggregator::{CertEvent, CertificationAggregator}, committee::Committee, + crypto::SignatureBytes, metrics::Metrics, - types::{AuthorityIndex, BlockReference, CertMessage, CertMessageKind, RoundNumber}, + types::{ + AuthorityIndex, BlockReference, CertMessage, CertMessageKind, RoundNumber, + SailfishNoVoteCert, SailfishNoVoteMsg, SailfishTimeoutCert, SailfishTimeoutMsg, Stake, + }, }; +// --------------------------------------------------------------------------- +// Messages into the service +// --------------------------------------------------------------------------- + /// Messages sent to the Sailfish service. +#[allow(dead_code)] pub enum SailfishServiceMessage { /// New blocks arrived — generate self-echoes. ProcessBlocks(Vec), /// Incoming RBC message from a peer. CertMessage(CertMessage), + /// Incoming signed timeout message from a peer. + TimeoutMsg(SailfishTimeoutMsg), + /// Incoming signed no-vote message from a peer. + NoVoteMsg(SailfishNoVoteMsg), + /// Local timeout expired for a round — sign and broadcast own timeout. + LocalTimeout(RoundNumber), + /// Local no-vote: we advanced past round `round` without voting for + /// `leader`. Sign and send to the next-round leader. + LocalNoVote { + round: RoundNumber, + leader: AuthorityIndex, + }, /// Cleanup aggregator state below round. Cleanup(RoundNumber), } +// --------------------------------------------------------------------------- +// Events out of the service +// --------------------------------------------------------------------------- + /// Events sent back to the syncer/core thread. 
#[derive(Debug, Clone)] +#[allow(dead_code)] pub enum SailfishCertEvent { /// Block certified (fast or slow path). Certified(BlockReference), /// Broadcast an RBC phase message to all peers. Broadcast(CertMessage), + /// Broadcast a signed timeout message to all peers. + BroadcastTimeout(SailfishTimeoutMsg), + /// Send a signed no-vote message to the next-round leader. + SendNoVote(SailfishNoVoteMsg), + /// Timeout certificate formed for a round. + TimeoutReady(SailfishTimeoutCert), + /// No-vote certificate formed for (round, leader). + NoVoteReady(SailfishNoVoteCert), } +// --------------------------------------------------------------------------- +// Handle +// --------------------------------------------------------------------------- + /// Handle for sending messages to the Sailfish service. #[derive(Clone)] pub struct SailfishServiceHandle { @@ -53,6 +92,59 @@ impl SailfishServiceHandle { } } +// --------------------------------------------------------------------------- +// Signed quorum aggregator — shared by timeout and no-vote paths +// --------------------------------------------------------------------------- + +/// Accumulates signed messages from distinct authorities until a quorum +/// (2f+1 stake) is reached. Used for both timeout and no-vote certificates. +struct SignedQuorumAggregator { + stake: Stake, + seen: u128, + signatures: Vec<(AuthorityIndex, SignatureBytes)>, + formed: bool, +} + +impl SignedQuorumAggregator { + fn new() -> Self { + Self { + stake: 0, + seen: 0, + signatures: Vec::new(), + formed: false, + } + } + + /// Try to add a signed message. Returns `true` when quorum is reached + /// for the first time. 
+ fn add( + &mut self, + sender: AuthorityIndex, + signature: SignatureBytes, + committee: &Committee, + ) -> bool { + if self.formed { + return false; + } + let mask = 1u128 << sender; + if self.seen & mask != 0 { + return false; + } + self.seen |= mask; + self.stake += committee.get_stake(sender).unwrap_or(0); + self.signatures.push((sender, signature)); + if self.stake >= committee.quorum_threshold() { + self.formed = true; + return true; + } + false + } +} + +// --------------------------------------------------------------------------- +// Service lifetime +// --------------------------------------------------------------------------- + /// Start the Sailfish RBC certification service as a tokio task. pub fn start_sailfish_service( committee: Arc, @@ -75,15 +167,15 @@ async fn run_sailfish_service( mut receiver: mpsc::UnboundedReceiver, event_tx: mpsc::UnboundedSender>, ) { - let mut aggregator = CertificationAggregator::new(committee); + let mut state = ServiceState::new(committee, own_authority); while let Some(msg) = receiver.recv().await { let mut all_events = Vec::new(); - process_message(msg, &mut aggregator, own_authority, &mut all_events); + state.process_message(msg, &mut all_events); while let Ok(msg) = receiver.try_recv() { - process_message(msg, &mut aggregator, own_authority, &mut all_events); + state.process_message(msg, &mut all_events); } if !all_events.is_empty() { @@ -92,74 +184,170 @@ async fn run_sailfish_service( } } -fn process_message( - msg: SailfishServiceMessage, - aggregator: &mut CertificationAggregator, +// --------------------------------------------------------------------------- +// Service state (all aggregators) +// --------------------------------------------------------------------------- + +struct ServiceState { + committee: Arc, own_authority: AuthorityIndex, - events: &mut Vec, -) { - match msg { - SailfishServiceMessage::ProcessBlocks(block_refs) => { - for block_ref in block_refs { - let echo = CertMessage { - 
block_ref, - sender: own_authority, - kind: CertMessageKind::Echo, - }; - let cert_events = aggregator.add_message(&echo); - dispatch_cert_events(aggregator, cert_events, own_authority, events); - events.push(SailfishCertEvent::Broadcast(echo)); - } - } - SailfishServiceMessage::CertMessage(message) => { - let cert_events = aggregator.add_message(&message); - dispatch_cert_events(aggregator, cert_events, own_authority, events); - } - SailfishServiceMessage::Cleanup(round) => { - aggregator.cleanup_below_round(round); + rbc: CertificationAggregator, + timeouts: AHashMap, + no_votes: AHashMap<(RoundNumber, AuthorityIndex), SignedQuorumAggregator>, +} + +impl ServiceState { + fn new(committee: Arc, own_authority: AuthorityIndex) -> Self { + Self { + rbc: CertificationAggregator::new(committee.clone()), + committee, + own_authority, + timeouts: AHashMap::new(), + no_votes: AHashMap::new(), } } -} -fn dispatch_cert_events( - aggregator: &mut CertificationAggregator, - cert_events: Vec, - own_authority: AuthorityIndex, - out: &mut Vec, -) { - let mut pending = VecDeque::from(cert_events); - while let Some(event) = pending.pop_front() { - match event { - CertEvent::FastDelivery(block_ref) | CertEvent::SlowDelivery(block_ref) => { - out.push(SailfishCertEvent::Certified(block_ref)); + fn process_message( + &mut self, + msg: SailfishServiceMessage, + events: &mut Vec, + ) { + match msg { + SailfishServiceMessage::ProcessBlocks(block_refs) => { + for block_ref in block_refs { + let echo = CertMessage { + block_ref, + sender: self.own_authority, + kind: CertMessageKind::Echo, + }; + let cert_events = self.rbc.add_message(&echo); + self.dispatch_cert_events(cert_events, events); + events.push(SailfishCertEvent::Broadcast(echo)); + } } - CertEvent::SendVote(block_ref) => { - let vote = CertMessage { - block_ref, - sender: own_authority, - kind: CertMessageKind::Vote, - }; - pending.extend(aggregator.add_message(&vote)); - out.push(SailfishCertEvent::Broadcast(vote)); + 
SailfishServiceMessage::CertMessage(message) => { + let cert_events = self.rbc.add_message(&message); + self.dispatch_cert_events(cert_events, events); } - CertEvent::SendReady(block_ref) => { - let ready = CertMessage { - block_ref, - sender: own_authority, - kind: CertMessageKind::Ready, - }; - pending.extend(aggregator.add_message(&ready)); - out.push(SailfishCertEvent::Broadcast(ready)); + SailfishServiceMessage::TimeoutMsg(msg) => { + self.add_timeout_msg(msg, events); + } + SailfishServiceMessage::NoVoteMsg(msg) => { + self.add_novote_msg(msg, events); + } + SailfishServiceMessage::LocalTimeout(round) => { + self.handle_local_timeout(round, events); + } + SailfishServiceMessage::LocalNoVote { round, leader } => { + self.handle_local_novote(round, leader, events); + } + SailfishServiceMessage::Cleanup(round) => { + self.rbc.cleanup_below_round(round); + self.timeouts.retain(|&r, _| r >= round); + self.no_votes.retain(|&(r, _), _| r >= round); } } } + + // -- RBC event dispatch -------------------------------------------------- + + fn dispatch_cert_events( + &mut self, + cert_events: Vec, + out: &mut Vec, + ) { + let mut pending = VecDeque::from(cert_events); + while let Some(event) = pending.pop_front() { + match event { + CertEvent::FastDelivery(block_ref) | CertEvent::SlowDelivery(block_ref) => { + out.push(SailfishCertEvent::Certified(block_ref)); + } + CertEvent::SendVote(block_ref) => { + let vote = CertMessage { + block_ref, + sender: self.own_authority, + kind: CertMessageKind::Vote, + }; + pending.extend(self.rbc.add_message(&vote)); + out.push(SailfishCertEvent::Broadcast(vote)); + } + CertEvent::SendReady(block_ref) => { + let ready = CertMessage { + block_ref, + sender: self.own_authority, + kind: CertMessageKind::Ready, + }; + pending.extend(self.rbc.add_message(&ready)); + out.push(SailfishCertEvent::Broadcast(ready)); + } + } + } + } + + // -- Timeout aggregation ------------------------------------------------- + + fn handle_local_timeout(&mut 
self, round: RoundNumber, events: &mut Vec) { + // Intentionally a no-op for now: the actual signing happens in net_sync.rs. + // The caller (net_sync) is responsible for signing the timeout message, and + // the LocalTimeout variant expects it to have already called + // send_sailfish_message with that pre-signed message. + // + // LocalTimeout therefore only tells us that the timeout for `round` should + // already have been sent. If the service is later made the single signing + // point, net_sync will need to pass it a Signer handle. + // Binding the arguments below avoids unused-variable warnings. + let _ = (round, events); + } + + fn add_timeout_msg(&mut self, msg: SailfishTimeoutMsg, events: &mut Vec) { + let agg = self + .timeouts + .entry(msg.round) + .or_insert_with(SignedQuorumAggregator::new); + if agg.add(msg.sender, msg.signature, &self.committee) { + events.push(SailfishCertEvent::TimeoutReady(SailfishTimeoutCert { + round: msg.round, + signatures: agg.signatures.clone(), + })); + } + } + + // -- No-vote aggregation ------------------------------------------------- + + fn handle_local_novote( + &mut self, + round: RoundNumber, + leader: AuthorityIndex, + events: &mut Vec, + ) { + let _ = (round, leader, events); + } + + fn add_novote_msg(&mut self, msg: SailfishNoVoteMsg, events: &mut Vec) { + let agg = self + .no_votes + .entry((msg.round, msg.leader)) + .or_insert_with(SignedQuorumAggregator::new); + if agg.add(msg.sender, msg.signature, &self.committee) { + events.push(SailfishCertEvent::NoVoteReady(SailfishNoVoteCert { + round: msg.round, + leader: msg.leader, + signatures: agg.signatures.clone(), + })); + } + } } +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + #[cfg(test)] mod tests { use prometheus::Registry; use super::*; + use crate::crypto; fn make_committee(n: usize) -> Arc { Committee::new_test(vec![1; n]) @@ -295,4 +483,153 @@ mod
tests { matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) })); } + + /// N=4, F=1: quorum_threshold = 3. Three timeout messages form a TC. + #[tokio::test] + async fn timeout_cert_formation() { + let committee = make_committee(4); + let own_authority = 0; + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee.clone(), + own_authority, + msg_rx, + event_tx, + test_metrics(), + ); + + let round = 5; + let signers = crate::crypto::Signer::new_for_test(4); + let digest = crypto::sailfish_timeout_digest(round); + + // Send 3 timeout messages (quorum = 3 for N=4) + for sender in 0..3u8 { + let sig = signers[sender as usize].sign_digest(&digest); + msg_tx + .send(SailfishServiceMessage::TimeoutMsg(SailfishTimeoutMsg { + round, + sender, + signature: sig, + })) + .unwrap(); + } + + let events = event_rx.recv().await.expect("expected timeout cert"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::TimeoutReady(cert) if cert.round == round + && cert.signatures.len() == 3 + ) + })); + } + + /// N=4, F=1: quorum_threshold = 3. Three no-vote messages form a NVC. 
+ #[tokio::test] + async fn novote_cert_formation() { + let committee = make_committee(4); + let own_authority = 0; + let leader = 2; + let round = 7; + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee.clone(), + own_authority, + msg_rx, + event_tx, + test_metrics(), + ); + + let signers = crate::crypto::Signer::new_for_test(4); + let digest = crypto::sailfish_novote_digest(round, leader); + + for sender in 0..3u8 { + let sig = signers[sender as usize].sign_digest(&digest); + msg_tx + .send(SailfishServiceMessage::NoVoteMsg(SailfishNoVoteMsg { + round, + leader, + sender, + signature: sig, + })) + .unwrap(); + } + + let events = event_rx.recv().await.expect("expected novote cert"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::NoVoteReady(cert) if cert.round == round + && cert.leader == leader + && cert.signatures.len() == 3 + ) + })); + } + + /// Duplicate timeout messages from the same sender are ignored. + /// Sends sender=0 twice, then senders 1 and 2. With dedup, the cert + /// should form from exactly 3 unique signers (0, 1, 2), not 4. 
+ #[tokio::test] + async fn duplicate_timeout_ignored() { + let committee = make_committee(4); + let own_authority = 0; + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee.clone(), + own_authority, + msg_rx, + event_tx, + test_metrics(), + ); + + let round = 3; + let signers = crate::crypto::Signer::new_for_test(4); + let digest = crypto::sailfish_timeout_digest(round); + let sig = signers[0].sign_digest(&digest); + + // Send same timeout twice from sender 0 (duplicate) + msg_tx + .send(SailfishServiceMessage::TimeoutMsg(SailfishTimeoutMsg { + round, + sender: 0, + signature: sig, + })) + .unwrap(); + msg_tx + .send(SailfishServiceMessage::TimeoutMsg(SailfishTimeoutMsg { + round, + sender: 0, + signature: sig, + })) + .unwrap(); + // Two more unique senders to reach quorum (3) + msg_tx + .send(SailfishServiceMessage::TimeoutMsg(SailfishTimeoutMsg { + round, + sender: 1, + signature: signers[1].sign_digest(&digest), + })) + .unwrap(); + msg_tx + .send(SailfishServiceMessage::TimeoutMsg(SailfishTimeoutMsg { + round, + sender: 2, + signature: signers[2].sign_digest(&digest), + })) + .unwrap(); + + let events = event_rx.recv().await.expect("expected timeout cert"); + let cert = events.iter().find_map(|e| match e { + SailfishCertEvent::TimeoutReady(cert) => Some(cert), + _ => None, + }); + // Cert formed with exactly 3 unique signers (the duplicate was ignored) + assert_eq!(cert.unwrap().signatures.len(), 3); + } } diff --git a/crates/starfish-core/src/shard_reconstructor.rs b/crates/starfish-core/src/shard_reconstructor.rs index d85bc93a..bcc67c44 100644 --- a/crates/starfish-core/src/shard_reconstructor.rs +++ b/crates/starfish-core/src/shard_reconstructor.rs @@ -498,6 +498,7 @@ mod tests { None, None, None, + None, ); (block, encoded) } diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index d275b64c..5f7cd05d 100644 --- 
a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -21,7 +21,8 @@ use crate::{ sailfish_service::SailfishServiceMessage, types::{ AuthorityIndex, BlockReference, PartialSig, PartialSigKind, ProvableShard, - ReconstructedTransactionData, RoundNumber, Stake, VerifiedBlock, + ReconstructedTransactionData, RoundNumber, SailfishNoVoteCert, SailfishTimeoutCert, Stake, + VerifiedBlock, }, }; @@ -193,6 +194,23 @@ impl Syncer { } } + /// Store a Sailfish++ timeout certificate in DagState and retry block + /// creation (a TC may unblock block creation for the next round). + pub fn apply_timeout_cert(&mut self, cert: SailfishTimeoutCert) { + self.core.dag_state().add_timeout_cert(cert); + self.maybe_update_proposal_wait(); + self.try_new_block(BlockCreationReason::CertificateEvent); + } + + /// Store a Sailfish++ no-vote certificate in DagState and retry block + /// creation + commit (an NVC may enable direct skip). + pub fn apply_novote_cert(&mut self, cert: SailfishNoVoteCert) { + self.core.dag_state().add_novote_cert(cert); + self.maybe_update_proposal_wait(); + self.try_new_block(BlockCreationReason::CertificateEvent); + self.try_new_commit(); + } + /// Apply BLS certificate events from the BLS verification service. /// Fresh certificates can unblock both block production and sequencing, so /// retry both paths immediately when DAG state changed. 
diff --git a/crates/starfish-core/src/threshold_clock.rs b/crates/starfish-core/src/threshold_clock.rs index 07e500ca..74904584 100644 --- a/crates/starfish-core/src/threshold_clock.rs +++ b/crates/starfish-core/src/threshold_clock.rs @@ -115,6 +115,7 @@ mod tests { }, strong_vote: None, bls: None, + sailfish: None, serialized: None, } } diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index 1a667c2f..82f79e61 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -190,6 +190,63 @@ pub struct CertMessage { pub kind: CertMessageKind, } +// --------------------------------------------------------------------------- +// Sailfish++ control-plane messages (signed with Ed25519). +// Timeout certificates enable liveness when a leader is silent. +// No-vote certificates let honest leaders prove they didn't see the +// previous leader, enabling direct skip in the commit rule. +// --------------------------------------------------------------------------- + +/// Signed timeout message: "I haven't seen a certified leader for round +/// `round`." +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SailfishTimeoutMsg { + pub round: RoundNumber, + pub sender: AuthorityIndex, + pub signature: SignatureBytes, +} + +/// Aggregated timeout certificate: ≥ 2f+1 signed timeout messages for a round. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SailfishTimeoutCert { + pub round: RoundNumber, + pub signatures: Vec<(AuthorityIndex, SignatureBytes)>, +} + +/// Signed no-vote message: "I am advancing past round `round` without voting +/// for leader `leader`." Sent to the next-round leader so it can build a +/// no-vote certificate. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SailfishNoVoteMsg { + pub round: RoundNumber, + pub leader: AuthorityIndex, + pub sender: AuthorityIndex, + pub signature: SignatureBytes, +} + +/// Aggregated no-vote certificate: ≥ 2f+1 signed no-vote messages for a +/// (round, leader) slot. Embedded in the leader's block to prove it may skip +/// the previous leader. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SailfishNoVoteCert { + pub round: RoundNumber, + pub leader: AuthorityIndex, + pub signatures: Vec<(AuthorityIndex, SignatureBytes)>, +} + +/// Protocol-specific fields embedded in SailfishPlusPlus block headers. +/// Part of the signed block hash. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct SailfishFields { + /// Timeout certificate for the previous round, if this block advances + /// without a path to the previous-round leader. + pub timeout_cert: Option, + /// No-vote certificate for the previous round's leader slot, included + /// only by the current round's elected leader when it lacks a path to + /// the previous-round leader. + pub no_vote_cert: Option, +} + // --------------------------------------------------------------------------- // BlockHeader — signed, content-addressed block identity. // Contains exactly the fields that feed into BlockDigest::new() and @@ -222,6 +279,9 @@ pub struct BlockHeader { pub(crate) strong_vote: Option, /// BLS certificate fields (StarfishBls only). None for all other protocols. pub(crate) bls: Option>, + /// Sailfish++ control certificates (timeout/no-vote). None for all other + /// protocols. + pub(crate) sailfish: Option>, // -- Cache (not serialized) ----------------------------------------------- /// Cached bincode-serialized bytes. 
Populated by `preserialize()` off the @@ -354,6 +414,22 @@ impl BlockHeader { .unwrap_or(&[]) } + pub fn sailfish(&self) -> Option<&SailfishFields> { + self.sailfish.as_deref() + } + + pub fn sailfish_timeout_cert(&self) -> Option<&SailfishTimeoutCert> { + self.sailfish + .as_ref() + .and_then(|sf| sf.timeout_cert.as_ref()) + } + + pub fn sailfish_no_vote_cert(&self) -> Option<&SailfishNoVoteCert> { + self.sailfish + .as_ref() + .and_then(|sf| sf.no_vote_cert.as_ref()) + } + pub fn preserialize(&mut self) { if self.serialized.is_none() { self.serialized = Some( @@ -616,6 +692,7 @@ impl VerifiedBlock { merkle_root: TransactionsCommitment, strong_vote: Option, bls: Option, + sailfish: Option, ) -> Self { let (acknowledgment_intersection, acknowledgment_references) = compress_acknowledgments(&block_references, &acknowledgment_references); @@ -649,6 +726,7 @@ impl VerifiedBlock { }, strong_vote, bls: bls.map(Box::new), + sailfish: sailfish.map(Box::new), serialized: None, }; @@ -691,6 +769,7 @@ impl VerifiedBlock { }, strong_vote: None, bls: None, + sailfish: None, serialized: None, }; let mut block = Self { @@ -701,6 +780,7 @@ impl VerifiedBlock { Data::new(block) } + #[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)] pub fn new_with_signer( authority: AuthorityIndex, @@ -721,6 +801,7 @@ impl VerifiedBlock { certified_leader: Option<(BlockReference, BlsAggregateCertificate)>, precomputed_round_sig: Option, precomputed_leader_sig: Option, + sailfish: Option, ) -> Self { let transactions_commitment = match consensus_protocol { ConsensusProtocol::Starfish @@ -801,6 +882,7 @@ impl VerifiedBlock { transactions_commitment, strong_vote, bls, + sailfish, ) } @@ -882,6 +964,18 @@ impl VerifiedBlock { self.header.is_strong_blame() } + pub fn sailfish(&self) -> Option<&SailfishFields> { + self.header.sailfish() + } + + pub fn sailfish_timeout_cert(&self) -> Option<&SailfishTimeoutCert> { + self.header.sailfish_timeout_cert() + } + + pub fn 
sailfish_no_vote_cert(&self) -> Option<&SailfishNoVoteCert> { + self.header.sailfish_no_vote_cert() + } + // --- Payload accessors --- pub fn transaction_data(&self) -> Option<&TransactionData> { @@ -1202,9 +1296,7 @@ impl VerifiedBlock { "Only StarfishBls blocks may carry BLS fields" ); } - ConsensusProtocol::Mysticeti - | ConsensusProtocol::CordialMiners - | ConsensusProtocol::SailfishPlusPlus => { + ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners => { ensure!( acknowledgments.is_empty(), "{consensus_protocol:?} blocks must not carry acknowledgments" @@ -1218,10 +1310,107 @@ impl VerifiedBlock { "Only StarfishBls blocks may carry BLS fields" ); } + ConsensusProtocol::SailfishPlusPlus => { + ensure!( + acknowledgments.is_empty(), + "SailfishPlusPlus blocks must not carry acknowledgments" + ); + ensure!( + threshold_clock_valid_block_header(&self.header, committee), + "Threshold clock is not valid" + ); + ensure!( + self.header.bls().is_none(), + "Only StarfishBls blocks may carry BLS fields" + ); + // Validate Sailfish++ control fields. + if let Some(sf) = self.header.sailfish() { + self.verify_sailfish_fields(sf, committee)?; + } + } + } + Ok(()) + } + + /// Validate Sailfish++ timeout and no-vote certificates embedded in + /// the block header. 
+ fn verify_sailfish_fields( + &self, + sf: &SailfishFields, + committee: &Committee, + ) -> eyre::Result<()> { + let round = self.round(); + ensure!(round > 1, "SailfishFields not expected in round 0 or 1"); + let prev_round = round - 1; + + if let Some(tc) = &sf.timeout_cert { + ensure!( + tc.round == prev_round, + "Timeout cert round {} does not match expected {}", + tc.round, + prev_round + ); + let digest = crypto::sailfish_timeout_digest(tc.round); + verify_signed_quorum(&tc.signatures, &digest, committee, "timeout")?; } + + if let Some(nvc) = &sf.no_vote_cert { + ensure!( + nvc.round == prev_round, + "NoVote cert round {} does not match expected {}", + nvc.round, + prev_round + ); + ensure!( + nvc.leader == committee.elect_leader(prev_round), + "NoVote cert leader {} does not match elected leader", + nvc.leader + ); + let digest = crypto::sailfish_novote_digest(nvc.round, nvc.leader); + verify_signed_quorum(&nvc.signatures, &digest, committee, "novote")?; + } + Ok(()) } } + +/// Verify that a vector of (authority, Ed25519 signature) pairs forms a valid +/// quorum over the given digest. Checks signer uniqueness, quorum stake, and +/// every signature. 
+fn verify_signed_quorum( + signatures: &[(AuthorityIndex, SignatureBytes)], + digest: &[u8; 32], + committee: &Committee, + label: &str, +) -> eyre::Result<()> { + let mut seen = 0u128; + let mut stake: Stake = 0; + for &(signer, ref sig) in signatures { + let mask = 1u128 << signer; + ensure!( + seen & mask == 0, + "Duplicate signer {} in {} cert", + signer, + label + ); + seen |= mask; + let pk = committee + .get_public_key(signer) + .ok_or_else(|| eyre::eyre!("Unknown signer {} in {} cert", signer, label))?; + pk.verify_digest_signature(digest, sig) + .map_err(|e| eyre::eyre!("Bad {} sig from {}: {}", label, signer, e))?; + stake += committee.get_stake(signer).unwrap_or(0); + } + ensure!( + stake >= committee.quorum_threshold(), + "{} cert stake {} < quorum {}", + label, + stake, + committee.quorum_threshold() + ); + Ok(()) +} + #[derive( Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize, Default, Debug, )] @@ -1457,6 +1646,7 @@ mod tests { None, None, None, + None, ); (block, committee) } @@ -1492,6 +1682,7 @@ mod tests { None, None, None, + None, ); (block, committee) } @@ -1513,6 +1704,7 @@ mod tests { TransactionsCommitment::default(), None, None, + None, ); assert_eq!(block.acknowledgment_intersection(), Some(2)); @@ -1551,6 +1743,7 @@ mod tests { }, strong_vote: None, bls: None, + sailfish: None, serialized: None, }; @@ -1596,6 +1789,7 @@ mod tests { None, None, None, + None, ); assert_eq!(block.acknowledgments(), vec![c, d]); @@ -1719,6 +1913,7 @@ mod tests { None, None, None, + None, ); let mut encoder = Encoder::new(2, 4, 2).unwrap(); @@ -1787,6 +1982,7 @@ mod tests { None, None, None, + None, ); let mut encoder = Encoder::new(2, 4, 2).unwrap(); @@ -1850,6 +2046,7 @@ mod tests { None, None, None, + None, ); let mut encoder = Encoder::new(2, 4, 2).unwrap(); diff --git a/scripts/dryrun.sh b/scripts/dryrun.sh index c54e598b..fa8e0d47 100755 --- a/scripts/dryrun.sh +++ b/scripts/dryrun.sh @@ -7,7 +7,7 @@ NUM_NODES=${NUM_NODES:-10} 
DESIRED_TPS=${DESIRED_TPS:-1000} # Options: starfish, starfish-speed, starfish-bls, -# cordial-miners, mysticeti +# sailfish-pp, cordial-miners, mysticeti CONSENSUS=${CONSENSUS:-starfish-speed} NUM_BYZANTINE_NODES=${NUM_BYZANTINE_NODES:-0} # Options: timeout-leader, leader-withholding, From 332690a2fe3af7fdeaec4f14c1b9d0b81696c676 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 01:38:33 +0100 Subject: [PATCH 05/21] fix(core): disable TC gate and reject Sailfish++ header-only blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the sailfish_control_ready gate from try_new_block — it deadlocks block creation because the timeout mechanism that produces TCs is not yet wired. The certified_parent_quorum gate already ensures safety; the control-plane gate will be re-enabled once timeout triggers are complete. Block creation is already retried on certificate events via apply_sailfish_certificates → try_new_block, so the certified parent quorum gate unblocks naturally as RBC completes. Also add explicit rejection of header-only blocks for protocols that require full blocks (SailfishPlusPlus, Mysticeti, CordialMiners). --- crates/starfish-core/src/core.rs | 21 +++++++++++++-------- crates/starfish-core/src/net_sync.rs | 16 ++++++++++++---- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index f370cbf4..069b3e4a 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -519,14 +519,18 @@ impl Core { self.collect_transactions_and_references(pending_transactions, clock_round) ); - // SailfishPlusPlus: gate block creation on control-plane certs when - // we lack a parent link to the previous leader. 
- if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus - && !self.sailfish_control_ready(clock_round, &block_references) - { - self.requeue_transactions(std::mem::take(&mut transactions)); - return None; - } + // NOTE: sailfish_control_ready() is intentionally NOT gated here yet. + // The timeout mechanism that produces TCs is not fully wired, so + // enforcing the TC requirement would deadlock block creation whenever + // the previous-round leader is not among our certified parents. + // Once the timeout trigger in net_sync.rs is complete, re-enable: + // + // if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + // && !self.sailfish_control_ready(clock_round, &block_references) + // { + // self.requeue_transactions(std::mem::take(&mut transactions)); + // return None; + // } let starfish_speed_excluded_authors = self.starfish_speed_excluded_ack_authors(clock_round); if starfish_speed_excluded_authors & (1u128 << self.authority) != 0 { @@ -935,6 +939,7 @@ impl Core { /// Check whether Sailfish++ control-plane prerequisites are met for /// creating a block in `clock_round`. Returns true if block creation can /// proceed. 
+ #[allow(dead_code)] fn sailfish_control_ready( &self, clock_round: RoundNumber, diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index 21c9e2dc..a0fa4602 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -508,22 +508,30 @@ impl ConnectionHandler DataSource::BlockBundleStreamingHeader, other => other, }; self.process_block_headers(blocks_without_transactions, header_source) .await; + } else if !headers.is_empty() { + tracing::warn!( + "Rejecting {} header-only blocks from peer {} \ + (not supported by {:?})", + headers.len(), + self.peer_id, + self.consensus_protocol, + ); } // Process standalone shards — route directly to shard reconstructor From 31b44f2e179c948a3ddc0d94e0d88db8f8ac2c9a Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 02:00:12 +0100 Subject: [PATCH 06/21] feat(core): wire Sailfish++ control plane and fix parent selection Phase 1-2: Fix invalid block creation - After certified-parent filtering, check that remaining parents still have quorum stake at round-1. If not, requeue and wait. - For SailfishPlusPlus, preserve all previous-round references during compression so certified-parent filtering keeps quorum. Phase 3: Local timeout origination - ServiceState holds a Signer; handle_local_timeout signs, self-counts, and emits BroadcastTimeout. - leader_timeout_task sends LocalTimeout to sailfish service for SailfishPlusPlus before force_new_block. Phase 4: Local no-vote origination - handle_local_novote signs, self-counts, and emits SendNoVote. - create_new_block triggers LocalNoVote when the created block lacks a parent to the previous-round leader. - NoVote routed only to the next-round elected leader, not broadcast. Phase 5: Signature verification before aggregation - add_timeout_msg and add_novote_msg verify Ed25519 signatures against domain-separated digests before counting stake. 
Phase 6: Header validation (relaxed) - Validate TC/NVC signatures and quorum when present in block headers. - Do not yet enforce mandatory presence (control plane still ramping). --- crates/starfish-core/src/core.rs | 59 ++++++++-- crates/starfish-core/src/net_sync.rs | 36 +++++- crates/starfish-core/src/sailfish_service.rs | 115 ++++++++++++++++--- crates/starfish-core/src/syncer.rs | 21 +++- crates/starfish-core/src/types.rs | 5 +- 5 files changed, 201 insertions(+), 35 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 069b3e4a..afd4dded 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -519,18 +519,16 @@ impl Core { self.collect_transactions_and_references(pending_transactions, clock_round) ); - // NOTE: sailfish_control_ready() is intentionally NOT gated here yet. - // The timeout mechanism that produces TCs is not fully wired, so - // enforcing the TC requirement would deadlock block creation whenever - // the previous-round leader is not among our certified parents. - // Once the timeout trigger in net_sync.rs is complete, re-enable: - // - // if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus - // && !self.sailfish_control_ready(clock_round, &block_references) - // { - // self.requeue_transactions(std::mem::take(&mut transactions)); - // return None; - // } + // SailfishPlusPlus: if the certified-parent filter reduced the parent + // set below threshold-clock quorum, we cannot build a valid block yet. + // Requeue transactions and wait for more certifications. 
+ if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && clock_round > 1 + && block_references.is_empty() + { + self.requeue_transactions(std::mem::take(&mut transactions)); + return None; + } let starfish_speed_excluded_authors = self.starfish_speed_excluded_ack_authors(clock_round); if starfish_speed_excluded_authors & (1u128 << self.authority) != 0 { @@ -671,6 +669,29 @@ impl Core { block_references.retain(|r| r.round == 0 || self.dag_state.has_vertex_certificate(r)); } + // SailfishPlusPlus: verify the filtered parent set still has quorum + // stake at round-1. If certification hasn't caught up, we must wait + // rather than create an invalid block. + if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && block_round > 1 + { + let prev_round = block_round - 1; + let mut prev_round_stake: u64 = 0; + let mut seen = 0u128; + for r in &block_references { + if r.round == prev_round { + let mask = 1u128 << r.authority; + if seen & mask == 0 { + seen |= mask; + prev_round_stake += self.committee.get_stake(r.authority).unwrap_or(0); + } + } + } + if !self.committee.is_quorum(prev_round_stake) { + return (transactions, vec![]); + } + } + (transactions, block_references) } @@ -1006,6 +1027,20 @@ impl Core { pending_refs: &[BlockReference], block_round: RoundNumber, ) -> Vec { + // SailfishPlusPlus: keep all previous-round references unconditionally + // so that certified-parent filtering doesn't drop below quorum. + // Only older-round references go through normal compression. 
+ if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + let prev_round = block_round.saturating_sub(1); + let mut seen = AHashSet::new(); + return pending_refs + .iter() + .copied() + .filter(|r| { + r.authority != self.authority && seen.insert(*r) && r.round >= prev_round + }) + .collect(); + } if self.dag_state.consensus_protocol == ConsensusProtocol::StarfishBls { if self.committee.elect_leader(block_round) != self.authority { return Vec::new(); diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index a0fa4602..a6e5c5be 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -1044,6 +1044,9 @@ pub struct NetworkSyncerInner { pub peer_senders: parking_lot::RwLock>>, pub leader_timeout: Duration, pub soft_block_timeout: Duration, + /// Sailfish++ service handle for sending control messages + /// (timeout/no-vote). None for non-SailfishPlusPlus protocols. + pub sailfish_handle: Option, } impl NetworkSyncer { @@ -1079,12 +1082,20 @@ impl NetworkSyncer }; // Create Sailfish service channel for SailfishPlusPlus protocol. 
let is_sailfish_pp = dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus; + let sailfish_signer = if is_sailfish_pp { + Some(core.get_signer().clone()) + } else { + None + }; let (sf_msg_tx, sf_msg_rx) = if is_sailfish_pp { let (tx, rx) = mpsc::unbounded_channel::(); (Some(tx), Some(rx)) } else { (None, None) }; + let sf_handle_for_inner = sf_msg_tx + .as_ref() + .map(|tx| SailfishServiceHandle::new(tx.clone())); let mut syncer = Syncer::new( core, NetworkSyncSignals { @@ -1147,6 +1158,7 @@ impl NetworkSyncer peer_senders: parking_lot::RwLock::new(AHashMap::new()), leader_timeout: node_parameters.leader_timeout, soft_block_timeout: node_parameters.soft_block_timeout, + sailfish_handle: sf_handle_for_inner, }); // Start bridge task that forwards reconstructed transaction data to core @@ -1268,12 +1280,14 @@ impl NetworkSyncer }; // Start Sailfish++ RBC certification service. - let (sf_service, sf_event_task) = if let (Some(sf_tx), Some(sf_rx)) = (sf_msg_tx, sf_msg_rx) + let (sf_service, sf_event_task) = if let (Some(sf_tx), Some(sf_rx), Some(sf_signer)) = + (sf_msg_tx, sf_msg_rx, sailfish_signer) { let (event_tx, mut event_rx) = mpsc::unbounded_channel::>(); start_sailfish_service( inner.committee.clone(), own_authority, + sf_signer, sf_rx, event_tx, metrics.clone(), @@ -1333,11 +1347,14 @@ impl NetworkSyncer } } SailfishCertEvent::SendNoVote(msg) => { - // No-vote messages are sent only to the - // next-round leader. For now broadcast. - for sender in &senders { + // Route no-vote only to the next-round leader. + let next_leader = + event_inner.committee.elect_leader(msg.round + 1); + let leader_tx = + event_inner.peer_senders.read().get(&next_leader).cloned(); + if let Some(sender) = leader_tx { send_network_message_reliably( - sender, + &sender, NetworkMessage::SailfishNoVote(msg.clone()), ) .await; @@ -1582,6 +1599,15 @@ impl NetworkSyncer select! 
{ _sleep = sleep(leader_timeout) => { tracing::debug!("Timeout in round {round}"); + // SailfishPlusPlus: send LocalTimeout so the service can + // sign and aggregate a timeout certificate for the round + // we're stuck on (the leader at `round` hasn't been + // certified in time). + if let Some(ref sf) = inner.sailfish_handle { + if round > 0 { + sf.send(SailfishServiceMessage::LocalTimeout(round)); + } + } inner.syncer.force_new_block(round).await; } _notified = notified => { diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index f35db1cf..faa1d8bf 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -15,7 +15,7 @@ use tokio::sync::mpsc; use crate::{ cert_aggregator::{CertEvent, CertificationAggregator}, committee::Committee, - crypto::SignatureBytes, + crypto::{self, SignatureBytes, Signer}, metrics::Metrics, types::{ AuthorityIndex, BlockReference, CertMessage, CertMessageKind, RoundNumber, @@ -149,6 +149,7 @@ impl SignedQuorumAggregator { pub fn start_sailfish_service( committee: Arc, own_authority: AuthorityIndex, + signer: Signer, receiver: mpsc::UnboundedReceiver, event_tx: mpsc::UnboundedSender>, _metrics: Arc, @@ -156,6 +157,7 @@ pub fn start_sailfish_service( tokio::spawn(run_sailfish_service( committee, own_authority, + signer, receiver, event_tx, )); @@ -164,10 +166,11 @@ pub fn start_sailfish_service( async fn run_sailfish_service( committee: Arc, own_authority: AuthorityIndex, + signer: Signer, mut receiver: mpsc::UnboundedReceiver, event_tx: mpsc::UnboundedSender>, ) { - let mut state = ServiceState::new(committee, own_authority); + let mut state = ServiceState::new(committee, own_authority, signer); while let Some(msg) = receiver.recv().await { let mut all_events = Vec::new(); @@ -191,17 +194,19 @@ async fn run_sailfish_service( struct ServiceState { committee: Arc, own_authority: AuthorityIndex, + signer: Signer, rbc: 
CertificationAggregator, timeouts: AHashMap, no_votes: AHashMap<(RoundNumber, AuthorityIndex), SignedQuorumAggregator>, } impl ServiceState { - fn new(committee: Arc, own_authority: AuthorityIndex) -> Self { + fn new(committee: Arc, own_authority: AuthorityIndex, signer: Signer) -> Self { Self { rbc: CertificationAggregator::new(committee.clone()), committee, own_authority, + signer, timeouts: AHashMap::new(), no_votes: AHashMap::new(), } @@ -286,20 +291,44 @@ impl ServiceState { // -- Timeout aggregation ------------------------------------------------- + /// Sign a local timeout, count it, and emit a broadcast event. fn handle_local_timeout(&mut self, round: RoundNumber, events: &mut Vec) { - // The caller (net_sync) is responsible for signing the message before - // sending it here. This method just creates a pre-signed local timeout. - // Actually, we sign here so the service can be the single signing point. - // Net_sync will pass a Signer handle if needed, but for now the - // LocalTimeout variant expects the caller to have already called - // send_sailfish_message with a pre-signed message. - // - // For now, LocalTimeout just tells us we should have sent the timeout - // already — the actual signing happens in net_sync.rs. - let _ = (round, events); + let digest = crypto::sailfish_timeout_digest(round); + let signature = self.signer.sign_digest(&digest); + let msg = SailfishTimeoutMsg { + round, + sender: self.own_authority, + signature, + }; + // Count own message in the aggregator first (may form cert immediately). + self.add_verified_timeout(msg.clone(), events); + events.push(SailfishCertEvent::BroadcastTimeout(msg)); } fn add_timeout_msg(&mut self, msg: SailfishTimeoutMsg, events: &mut Vec) { + // Verify signature before aggregation. 
+ let digest = crypto::sailfish_timeout_digest(msg.round); + let pk = match self.committee.get_public_key(msg.sender) { + Some(pk) => pk, + None => return, + }; + if pk.verify_digest_signature(&digest, &msg.signature).is_err() { + tracing::warn!( + "Rejected invalid timeout sig from {} for round {}", + msg.sender, + msg.round, + ); + return; + } + self.add_verified_timeout(msg, events); + } + + /// Add a pre-verified timeout message to the aggregator. + fn add_verified_timeout( + &mut self, + msg: SailfishTimeoutMsg, + events: &mut Vec, + ) { let agg = self .timeouts .entry(msg.round) @@ -314,16 +343,46 @@ impl ServiceState { // -- No-vote aggregation ------------------------------------------------- + /// Sign a local no-vote, count it, and emit a send event. fn handle_local_novote( &mut self, round: RoundNumber, leader: AuthorityIndex, events: &mut Vec, ) { - let _ = (round, leader, events); + let digest = crypto::sailfish_novote_digest(round, leader); + let signature = self.signer.sign_digest(&digest); + let msg = SailfishNoVoteMsg { + round, + leader, + sender: self.own_authority, + signature, + }; + self.add_verified_novote(msg.clone(), events); + events.push(SailfishCertEvent::SendNoVote(msg)); } fn add_novote_msg(&mut self, msg: SailfishNoVoteMsg, events: &mut Vec) { + // Verify signature before aggregation. + let digest = crypto::sailfish_novote_digest(msg.round, msg.leader); + let pk = match self.committee.get_public_key(msg.sender) { + Some(pk) => pk, + None => return, + }; + if pk.verify_digest_signature(&digest, &msg.signature).is_err() { + tracing::warn!( + "Rejected invalid novote sig from {} for round {}, leader {}", + msg.sender, + msg.round, + msg.leader, + ); + return; + } + self.add_verified_novote(msg, events); + } + + /// Add a pre-verified no-vote message to the aggregator. 
+ fn add_verified_novote(&mut self, msg: SailfishNoVoteMsg, events: &mut Vec) { let agg = self .no_votes .entry((msg.round, msg.leader)) @@ -353,6 +412,13 @@ mod tests { Committee::new_test(vec![1; n]) } + fn test_signer(authority: AuthorityIndex) -> Signer { + Signer::new_for_test(authority as usize + 1) + .into_iter() + .nth(authority as usize) + .unwrap() + } + fn test_metrics() -> Arc { Metrics::new(&Registry::new(), None, None, None).0 } @@ -368,7 +434,14 @@ mod tests { let (msg_tx, msg_rx) = mpsc::unbounded_channel(); let (event_tx, mut event_rx) = mpsc::unbounded_channel(); - start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); // Own echo broadcast. msg_tx @@ -428,7 +501,14 @@ mod tests { let (msg_tx, msg_rx) = mpsc::unbounded_channel(); let (event_tx, mut event_rx) = mpsc::unbounded_channel(); - start_sailfish_service(committee, own_authority, msg_rx, event_tx, test_metrics()); + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); // Own echo. 
msg_tx @@ -495,6 +575,7 @@ mod tests { start_sailfish_service( committee.clone(), own_authority, + test_signer(own_authority), msg_rx, event_tx, test_metrics(), @@ -539,6 +620,7 @@ mod tests { start_sailfish_service( committee.clone(), own_authority, + test_signer(own_authority), msg_rx, event_tx, test_metrics(), @@ -583,6 +665,7 @@ mod tests { start_sailfish_service( committee.clone(), own_authority, + test_signer(own_authority), msg_rx, event_tx, test_metrics(), diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index 5f7cd05d..cc03ea06 100644 --- a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -14,7 +14,7 @@ use crate::{ bls_service::BlsServiceMessage, consensus::{CommitMetastate, linearizer::CommittedSubDag}, core::Core, - dag_state::{DagState, DataSource}, + dag_state::{ConsensusProtocol, DagState, DataSource}, data::Data, metrics::{Metrics, UtilizationTimerVecExt}, runtime::timestamp_utc, @@ -301,6 +301,25 @@ impl Syncer { self.send_sailfish_message(SailfishServiceMessage::ProcessBlocks(vec![ *block.reference(), ])); + // SailfishPlusPlus: if we created a block without referencing the + // previous-round leader, send a LocalNoVote so the service can + // sign and aggregate a no-vote certificate. 
+ if self.core.dag_state().consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + let block_round = block.round(); + if block_round > 1 { + let prev_leader = self.core.committee().elect_leader(block_round - 1); + let has_prev_leader = block + .block_references() + .iter() + .any(|r| r.round == block_round - 1 && r.authority == prev_leader); + if !has_prev_leader { + self.send_sailfish_message(SailfishServiceMessage::LocalNoVote { + round: block_round - 1, + leader: prev_leader, + }); + } + } + } if let Some((block_ref, auth, sig)) = self.core.generate_own_dac_partial_sig(block) { self.send_bls_message(BlsServiceMessage::PartialSig(PartialSig { kind: PartialSigKind::Dac(block_ref), diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index 82f79e61..c0f73510 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -1323,7 +1323,10 @@ impl VerifiedBlock { self.header.bls().is_none(), "Only StarfishBls blocks may carry BLS fields" ); - // Validate Sailfish++ control fields. + // Validate Sailfish++ control fields if present. + // TC/NVC presence is not yet enforced as mandatory because the + // local timeout/no-vote origination is still being wired. Once + // the full control plane is live, add "must carry" checks here. if let Some(sf) = self.header.sailfish() { self.verify_sailfish_fields(sf, committee)?; } From 40b78f4330d6cc735aa8701828f5cf719aa0c07f Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 12:35:32 +0100 Subject: [PATCH 07/21] fix(core): count own_previous in Sailfish quorum check and requeue includes Two bugs causing the dryrun stall: 1. The post-filter quorum check in collect_transactions_and_references did not account for the creator's own previous block, which build_block always prepends. This caused valid proposals to be rejected even when own_previous + peer parents had quorum. 2. 
On failed proposals, get_pending_transactions drains Include refs from self.pending, but requeue_transactions only puts back Payload. The Include refs were permanently lost, so subsequent retries saw an empty frontier. Now collect_transactions_and_references returns the raw include refs on failure so the caller can requeue them. --- crates/starfish-core/src/core.rs | 36 +++++++++++++++++++++------- crates/starfish-core/src/net_sync.rs | 24 ++++++++++++++++++- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index afd4dded..cbc31416 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -513,7 +513,7 @@ impl Core { "Core::new_block::get_pending_transactions", self.get_pending_transactions(clock_round) ); - let (mut transactions, block_references) = timed!( + let (mut transactions, block_references, failed_refs) = timed!( self.metrics, "Core::new_block::collect_transactions_and_references", self.collect_transactions_and_references(pending_transactions, clock_round) @@ -521,11 +521,15 @@ impl Core { // SailfishPlusPlus: if the certified-parent filter reduced the parent // set below threshold-clock quorum, we cannot build a valid block yet. - // Requeue transactions and wait for more certifications. + // Requeue both transactions and include refs so the next attempt sees + // them again. 
if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus && clock_round > 1 && block_references.is_empty() { + for r in failed_refs { + self.pending.push(MetaTransaction::Include(r)); + } self.requeue_transactions(std::mem::take(&mut transactions)); return None; } @@ -650,7 +654,11 @@ impl Core { &self, pending: Vec, block_round: RoundNumber, - ) -> (Vec, Vec) { + ) -> ( + Vec, + Vec, + Vec, + ) { let mut transactions = Vec::new(); let mut pending_refs = Vec::new(); for meta_transaction in pending { @@ -661,6 +669,7 @@ impl Core { MetaTransaction::Include(include) => pending_refs.push(include), } } + let raw_refs = pending_refs.clone(); let mut block_references = self.compress_pending_block_references(&pending_refs, block_round); @@ -669,15 +678,26 @@ impl Core { block_references.retain(|r| r.round == 0 || self.dag_state.has_vertex_certificate(r)); } - // SailfishPlusPlus: verify the filtered parent set still has quorum - // stake at round-1. If certification hasn't caught up, we must wait - // rather than create an invalid block. + // SailfishPlusPlus: verify the filtered parent set, together with the + // creator's own previous block (always included by build_block), still + // has quorum stake at round-1. if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus && block_round > 1 { let prev_round = block_round - 1; let mut prev_round_stake: u64 = 0; let mut seen = 0u128; + // Count own_previous: build_block always prepends the author's + // previous block, which is at prev_round after a successful round. 
+ let own_prev_stake = self.committee.get_stake(self.authority).unwrap_or(0); + if self + .last_own_block + .first() + .is_some_and(|ob| ob.block.round() == prev_round) + { + seen |= 1u128 << self.authority; + prev_round_stake += own_prev_stake; + } for r in &block_references { if r.round == prev_round { let mask = 1u128 << r.authority; @@ -688,11 +708,11 @@ impl Core { } } if !self.committee.is_quorum(prev_round_stake) { - return (transactions, vec![]); + return (transactions, vec![], raw_refs); } } - (transactions, block_references) + (transactions, block_references, vec![]) } fn prepare_encoded_transactions( diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index a6e5c5be..10c01e01 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -380,8 +380,19 @@ impl ConnectionHandler { if message.sender != self.peer_id { - return true; // reject: sender must match peer + tracing::debug!( + "Rejected CertMessage: sender {} != peer {}", + message.sender, + self.peer_id, + ); + return true; } + tracing::debug!( + "Received {:?} from peer {} for {:?}", + message.kind, + message.sender, + message.block_ref, + ); if let Some(ref sf) = self.sailfish_service { sf.send(SailfishServiceMessage::CertMessage(message)); } @@ -1297,6 +1308,7 @@ impl NetworkSyncer let event_inner = inner.clone(); let event_task = handle.spawn(async move { while let Some(events) = event_rx.recv().await { + tracing::debug!("Sailfish event bridge: {} events", events.len(),); let certified_refs: Vec<_> = events .iter() .filter_map(|event| match event { @@ -1305,6 +1317,11 @@ impl NetworkSyncer }) .collect(); if !certified_refs.is_empty() { + tracing::info!( + "Applying {} Sailfish certificates: {:?}", + certified_refs.len(), + certified_refs, + ); event_inner .syncer .apply_sailfish_certificates(certified_refs) @@ -1326,6 +1343,11 @@ impl NetworkSyncer { let senders: Vec<_> = 
event_inner.peer_senders.read().values().cloned().collect(); + tracing::debug!( + "Sailfish broadcast: {} peers, {} events", + senders.len(), + events.len(), + ); for event in &events { match event { SailfishCertEvent::Broadcast(message) => { From d48633367c2e7ed0cdcb7c0e2ccfd2f8a892e0d0 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 13:16:13 +0100 Subject: [PATCH 08/21] fix(dag_state): mark empty SailfishPlusPlus blocks as data-available The is_empty_full_block check in update_data_availability matched only Mysticeti and CordialMiners. SailfishPlusPlus blocks with empty payloads (transactions: None, empty merkle root) were never marked data-available, blocking drain_available_commits and preventing transaction metrics from being reported. --- crates/starfish-core/src/dag_state.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index c44d121d..8f0782f3 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -2372,7 +2372,9 @@ impl DagStateInner { let auth = r.authority as usize; let is_empty_full_block = matches!( self.consensus_protocol, - ConsensusProtocol::Mysticeti | ConsensusProtocol::CordialMiners + ConsensusProtocol::Mysticeti + | ConsensusProtocol::CordialMiners + | ConsensusProtocol::SailfishPlusPlus ) && block.transactions().is_none() && block.merkle_root() == TransactionsCommitment::new_from_transactions(&Vec::new()); if block.has_empty_payload() || is_empty_full_block { From d26170ba43fe1b2013941545be402eba6883ab93 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 13:47:11 +0100 Subject: [PATCH 09/21] feat(metrics): add Sailfish++ RBC fast/slow path counters Add sailfish_rbc_fast_total and sailfish_rbc_slow_total counters that track how many vertices are certified via the optimistic fast path (echo quorum) vs the slow path (ready quorum). 
Incremented in the sailfish service dispatch_cert_events on FastDelivery and SlowDelivery events respectively. Add a "Sailfish++ RBC certification path" panel to the Grafana dashboard showing the rate of fast vs slow certifications. --- crates/starfish-core/src/metrics.rs | 14 +++ crates/starfish-core/src/sailfish_service.rs | 22 ++++- monitoring/grafana/grafana-dashboard.json | 94 ++++++++++++++++++++ 3 files changed, 126 insertions(+), 4 deletions(-) diff --git a/crates/starfish-core/src/metrics.rs b/crates/starfish-core/src/metrics.rs index 223a15ab..48defa02 100644 --- a/crates/starfish-core/src/metrics.rs +++ b/crates/starfish-core/src/metrics.rs @@ -41,6 +41,8 @@ pub struct Metrics { pub proposal_wait_time_total_us: IntCounter, pub sequenced_transactions_total: IntCounter, pub sequenced_transactions_bytes: IntCounter, + pub sailfish_rbc_fast_total: IntCounter, + pub sailfish_rbc_slow_total: IntCounter, pub filtered_blocks_total: IntCounter, pub filtered_shards_total: IntCounter, @@ -583,6 +585,18 @@ impl Metrics { registry, ) .unwrap(), + sailfish_rbc_fast_total: register_int_counter_with_registry!( + "sailfish_rbc_fast_total", + "Sailfish++ RBC certifications via fast path (echo quorum)", + registry, + ) + .unwrap(), + sailfish_rbc_slow_total: register_int_counter_with_registry!( + "sailfish_rbc_slow_total", + "Sailfish++ RBC certifications via slow path (ready quorum)", + registry, + ) + .unwrap(), dag_state_loaded_blocks: register_int_counter_with_registry!( "dag_state_loaded_blocks", diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index faa1d8bf..2d4d9141 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -152,7 +152,7 @@ pub fn start_sailfish_service( signer: Signer, receiver: mpsc::UnboundedReceiver, event_tx: mpsc::UnboundedSender>, - _metrics: Arc, + metrics: Arc, ) { tokio::spawn(run_sailfish_service( committee, @@ -160,6 +160,7 @@ 
pub fn start_sailfish_service( signer, receiver, event_tx, + metrics, )); } @@ -169,8 +170,9 @@ async fn run_sailfish_service( signer: Signer, mut receiver: mpsc::UnboundedReceiver, event_tx: mpsc::UnboundedSender>, + metrics: Arc, ) { - let mut state = ServiceState::new(committee, own_authority, signer); + let mut state = ServiceState::new(committee, own_authority, signer, metrics); while let Some(msg) = receiver.recv().await { let mut all_events = Vec::new(); @@ -195,18 +197,25 @@ struct ServiceState { committee: Arc, own_authority: AuthorityIndex, signer: Signer, + metrics: Arc, rbc: CertificationAggregator, timeouts: AHashMap, no_votes: AHashMap<(RoundNumber, AuthorityIndex), SignedQuorumAggregator>, } impl ServiceState { - fn new(committee: Arc, own_authority: AuthorityIndex, signer: Signer) -> Self { + fn new( + committee: Arc, + own_authority: AuthorityIndex, + signer: Signer, + metrics: Arc, + ) -> Self { Self { rbc: CertificationAggregator::new(committee.clone()), committee, own_authority, signer, + metrics, timeouts: AHashMap::new(), no_votes: AHashMap::new(), } @@ -264,7 +273,12 @@ impl ServiceState { let mut pending = VecDeque::from(cert_events); while let Some(event) = pending.pop_front() { match event { - CertEvent::FastDelivery(block_ref) | CertEvent::SlowDelivery(block_ref) => { + CertEvent::FastDelivery(block_ref) => { + self.metrics.sailfish_rbc_fast_total.inc(); + out.push(SailfishCertEvent::Certified(block_ref)); + } + CertEvent::SlowDelivery(block_ref) => { + self.metrics.sailfish_rbc_slow_total.inc(); out.push(SailfishCertEvent::Certified(block_ref)); } CertEvent::SendVote(block_ref) => { diff --git a/monitoring/grafana/grafana-dashboard.json b/monitoring/grafana/grafana-dashboard.json index ba6dad2c..9f0701ef 100644 --- a/monitoring/grafana/grafana-dashboard.json +++ b/monitoring/grafana/grafana-dashboard.json @@ -1775,6 +1775,100 @@ "title": "Leader commit status rate", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + 
"uid": "Fixed-UID-testbed" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 254, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Fixed-UID-testbed" + }, + "editorMode": "code", + "expr": "sum(rate(sailfish_rbc_fast_total{node=~\"$node\"}[1m]))", + "legendFormat": "fast path", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Fixed-UID-testbed" + }, + "editorMode": "code", + "expr": "sum(rate(sailfish_rbc_slow_total{node=~\"$node\"}[1m]))", + "legendFormat": "slow path", + "range": true, + "refId": "B" + } + ], + "title": "Sailfish++ RBC certification path", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", From 356ce82477160b7a5d054f8fe5b17b555e77b62f Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Tue, 17 Mar 2026 13:57:21 +0100 Subject: [PATCH 10/21] fix(dashboard): move Sailfish RBC panel next to Block sync requests --- monitoring/grafana/grafana-dashboard.json | 188 +++++++++++----------- 1 file changed, 
94 insertions(+), 94 deletions(-) diff --git a/monitoring/grafana/grafana-dashboard.json b/monitoring/grafana/grafana-dashboard.json index 9f0701ef..85215c73 100644 --- a/monitoring/grafana/grafana-dashboard.json +++ b/monitoring/grafana/grafana-dashboard.json @@ -1775,100 +1775,6 @@ "title": "Leader commit status rate", "type": "timeseries" }, - { - "datasource": { - "type": "prometheus", - "uid": "Fixed-UID-testbed" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 53 - }, - "id": 254, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.3.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "Fixed-UID-testbed" - }, - "editorMode": "code", - "expr": "sum(rate(sailfish_rbc_fast_total{node=~\"$node\"}[1m]))", - "legendFormat": "fast path", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "Fixed-UID-testbed" - }, - "editorMode": "code", - "expr": "sum(rate(sailfish_rbc_slow_total{node=~\"$node\"}[1m]))", - "legendFormat": "slow path", - "range": true, - "refId": "B" - } - ], - "title": "Sailfish++ RBC 
certification path", - "type": "timeseries" - }, { "datasource": { "type": "prometheus", @@ -6204,6 +6110,100 @@ "title": "Tx data requests sent", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "Fixed-UID-testbed" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 201 + }, + "id": 254, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Fixed-UID-testbed" + }, + "editorMode": "code", + "expr": "sum(rate(sailfish_rbc_fast_total{node=~\"$node\"}[1m]))", + "legendFormat": "fast path", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Fixed-UID-testbed" + }, + "editorMode": "code", + "expr": "sum(rate(sailfish_rbc_slow_total{node=~\"$node\"}[1m]))", + "legendFormat": "slow path", + "range": true, + "refId": "B" + } + ], + "title": "Sailfish++ RBC certification path", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", From 01327e8cf09e3a4ca1ca003192389ac38c2bf09a Mon Sep 17 00:00:00 2001 From: Nikita Polianskii 
Date: Tue, 17 Mar 2026 15:41:40 +0100 Subject: [PATCH 11/21] feat(sailfish): buffer early RBC messages for unknown blocks When a peer's Echo/Vote/Ready arrives before we've seen the block, buffer it keyed by (round, authority) slot instead of dropping it. Drain matching messages through the aggregator when the canonical block is registered via ProcessBlocks. Conflicting-digest messages in the buffer are silently discarded. Per-slot buffer is capped at 3 * committee_size to bound memory. --- crates/starfish-core/src/sailfish_service.rs | 416 ++++++++++++++++++- 1 file changed, 414 insertions(+), 2 deletions(-) diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index 2d4d9141..27ca5b01 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -193,12 +193,28 @@ async fn run_sailfish_service( // Service state (all aggregators) // --------------------------------------------------------------------------- +/// Result of checking an inbound RBC message against known canonical blocks. +enum RbcAcceptance { + /// Block is known and the message references the canonical digest. + Canonical, + /// Block is known but the message references a different digest. + Conflicting, + /// No block is registered for this (round, authority) slot yet. + Unknown, +} + struct ServiceState { committee: Arc, own_authority: AuthorityIndex, signer: Signer, metrics: Arc, rbc: CertificationAggregator, + canonical_blocks: AHashMap<(RoundNumber, AuthorityIndex), BlockReference>, + /// RBC messages that arrived before the corresponding block was seen. + /// Keyed by (round, authority) slot. Drained when the block is registered. + pending_rbc: AHashMap<(RoundNumber, AuthorityIndex), Vec>, + /// Maximum number of buffered messages per slot (bound against spam). 
+ pending_rbc_cap: usize, timeouts: AHashMap, no_votes: AHashMap<(RoundNumber, AuthorityIndex), SignedQuorumAggregator>, } @@ -210,12 +226,17 @@ impl ServiceState { signer: Signer, metrics: Arc, ) -> Self { + // At most 3 message types (Echo, Vote, Ready) per authority. + let pending_rbc_cap = 3 * committee.len(); Self { rbc: CertificationAggregator::new(committee.clone()), committee, own_authority, signer, metrics, + canonical_blocks: AHashMap::new(), + pending_rbc: AHashMap::new(), + pending_rbc_cap, timeouts: AHashMap::new(), no_votes: AHashMap::new(), } @@ -229,6 +250,9 @@ impl ServiceState { match msg { SailfishServiceMessage::ProcessBlocks(block_refs) => { for block_ref in block_refs { + if !self.register_canonical_block(block_ref, events) { + continue; + } let echo = CertMessage { block_ref, sender: self.own_authority, @@ -240,8 +264,16 @@ impl ServiceState { } } SailfishServiceMessage::CertMessage(message) => { - let cert_events = self.rbc.add_message(&message); - self.dispatch_cert_events(cert_events, events); + match self.accept_rbc_message(&message) { + RbcAcceptance::Canonical => { + let cert_events = self.rbc.add_message(&message); + self.dispatch_cert_events(cert_events, events); + } + RbcAcceptance::Unknown => { + self.buffer_rbc_message(message); + } + RbcAcceptance::Conflicting => {} + } } SailfishServiceMessage::TimeoutMsg(msg) => { self.add_timeout_msg(msg, events); @@ -257,12 +289,84 @@ impl ServiceState { } SailfishServiceMessage::Cleanup(round) => { self.rbc.cleanup_below_round(round); + self.canonical_blocks.retain(|&(r, _), _| r >= round); + self.pending_rbc.retain(|&(r, _), _| r >= round); self.timeouts.retain(|&r, _| r >= round); self.no_votes.retain(|&(r, _), _| r >= round); } } } + fn register_canonical_block( + &mut self, + block_ref: BlockReference, + events: &mut Vec, + ) -> bool { + let slot = (block_ref.round, block_ref.authority); + match self.canonical_blocks.get(&slot).copied() { + None => { + self.canonical_blocks.insert(slot, 
block_ref); + // Drain any early RBC messages buffered for this slot. + if let Some(buffered) = self.pending_rbc.remove(&slot) { + for msg in buffered { + if msg.block_ref == block_ref { + let cert_events = self.rbc.add_message(&msg); + self.dispatch_cert_events(cert_events, events); + } + } + } + true + } + Some(canonical) => { + if canonical != block_ref { + tracing::warn!( + "Ignoring conflicting Sailfish block {:?}; \ + canonical for ({}, {}) is {:?}", + block_ref, + block_ref.authority, + block_ref.round, + canonical, + ); + } + false + } + } + } + + fn accept_rbc_message(&self, message: &CertMessage) -> RbcAcceptance { + let slot = (message.block_ref.round, message.block_ref.authority); + match self.canonical_blocks.get(&slot) { + Some(canonical) if *canonical == message.block_ref => RbcAcceptance::Canonical, + Some(canonical) => { + tracing::debug!( + "Ignoring RBC {:?} for conflicting block {:?}; \ + canonical is {:?}", + message.kind, + message.block_ref, + canonical, + ); + RbcAcceptance::Conflicting + } + None => RbcAcceptance::Unknown, + } + } + + /// Buffer an RBC message for a block we haven't seen yet. 
+ fn buffer_rbc_message(&mut self, message: CertMessage) { + let slot = (message.block_ref.round, message.block_ref.authority); + let buf = self.pending_rbc.entry(slot).or_default(); + if buf.len() >= self.pending_rbc_cap { + tracing::warn!( + "Dropping RBC {:?} for slot ({}, {}): buffer full", + message.kind, + slot.0, + slot.1, + ); + return; + } + buf.push(message); + } + // -- RBC event dispatch -------------------------------------------------- fn dispatch_cert_events( @@ -417,7 +521,10 @@ impl ServiceState { #[cfg(test)] mod tests { + use std::time::Duration; + use prometheus::Registry; + use tokio::time::timeout; use super::*; use crate::crypto; @@ -437,6 +544,22 @@ mod tests { Metrics::new(&Registry::new(), None, None, None).0 } + fn conflicting_ref(block_ref: BlockReference) -> BlockReference { + BlockReference { + digest: crate::crypto::BlockDigest::new_without_transactions( + block_ref.authority, + block_ref.round, + &[], + &[], + 1, + &crate::crypto::SignatureBytes::default(), + crate::crypto::TransactionsCommitment::default(), + None, + ), + ..block_ref + } + } + /// N=4, F=1: 2 echoes triggers FastDelivery + SendVote + SendReady. /// Verifies that the local vote and ready are counted (via add_message) /// before being broadcast. 
@@ -621,6 +744,295 @@ mod tests { })); } + #[tokio::test] + async fn only_first_block_for_author_round_gets_local_echo() { + let committee = make_committee(4); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 7); + let conflicting = conflicting_ref(block_ref); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx.recv().await.expect("expected local echo"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { block_ref: received, kind, .. }) + if *received == block_ref && *kind == CertMessageKind::Echo + ) + })); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![conflicting])) + .unwrap(); + assert!( + timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + "conflicting block for same (author, round) must not trigger another echo" + ); + } + + #[tokio::test] + async fn conflicting_rbc_messages_are_ignored_for_same_author_round() { + let committee = make_committee(4); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 7); + let conflicting = conflicting_ref(block_ref); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let _ = event_rx.recv().await.expect("expected local echo"); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref: conflicting, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + assert!( + 
timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + "conflicting RBC message must be ignored" + ); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + let events = event_rx + .recv() + .await + .expect("expected canonical block to keep progressing"); + assert!(events.iter().any(|event| { + matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) + })); + } + + /// Early RBC messages for unknown blocks are buffered and produce no + /// immediate events. Once the block arrives via ProcessBlocks, the + /// buffered echo is drained and counted alongside the local echo. + /// N=4, F=1: 2 echoes (buffered peer + own) reach fast-path quorum. + #[tokio::test] + async fn unknown_rbc_message_is_buffered_until_block_arrives() { + let committee = make_committee(4); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 7); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + // Peer echo arrives before we know the block — must not produce events. + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + assert!( + timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + "RBC messages for unknown blocks must not produce events immediately" + ); + + // Block arrives: the local echo + drained buffered echo reach quorum. + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx.recv().await.expect("expected events"); + // Local echo is broadcast. 
+ assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { + block_ref: received, + kind, + .. + }) if *received == block_ref && *kind == CertMessageKind::Echo + ) + })); + // Buffered peer echo + own echo = 2 echoes → fast-path certification. + assert!( + events + .iter() + .any(|event| matches!(event, SailfishCertEvent::Certified(r) if *r == block_ref)), + "buffered echo should contribute to certification after block arrives" + ); + } + + /// N=7, F=2: Send echoes from three peers before the block arrives, + /// then ProcessBlocks. Draining the buffer alongside the local echo + /// should produce the local echo broadcast plus our own vote and ready + /// broadcasts — all in one event batch. + #[tokio::test] + async fn buffered_rbc_messages_drain_on_block_arrival() { + let committee = make_committee(7); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 9); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + // Buffer echoes from 3 peers before the block is known. + for sender in [2, 3, 4] { + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender, + kind: CertMessageKind::Echo, + })) + .unwrap(); + } + // No events yet. + assert!( + timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + ); + + // Block arrives: local echo + 3 buffered echoes = 4 echoes → quorum. + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx.recv().await.expect("expected events"); + + // Local echo is broadcast. + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { + block_ref: received, + kind, + .. 
+ }) if *received == block_ref && *kind == CertMessageKind::Echo + ) + })); + // SendVote triggered (own vote broadcast). + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { + sender, + kind, + .. + }) if *sender == own_authority && *kind == CertMessageKind::Vote + ) + })); + // SendReady triggered (own ready broadcast). + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { + sender, + kind, + .. + }) if *sender == own_authority && *kind == CertMessageKind::Ready + ) + })); + } + + /// Buffered messages for a conflicting digest are discarded when the + /// canonical block arrives. + #[tokio::test] + async fn buffered_conflicting_messages_discarded_on_block_arrival() { + let committee = make_committee(4); + let own_authority = 1; + let block_ref = BlockReference::new_test(0, 7); + let conflicting = conflicting_ref(block_ref); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + // Buffer an echo for the conflicting digest. + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref: conflicting, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + assert!( + timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + ); + + // Canonical block arrives — conflicting buffered echo is dropped. + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx.recv().await.expect("expected local echo"); + // Only local echo broadcast, no certification (would need 2 echoes). + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { kind, .. 
}) + if *kind == CertMessageKind::Echo + ) + })); + assert!( + !events + .iter() + .any(|event| matches!(event, SailfishCertEvent::Certified(_))), + "conflicting buffered echo must not count toward certification" + ); + } + /// N=4, F=1: quorum_threshold = 3. Three no-vote messages form a NVC. #[tokio::test] async fn novote_cert_formation() { From f117450c65396a0637e645598392db51e099e184 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 00:52:07 +0100 Subject: [PATCH 12/21] Require Sailfish timeout proofs when leader is absent --- crates/starfish-core/src/core.rs | 25 +++- crates/starfish-core/src/types.rs | 208 +++++++++++++++++++++++++++++- 2 files changed, 223 insertions(+), 10 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index cbc31416..6adc8d9d 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -513,7 +513,7 @@ impl Core { "Core::new_block::get_pending_transactions", self.get_pending_transactions(clock_round) ); - let (mut transactions, block_references, failed_refs) = timed!( + let (mut transactions, block_references, raw_refs) = timed!( self.metrics, "Core::new_block::collect_transactions_and_references", self.collect_transactions_and_references(pending_transactions, clock_round) @@ -527,7 +527,21 @@ impl Core { && clock_round > 1 && block_references.is_empty() { - for r in failed_refs { + for r in raw_refs { + self.pending.push(MetaTransaction::Include(r)); + } + self.requeue_transactions(std::mem::take(&mut transactions)); + return None; + } + + // SailfishPlusPlus: if the previous-round leader is not referenced, + // the timeout-control rule must be satisfied before we construct the + // block. Requeue both transactions and include refs so the next retry + // sees the full frontier again. 
+ if self.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + && !self.sailfish_control_ready(clock_round, &block_references) + { + for r in raw_refs { self.pending.push(MetaTransaction::Include(r)); } self.requeue_transactions(std::mem::take(&mut transactions)); @@ -712,7 +726,7 @@ impl Core { } } - (transactions, block_references, vec![]) + (transactions, block_references, raw_refs) } fn prepare_encoded_transactions( @@ -980,7 +994,6 @@ impl Core { /// Check whether Sailfish++ control-plane prerequisites are met for /// creating a block in `clock_round`. Returns true if block creation can /// proceed. - #[allow(dead_code)] fn sailfish_control_ready( &self, clock_round: RoundNumber, @@ -992,7 +1005,9 @@ impl Core { let prev_round = clock_round - 1; let prev_leader = self.committee.elect_leader(prev_round); - let has_path = block_references + let has_path = self.last_own_block.first().is_some_and(|own_block| { + own_block.block.round() == prev_round && own_block.block.authority() == prev_leader + }) || block_references .iter() .any(|r| r.round == prev_round && r.authority == prev_leader); if has_path { diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index c0f73510..00418858 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -1323,11 +1323,40 @@ impl VerifiedBlock { self.header.bls().is_none(), "Only StarfishBls blocks may carry BLS fields" ); - // Validate Sailfish++ control fields if present. - // TC/NVC presence is not yet enforced as mandatory because the - // local timeout/no-vote origination is still being wired. Once - // the full control plane is live, add "must carry" checks here. 
- if let Some(sf) = self.header.sailfish() { + if round > 1 { + let prev_round = round - 1; + let prev_leader = committee.elect_leader(prev_round); + let has_prev_leader_parent = self + .header + .block_references + .iter() + .any(|r| r.round == prev_round && r.authority == prev_leader); + + if has_prev_leader_parent { + if let Some(sf) = self.header.sailfish() { + self.verify_sailfish_fields(sf, committee)?; + } + } else { + let sf = self.header.sailfish().ok_or_else(|| { + eyre::eyre!( + "SailfishPlusPlus block missing timeout cert because previous-round leader {prev_leader} is not referenced" + ) + })?; + ensure!( + sf.timeout_cert.is_some(), + "SailfishPlusPlus block missing timeout cert because previous-round leader {} is not referenced", + prev_leader + ); + if self.authority() == committee.elect_leader(round) { + ensure!( + sf.no_vote_cert.is_some(), + "SailfishPlusPlus leader block missing no-vote cert because previous-round leader {} is not referenced", + prev_leader + ); + } + self.verify_sailfish_fields(sf, committee)?; + } + } else if let Some(sf) = self.header.sailfish() { self.verify_sailfish_fields(sf, committee)?; } } @@ -1690,6 +1719,75 @@ mod tests { (block, committee) } + fn make_sailfish_timeout_cert( + round: RoundNumber, + signers: &[Signer], + committee: &Committee, + ) -> SailfishTimeoutCert { + let digest = crypto::sailfish_timeout_digest(round); + let signatures = (0..committee.quorum_threshold() as AuthorityIndex) + .map(|authority| (authority, signers[authority as usize].sign_digest(&digest))) + .collect(); + SailfishTimeoutCert { round, signatures } + } + + fn make_sailfish_no_vote_cert( + round: RoundNumber, + leader: AuthorityIndex, + signers: &[Signer], + committee: &Committee, + ) -> SailfishNoVoteCert { + let digest = crypto::sailfish_novote_digest(round, leader); + let signatures = (0..committee.quorum_threshold() as AuthorityIndex) + .map(|authority| (authority, signers[authority as usize].sign_digest(&digest))) + .collect(); + 
SailfishNoVoteCert { + round, + leader, + signatures, + } + } + + fn make_sailfish_block( + authority: AuthorityIndex, + round: RoundNumber, + include_prev_leader: bool, + sailfish: Option, + ) -> (VerifiedBlock, std::sync::Arc) { + let committee = Committee::new_for_benchmarks(4); + let signers = Signer::new_for_test(committee.len()); + let prev_round = round - 1; + let prev_leader = committee.elect_leader(prev_round); + let block_references = committee + .authorities() + .filter(|candidate| include_prev_leader || *candidate != prev_leader) + .take(committee.quorum_threshold() as usize) + .map(|candidate| BlockReference::new_test(candidate, prev_round)) + .collect(); + let block = VerifiedBlock::new_with_signer( + authority, + round, + block_references, + None, + vec![], + 0, + &signers[authority as usize], + None, + None, + vec![], + vec![], + None, + ConsensusProtocol::SailfishPlusPlus, + None, + None, + None, + None, + None, + sailfish, + ); + (block, committee) + } + #[test] fn compresses_acknowledgments_against_shared_suffix() { let a = BlockReference::new_test(0, 1); @@ -2016,6 +2114,106 @@ mod tests { .unwrap(); } + #[test] + fn rejects_sailfish_block_without_timeout_cert_when_previous_leader_is_missing() { + let committee = Committee::new_for_benchmarks(4); + let non_leader = committee + .authorities() + .find(|authority| *authority != committee.elect_leader(3)) + .unwrap(); + let (mut block, committee) = make_sailfish_block(non_leader, 3, false, None); + + let mut encoder = Encoder::new(2, 4, 2).unwrap(); + let err = block + .verify( + committee.as_ref(), + 0, + 1, + &mut encoder, + ConsensusProtocol::SailfishPlusPlus, + ) + .unwrap_err(); + + assert!(err.to_string().contains("missing timeout cert")); + } + + #[test] + fn rejects_sailfish_leader_block_without_no_vote_cert_when_previous_leader_is_missing() { + let committee = Committee::new_for_benchmarks(4); + let signers = Signer::new_for_test(committee.len()); + let leader = committee.elect_leader(3); + 
let timeout_cert = make_sailfish_timeout_cert(2, &signers, committee.as_ref()); + let sailfish = SailfishFields { + timeout_cert: Some(timeout_cert), + no_vote_cert: None, + }; + let (mut block, committee) = make_sailfish_block(leader, 3, false, Some(sailfish)); + + let mut encoder = Encoder::new(2, 4, 2).unwrap(); + let err = block + .verify( + committee.as_ref(), + 0, + 1, + &mut encoder, + ConsensusProtocol::SailfishPlusPlus, + ) + .unwrap_err(); + + assert!(err.to_string().contains("missing no-vote cert")); + } + + #[test] + fn verifies_sailfish_block_with_previous_leader_parent_and_no_control_fields() { + let committee = Committee::new_for_benchmarks(4); + let non_leader = committee + .authorities() + .find(|authority| *authority != committee.elect_leader(3)) + .unwrap(); + let (mut block, committee) = make_sailfish_block(non_leader, 3, true, None); + + let mut encoder = Encoder::new(2, 4, 2).unwrap(); + block + .verify( + committee.as_ref(), + 0, + 1, + &mut encoder, + ConsensusProtocol::SailfishPlusPlus, + ) + .unwrap(); + } + + #[test] + fn verifies_sailfish_leader_block_with_timeout_and_no_vote_certs_when_previous_leader_is_missing() + { + let committee = Committee::new_for_benchmarks(4); + let signers = Signer::new_for_test(committee.len()); + let leader = committee.elect_leader(3); + let prev_leader = committee.elect_leader(2); + let sailfish = SailfishFields { + timeout_cert: Some(make_sailfish_timeout_cert(2, &signers, committee.as_ref())), + no_vote_cert: Some(make_sailfish_no_vote_cert( + 2, + prev_leader, + &signers, + committee.as_ref(), + )), + }; + let (mut block, committee) = make_sailfish_block(leader, 3, false, Some(sailfish)); + + let mut encoder = Encoder::new(2, 4, 2).unwrap(); + block + .verify( + committee.as_ref(), + 0, + 1, + &mut encoder, + ConsensusProtocol::SailfishPlusPlus, + ) + .unwrap(); + } + #[test] fn verifies_starfish_bls_non_leader_may_reference_latest_own_block_from_earlier_round() { let committee = 
Committee::new_for_benchmarks(4); From a1a404568a8bc71aaf96ef7d667a8123126d5406 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 00:52:37 +0100 Subject: [PATCH 13/21] Adjust Sailfish defaults and timeout proof checks --- Cargo.lock | 4 ++-- crates/starfish-core/src/dag_state.rs | 4 ++-- scripts/dryrun.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba21725b..d7c8f9a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2175,9 +2175,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" dependencies = [ "twox-hash", ] diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 8f0782f3..fbf2e906 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -3284,12 +3284,12 @@ mod tests { ); assert_eq!( ConsensusProtocol::Starfish.default_dissemination_mode(), - DisseminationMode::PushUseful + DisseminationMode::PushCausal ); assert_eq!( ConsensusProtocol::StarfishBls .resolve_dissemination_mode(DisseminationMode::ProtocolDefault), - DisseminationMode::PushUseful + DisseminationMode::PushCausal ); assert_eq!( ConsensusProtocol::Starfish.resolve_dissemination_mode(DisseminationMode::PushUseful), diff --git a/scripts/dryrun.sh b/scripts/dryrun.sh index fa8e0d47..b66cbd5e 100755 --- a/scripts/dryrun.sh +++ b/scripts/dryrun.sh @@ -8,7 +8,7 @@ NUM_NODES=${NUM_NODES:-10} DESIRED_TPS=${DESIRED_TPS:-1000} # Options: starfish, starfish-speed, starfish-bls, # sailfish-pp, cordial-miners, mysticeti -CONSENSUS=${CONSENSUS:-starfish-speed} +CONSENSUS=${CONSENSUS:-sailfish-pp} NUM_BYZANTINE_NODES=${NUM_BYZANTINE_NODES:-0} # Options: timeout-leader, leader-withholding, # 
equivocating-chains, equivocating-two-chains, @@ -24,7 +24,7 @@ STORAGE_BACKEND=${STORAGE_BACKEND:-rocksdb} TRANSACTION_MODE=${TRANSACTION_MODE:-random} # Dissemination mode: protocol-default (default) | pull | # push-causal | push-useful -DISSEMINATION_MODE=${DISSEMINATION_MODE:-push-causal} +#DISSEMINATION_MODE=${DISSEMINATION_MODE:-} # Enable lz4 network compression. # Auto-enabled for random transaction mode. # Set COMPRESS_NETWORK=1 or =0 to override. From a5950c7c0f2ff713bdf04e3bb28d5fa9a6ff1aa4 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 01:03:28 +0100 Subject: [PATCH 14/21] Use certified rounds for Sailfish proposal timing --- crates/starfish-core/src/core.rs | 6 +- crates/starfish-core/src/dag_state.rs | 119 ++++++++++++++++++++++++++ crates/starfish-core/src/net_sync.rs | 15 ++-- crates/starfish-core/src/syncer.rs | 10 ++- 4 files changed, 138 insertions(+), 12 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 6adc8d9d..1c6d6537 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -142,7 +142,7 @@ impl Core { let pending_start_round = recovered_last_own_round .unwrap_or_default() - .max(dag_state.threshold_clock_round().saturating_sub(1)); + .max(dag_state.proposal_round().saturating_sub(1)); for block in &unprocessed_blocks { if block.round() >= pending_start_round { pending.push(MetaTransaction::Include(*block.reference())); @@ -469,7 +469,7 @@ impl Core { .utilization_timer("Core::new_block::try_new_block"); // Check if we're ready for a new block - let clock_round = self.dag_state.threshold_clock_round(); + let clock_round = self.dag_state.proposal_round(); tracing::debug!( "Attempt to construct block in round {}. 
Current pending: {:?}", clock_round, @@ -1243,7 +1243,7 @@ impl Core { connected_authorities: &AHashSet, relaxed: bool, ) -> bool { - let quorum_round = self.dag_state.threshold_clock_round(); + let quorum_round = self.dag_state.proposal_round(); tracing::debug!("Attempt ready new block, quorum round {}", quorum_round); if quorum_round < self.last_commit_leader.round().max(1) { diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index fbf2e906..3764c02e 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -573,6 +573,44 @@ impl DagState { self.dag_state_inner.read().threshold_clock.get_round() } + /// Return the protocol round that may be used for proposing a new block. + /// + /// For SailfishPlusPlus, entering round `r` requires both raw threshold + /// clock progress to `r` and quorum stake of RBC-certified vertices in + /// round `r - 1`. Other protocols use the raw threshold clock directly. + pub fn proposal_round(&self) -> RoundNumber { + if self.consensus_protocol != ConsensusProtocol::SailfishPlusPlus { + return self.threshold_clock_round(); + } + + let inner = self.dag_state_inner.read(); + let threshold_round = inner.threshold_clock.get_round(); + if threshold_round <= 1 { + return threshold_round; + } + + for round in (2..=threshold_round).rev() { + let prev_round = round - 1; + let mut stake: Stake = 0; + for auth in 0..inner.committee_size { + if inner.vertex_certificates[auth] + .iter() + .any(|reference| reference.round == prev_round) + { + stake += self + .committee + .get_stake(auth as AuthorityIndex) + .unwrap_or(0); + } + } + if self.committee.is_quorum(stake) { + return round; + } + } + + 1 + } + pub fn get_dag_sorted(&self) -> Vec<(BlockReference, Vec, AuthorityBitmask)> { let inner = self.dag_state_inner.read(); let mut result: Vec<_> = inner @@ -3036,6 +3074,87 @@ mod tests { assert!(!dag_state.certified_parent_quorum(5)); } + #[test] + fn 
sailfish_proposal_round_waits_for_certified_previous_round() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let round_1 = vec![ + make_full_block( + 0, + 1, + vec![BlockReference::new_test(0, 0)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 2, + 1, + vec![BlockReference::new_test(2, 0)], + ConsensusProtocol::SailfishPlusPlus, + ), + ]; + let round_2 = vec![ + make_full_block( + 0, + 2, + vec![BlockReference::new_test(0, 1)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 1, + 2, + vec![BlockReference::new_test(1, 1)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 2, + 2, + vec![BlockReference::new_test(2, 1)], + ConsensusProtocol::SailfishPlusPlus, + ), + ]; + let round_3 = vec![ + make_full_block( + 0, + 3, + vec![BlockReference::new_test(0, 2)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 1, + 3, + vec![BlockReference::new_test(1, 2)], + ConsensusProtocol::SailfishPlusPlus, + ), + make_full_block( + 2, + 3, + vec![BlockReference::new_test(2, 2)], + ConsensusProtocol::SailfishPlusPlus, + ), + ]; + + let round_1_refs: Vec<_> = round_1.iter().map(|block| *block.reference()).collect(); + let round_2_refs: Vec<_> = round_2.iter().map(|block| *block.reference()).collect(); + + dag_state.insert_general_blocks(round_1, DataSource::BlockBundleStreaming); + dag_state.insert_general_blocks(round_2, DataSource::BlockBundleStreaming); + dag_state.insert_general_blocks(round_3, DataSource::BlockBundleStreaming); + + assert_eq!(dag_state.threshold_clock_round(), 4); + assert_eq!(dag_state.proposal_round(), 1); + + dag_state.mark_vertices_certified(&round_1_refs); + assert_eq!(dag_state.proposal_round(), 2); + + dag_state.mark_vertices_certified(&round_2_refs); + assert_eq!(dag_state.proposal_round(), 3); + } + #[test] fn 
starfish_speed_adaptive_acknowledgments_only_uses_local_leader_history() { let dag_state = open_test_dag_state_for_with_feature("starfish-speed", 0, true); diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index 10c01e01..ed3d6615 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -1613,11 +1613,16 @@ impl NetworkSyncer let leader_timeout = inner.leader_timeout; loop { let notified = inner.threshold_clock_notify.notified(); - let round = inner - .dag_state - .last_own_block_ref() - .map(|b| b.round()) - .unwrap_or_default(); + let round = if inner.dag_state.consensus_protocol == ConsensusProtocol::SailfishPlusPlus + { + inner.dag_state.proposal_round().saturating_sub(1) + } else { + inner + .dag_state + .last_own_block_ref() + .map(|b| b.round()) + .unwrap_or_default() + }; select! { _sleep = sleep(leader_timeout) => { tracing::debug!("Timeout in round {round}"); diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index cc03ea06..7930c1c7 100644 --- a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -126,7 +126,7 @@ impl Syncer { AHashSet, Vec, ) { - let previous_threshold_round = self.core.dag_state().threshold_clock_round(); + let previous_threshold_round = self.core.dag_state().proposal_round(); // todo: when block is updated we might return false here and it can make // committing longer let ( @@ -155,7 +155,7 @@ impl Syncer { headers: Vec>, source: DataSource, ) -> (AHashSet, Vec) { - let previous_threshold_round = self.core.dag_state().threshold_clock_round(); + let previous_threshold_round = self.core.dag_state().proposal_round(); let (success, missing_parents, processed_refs, _processed_blocks) = self.core.add_headers(headers, source); self.maybe_update_proposal_wait(); @@ -183,12 +183,14 @@ impl Syncer { /// DagState on the core thread. Retries block creation and sequencing /// when any certificate is new. 
pub fn apply_sailfish_certificates(&mut self, certified_refs: Vec) { + let previous_threshold_round = self.core.dag_state().proposal_round(); if self .core .dag_state() .mark_vertices_certified(&certified_refs) { self.maybe_update_proposal_wait(); + self.maybe_signal_threshold_round_advance(previous_threshold_round); self.try_new_block(BlockCreationReason::CertificateEvent); self.try_new_commit(); } @@ -347,7 +349,7 @@ impl Syncer { } fn maybe_update_proposal_wait(&mut self) { - let threshold_round = self.core.dag_state().threshold_clock_round(); + let threshold_round = self.core.dag_state().proposal_round(); if threshold_round <= self.core.last_proposed() { return; } @@ -370,7 +372,7 @@ impl Syncer { } fn maybe_signal_threshold_round_advance(&mut self, previous_threshold_round: RoundNumber) { - let current_threshold_round = self.core.dag_state().threshold_clock_round(); + let current_threshold_round = self.core.dag_state().proposal_round(); if current_threshold_round > previous_threshold_round { self.signals .threshold_clock_round_advanced(current_threshold_round); From 6b6511fa68d96662e2dea588d79a8e8a14dc4542 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 01:58:54 +0100 Subject: [PATCH 15/21] Make Sailfish certification ancestor-closed --- crates/starfish-core/src/dag_state.rs | 195 +++++++++++++++++++++++--- crates/starfish-core/src/net_sync.rs | 13 -- crates/starfish-core/src/syncer.rs | 12 +- 3 files changed, 185 insertions(+), 35 deletions(-) diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 3764c02e..bb99303a 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -308,9 +308,19 @@ struct DagStateInner { consensus_protocol: ConsensusProtocol, precomputed_round_sigs: BTreeMap, precomputed_leader_sigs: BTreeMap, - /// Per-authority RBC vertex certificates (SailfishPlusPlus). - /// Stored as BTreeSet per authority for split_off cleanup. 
+ /// Per-authority active RBC vertex certificates (SailfishPlusPlus). + /// A vertex is active only once its direct parents are also active-certified + /// (or genesis), making the usable certified DAG ancestor-closed. vertex_certificates: Vec>, + /// Preliminary SailfishPlusPlus local certifications that are waiting for + /// one or more direct parents to become active-certified. + pending_vertex_certificates: Vec>, + /// For each waiting vertex, the number of direct non-genesis parents that + /// are still missing active certification. + pending_vertex_certificate_counts: BTreeMap, + /// Reverse dependency index: parent -> children that are waiting for that + /// parent to become active-certified. + pending_vertex_certificate_children: BTreeMap>, /// Sailfish++ timeout certificates, indexed by round. sailfish_timeout_certs: BTreeMap, /// Sailfish++ no-vote certificates, indexed by (round, leader). @@ -382,6 +392,9 @@ impl DagState { precomputed_round_sigs: BTreeMap::new(), precomputed_leader_sigs: BTreeMap::new(), vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), + pending_vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), + pending_vertex_certificate_counts: BTreeMap::new(), + pending_vertex_certificate_children: BTreeMap::new(), sailfish_timeout_certs: BTreeMap::new(), sailfish_novote_certs: BTreeMap::new(), }; @@ -989,18 +1002,18 @@ impl DagState { } /// Mark a batch of vertices as RBC-certified (SailfishPlusPlus). - /// Only the explicitly provided references are certified; parent - /// certificates must come from their own RBC evidence. + /// A newly RBC-delivered vertex becomes usable certified state only after + /// all of its direct parents are also usable certified (or genesis). 
pub fn mark_vertices_certified(&self, block_refs: &[BlockReference]) -> bool { if block_refs.is_empty() { return false; } let mut inner = self.dag_state_inner.write(); - let mut changed = false; + let mut activated = Vec::new(); for &block_ref in block_refs { - changed |= inner.vertex_certificates[block_ref.authority as usize].insert(block_ref); + inner.note_vertex_certified(block_ref, &mut activated); } - changed + !activated.is_empty() } /// Mark a vertex as RBC-certified (SailfishPlusPlus). @@ -1008,7 +1021,8 @@ impl DagState { self.mark_vertices_certified(&[block_ref]) } - /// Check whether a vertex has been RBC-certified (SailfishPlusPlus). + /// Check whether a vertex is active-certified and therefore usable as part + /// of the certified SailfishPlusPlus DAG. pub fn has_vertex_certificate(&self, block_ref: &BlockReference) -> bool { self.dag_state_inner.read().vertex_certificates[block_ref.authority as usize] .contains(block_ref) @@ -2255,12 +2269,121 @@ impl DagStateInner { digest: BlockDigest::default(), }; self.vertex_certificates[auth] = self.vertex_certificates[auth].split_off(&split_ref); + self.pending_vertex_certificates[auth] = + self.pending_vertex_certificates[auth].split_off(&split_ref); } + self.pending_vertex_certificate_counts + .retain(|block_ref, _| { + block_ref.round >= self.evicted_rounds[block_ref.authority as usize] + }); + self.pending_vertex_certificate_children + .retain(|parent, children| { + if parent.round < self.evicted_rounds[parent.authority as usize] { + return false; + } + children + .retain(|child| child.round >= self.evicted_rounds[child.authority as usize]); + !children.is_empty() + }); // Prune Sailfish++ timeout and no-vote certificates. 
self.sailfish_timeout_certs = self.sailfish_timeout_certs.split_off(&min_evicted); self.sailfish_novote_certs = self.sailfish_novote_certs.split_off(&(min_evicted, 0)); } + fn note_vertex_certified( + &mut self, + block_ref: BlockReference, + activated: &mut Vec, + ) { + let auth = block_ref.authority as usize; + if self.vertex_certificates[auth].contains(&block_ref) + || self.pending_vertex_certificates[auth].contains(&block_ref) + { + return; + } + + let missing_parents = self.missing_active_certificate_parents(block_ref); + if missing_parents.is_empty() { + self.activate_vertex_certificate(block_ref, activated); + return; + } + + self.pending_vertex_certificates[auth].insert(block_ref); + self.pending_vertex_certificate_counts + .insert(block_ref, missing_parents.len()); + for parent in missing_parents { + self.pending_vertex_certificate_children + .entry(parent) + .or_default() + .insert(block_ref); + } + } + + fn missing_active_certificate_parents(&self, block_ref: BlockReference) -> Vec { + let block = self + .get_storage_block(block_ref) + .unwrap_or_else(|| panic!("Certified block {block_ref} should exist in DagState")); + let mut missing = Vec::new(); + for parent in block.block_references() { + if parent.round == 0 { + continue; + } + if !self.vertex_certificates[parent.authority as usize].contains(parent) { + missing.push(*parent); + } + } + missing + } + + fn activate_vertex_certificate( + &mut self, + block_ref: BlockReference, + activated: &mut Vec, + ) { + let auth = block_ref.authority as usize; + if !self.vertex_certificates[auth].insert(block_ref) { + return; + } + + self.pending_vertex_certificates[auth].remove(&block_ref); + self.pending_vertex_certificate_counts.remove(&block_ref); + + let block = self + .get_storage_block(block_ref) + .unwrap_or_else(|| panic!("Certified block {block_ref} should exist in DagState")); + for parent in block.block_references() { + if let Some(children) = self.pending_vertex_certificate_children.get_mut(parent) { 
+ children.remove(&block_ref); + if children.is_empty() { + self.pending_vertex_certificate_children.remove(parent); + } + } + } + + activated.push(block_ref); + + let waiting_children = self + .pending_vertex_certificate_children + .remove(&block_ref) + .unwrap_or_default(); + for child in waiting_children { + let should_activate = match self.pending_vertex_certificate_counts.get_mut(&child) { + Some(remaining) if *remaining > 1 => { + *remaining -= 1; + false + } + Some(_) => { + self.pending_vertex_certificate_counts.remove(&child); + true + } + None => false, + }; + if should_activate { + self.activate_vertex_certificate(child, activated); + } + } + } + pub fn add_block( &mut self, block: Data, @@ -3046,17 +3169,29 @@ mod tests { } #[test] - fn batch_vertex_certification_marks_only_explicit_vertices() { + fn batch_vertex_certification_waits_for_parent_closure() { let dag_state = open_test_dag_state_for("sailfish-pp", 0); - let parent_ref = BlockReference::new_test(1, 1); + let parent = make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + ConsensusProtocol::SailfishPlusPlus, + ); + let parent_ref = *parent.reference(); let child = make_full_block(2, 2, vec![parent_ref], ConsensusProtocol::SailfishPlusPlus); let child_ref = *child.reference(); + dag_state.insert_general_block(parent, DataSource::BlockBundleStreaming); dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); dag_state.mark_vertices_certified(&[child_ref]); - assert!(dag_state.has_vertex_certificate(&child_ref)); + assert!(!dag_state.has_vertex_certificate(&child_ref)); assert!(!dag_state.has_vertex_certificate(&parent_ref)); + + dag_state.mark_vertices_certified(&[parent_ref]); + + assert!(dag_state.has_vertex_certificate(&parent_ref)); + assert!(dag_state.has_vertex_certificate(&child_ref)); } #[test] @@ -3074,6 +3209,27 @@ mod tests { assert!(!dag_state.certified_parent_quorum(5)); } + #[test] + fn 
batch_vertex_certification_activates_parent_and_child_from_same_batch() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let parent = make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + ConsensusProtocol::SailfishPlusPlus, + ); + let parent_ref = *parent.reference(); + let child = make_full_block(2, 2, vec![parent_ref], ConsensusProtocol::SailfishPlusPlus); + let child_ref = *child.reference(); + + dag_state.insert_general_block(parent, DataSource::BlockBundleStreaming); + dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); + dag_state.mark_vertices_certified(&[child_ref, parent_ref]); + + assert!(dag_state.has_vertex_certificate(&parent_ref)); + assert!(dag_state.has_vertex_certificate(&child_ref)); + } + #[test] fn sailfish_proposal_round_waits_for_certified_previous_round() { let dag_state = open_test_dag_state_for("sailfish-pp", 0); @@ -3097,50 +3253,49 @@ mod tests { ConsensusProtocol::SailfishPlusPlus, ), ]; + let round_1_refs: Vec<_> = round_1.iter().map(|block| *block.reference()).collect(); let round_2 = vec![ make_full_block( 0, 2, - vec![BlockReference::new_test(0, 1)], + vec![round_1_refs[0]], ConsensusProtocol::SailfishPlusPlus, ), make_full_block( 1, 2, - vec![BlockReference::new_test(1, 1)], + vec![round_1_refs[1]], ConsensusProtocol::SailfishPlusPlus, ), make_full_block( 2, 2, - vec![BlockReference::new_test(2, 1)], + vec![round_1_refs[2]], ConsensusProtocol::SailfishPlusPlus, ), ]; + let round_2_refs: Vec<_> = round_2.iter().map(|block| *block.reference()).collect(); let round_3 = vec![ make_full_block( 0, 3, - vec![BlockReference::new_test(0, 2)], + vec![round_2_refs[0]], ConsensusProtocol::SailfishPlusPlus, ), make_full_block( 1, 3, - vec![BlockReference::new_test(1, 2)], + vec![round_2_refs[1]], ConsensusProtocol::SailfishPlusPlus, ), make_full_block( 2, 3, - vec![BlockReference::new_test(2, 2)], + vec![round_2_refs[2]], ConsensusProtocol::SailfishPlusPlus, ), ]; - let round_1_refs: Vec<_> = 
round_1.iter().map(|block| *block.reference()).collect(); - let round_2_refs: Vec<_> = round_2.iter().map(|block| *block.reference()).collect(); - dag_state.insert_general_blocks(round_1, DataSource::BlockBundleStreaming); dag_state.insert_general_blocks(round_2, DataSource::BlockBundleStreaming); dag_state.insert_general_blocks(round_3, DataSource::BlockBundleStreaming); diff --git a/crates/starfish-core/src/net_sync.rs b/crates/starfish-core/src/net_sync.rs index ed3d6615..2ee3676f 100644 --- a/crates/starfish-core/src/net_sync.rs +++ b/crates/starfish-core/src/net_sync.rs @@ -707,11 +707,6 @@ impl ConnectionHandler = new_data_blocks.iter().map(|b| *b.reference()).collect(); - sf.send(SailfishServiceMessage::ProcessBlocks(block_refs)); - } // Notify CordialKnowledge about all new headers in one batch. let header_refs = new_data_blocks .iter() @@ -862,14 +857,6 @@ impl ConnectionHandler = verified_data_blocks - .iter() - .map(|b| *b.reference()) - .collect(); - sf.send(SailfishServiceMessage::ProcessBlocks(block_refs)); - } // Notify CordialKnowledge about all new headers and shards in one batch. 
let header_refs: Vec<_> = verified_data_blocks .iter() diff --git a/crates/starfish-core/src/syncer.rs b/crates/starfish-core/src/syncer.rs index 7930c1c7..578cb67a 100644 --- a/crates/starfish-core/src/syncer.rs +++ b/crates/starfish-core/src/syncer.rs @@ -134,8 +134,12 @@ impl Syncer { pending_blocks_with_transactions, missing_parents, used_additional_blocks, - _processed_blocks, + processed_blocks, ) = self.core.add_blocks(blocks, source); + if !processed_blocks.is_empty() { + let block_refs: Vec<_> = processed_blocks.iter().map(|b| *b.reference()).collect(); + self.send_sailfish_message(SailfishServiceMessage::ProcessBlocks(block_refs)); + } self.maybe_update_proposal_wait(); self.maybe_signal_threshold_round_advance(previous_threshold_round); if success { @@ -156,8 +160,12 @@ impl Syncer { source: DataSource, ) -> (AHashSet, Vec) { let previous_threshold_round = self.core.dag_state().proposal_round(); - let (success, missing_parents, processed_refs, _processed_blocks) = + let (success, missing_parents, processed_refs, processed_blocks) = self.core.add_headers(headers, source); + if !processed_blocks.is_empty() { + let block_refs: Vec<_> = processed_blocks.iter().map(|b| *b.reference()).collect(); + self.send_sailfish_message(SailfishServiceMessage::ProcessBlocks(block_refs)); + } self.maybe_update_proposal_wait(); self.maybe_signal_threshold_round_advance(previous_threshold_round); if success { From ad97906139ba147f87c571d0943f5f943b9e7b21 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 02:08:14 +0100 Subject: [PATCH 16/21] Exclude Sailfish broadcaster from RBC thresholds --- crates/starfish-core/src/cert_aggregator.rs | 62 ++++++++++++++-- crates/starfish-core/src/sailfish_service.rs | 75 ++++++++++++++++++++ 2 files changed, 130 insertions(+), 7 deletions(-) diff --git a/crates/starfish-core/src/cert_aggregator.rs b/crates/starfish-core/src/cert_aggregator.rs index a7d2071b..fca13757 100644 --- 
a/crates/starfish-core/src/cert_aggregator.rs +++ b/crates/starfish-core/src/cert_aggregator.rs @@ -81,6 +81,11 @@ impl CertificationAggregator { } fn add_echo(&mut self, message: &CertMessage) -> Vec { + // The broadcaster/author can equivocate, so the optimistic Sailfish++ + // Echo thresholds count only non-broadcaster senders. + if message.sender == message.block_ref.authority { + return Vec::new(); + } let state = self .rounds .entry(message.block_ref.round) @@ -121,6 +126,10 @@ impl CertificationAggregator { } fn add_vote(&mut self, message: &CertMessage) -> Vec { + // Votes inherit the same non-broadcaster counting rule as echoes. + if message.sender == message.block_ref.authority { + return Vec::new(); + } let state = self .rounds .entry(message.block_ref.round) @@ -235,15 +244,16 @@ mod tests { let events = agg.add_message(&echo(block, 0)); assert!(!agg.is_certified(&block)); - // With N=4, F=1: fast = ceil((4+2-2)/2) = 2, vote = ceil(4/2) = 2 - // So 1 echo is not enough + assert!(events.is_empty()); + + let events = agg.add_message(&echo(block, 1)); assert!( events .iter() .all(|e| !matches!(e, CertEvent::FastDelivery(_))) ); - let events = agg.add_message(&echo(block, 1)); + let events = agg.add_message(&echo(block, 2)); assert!( events .iter() @@ -261,6 +271,9 @@ mod tests { agg.add_message(&echo(block, 0)); let events = agg.add_message(&echo(block, 1)); + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendVote(_)))); + + let events = agg.add_message(&echo(block, 2)); assert!(events.iter().any(|e| matches!(e, CertEvent::SendVote(_)))); } @@ -273,6 +286,9 @@ mod tests { agg.add_message(&vote(block, 0)); let events = agg.add_message(&vote(block, 1)); + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + + let events = agg.add_message(&vote(block, 2)); assert!(events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); } @@ -287,6 +303,9 @@ mod tests { assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); let 
events = agg.add_message(&echo(block, 1)); + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + + let events = agg.add_message(&echo(block, 2)); assert!(events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); } @@ -318,11 +337,40 @@ mod tests { let mut agg = CertificationAggregator::new(committee); let block = BlockReference::new_test(0, 1); - agg.add_message(&echo(block, 0)); - let events = agg.add_message(&echo(block, 0)); + agg.add_message(&echo(block, 1)); + let events = agg.add_message(&echo(block, 1)); assert!(events.is_empty()); } + #[test] + fn broadcaster_echo_does_not_count_toward_fast_delivery() { + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&echo(block, 0)); + let events = agg.add_message(&echo(block, 1)); + + assert!( + !events + .iter() + .any(|e| matches!(e, CertEvent::FastDelivery(_))) + ); + assert!(!agg.is_certified(&block)); + } + + #[test] + fn broadcaster_vote_does_not_count_toward_ready_trigger() { + let committee = make_committee(4); + let mut agg = CertificationAggregator::new(committee); + let block = BlockReference::new_test(0, 1); + + agg.add_message(&vote(block, 0)); + let events = agg.add_message(&vote(block, 1)); + + assert!(!events.iter().any(|e| matches!(e, CertEvent::SendReady(_)))); + } + #[test] fn cleanup_via_split_off() { let committee = make_committee(4); @@ -330,8 +378,8 @@ mod tests { let block_r1 = BlockReference::new_test(0, 1); let block_r5 = BlockReference::new_test(0, 5); - agg.add_message(&echo(block_r1, 0)); - agg.add_message(&echo(block_r5, 0)); + agg.add_message(&echo(block_r1, 1)); + agg.add_message(&echo(block_r5, 1)); agg.cleanup_below_round(3); assert!(!agg.rounds.contains_key(&1)); diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index 27ca5b01..f7650195 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ 
b/crates/starfish-core/src/sailfish_service.rs @@ -627,6 +627,81 @@ mod tests { })); } + /// For a locally-authored block, the service still broadcasts the local + /// echo, but the broadcaster's echo must not count toward optimistic Echo + /// or Vote thresholds. + #[tokio::test] + async fn local_author_echo_is_broadcast_but_not_counted() { + let committee = make_committee(4); + let own_authority = 0; + let block_ref = BlockReference::new_test(0, 7); + let (msg_tx, msg_rx) = mpsc::unbounded_channel(); + let (event_tx, mut event_rx) = mpsc::unbounded_channel(); + + start_sailfish_service( + committee, + own_authority, + test_signer(own_authority), + msg_rx, + event_tx, + test_metrics(), + ); + + msg_tx + .send(SailfishServiceMessage::ProcessBlocks(vec![block_ref])) + .unwrap(); + let events = event_rx + .recv() + .await + .expect("expected local echo broadcast"); + assert!(events.iter().any(|event| { + matches!( + event, + SailfishCertEvent::Broadcast(CertMessage { + sender, + block_ref: received, + kind, + }) if *sender == own_authority + && *received == block_ref + && *kind == CertMessageKind::Echo + ) + })); + assert!( + !events + .iter() + .any(|event| matches!(event, SailfishCertEvent::Certified(_))) + ); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 1, + kind: CertMessageKind::Echo, + })) + .unwrap(); + assert!( + timeout(Duration::from_millis(50), event_rx.recv()) + .await + .is_err(), + "author echo + one peer echo must not reach optimistic thresholds" + ); + + msg_tx + .send(SailfishServiceMessage::CertMessage(CertMessage { + block_ref, + sender: 2, + kind: CertMessageKind::Echo, + })) + .unwrap(); + let events = event_rx + .recv() + .await + .expect("expected certification after two non-author echoes"); + assert!(events.iter().any(|event| { + matches!(event, SailfishCertEvent::Certified(received) if *received == block_ref) + })); + } + /// N=7, F=2: 4 echoes triggers SendVote + SendReady (both at threshold 4). 
/// Then 4 peer readys (+ own ready already counted) reaches quorum for /// SlowDelivery. From b1129e2b1c3ce4595896548270da9ba678e107a5 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 02:15:31 +0100 Subject: [PATCH 17/21] Add Sailfish delivered-support commit rule --- .../src/consensus/universal_committer.rs | 129 +++++++++++++++++- 1 file changed, 127 insertions(+), 2 deletions(-) diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index 46be348f..4c1465d7 100644 --- a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -296,8 +296,13 @@ impl UniversalCommitter { .has_vertex_certificate(leader_block.reference()) }) .filter(|leader_block| { - self.supporting_stake_for_sailfish(leader_block.reference(), support_round) - >= self.committee.quorum_threshold() + let support = self.supporting_stake_for_sailfish(leader_block.reference(), support_round); + let delivered_support = self.delivered_supporting_stake_for_sailfish( + leader_block.reference(), + support_round, + ); + support >= self.committee.quorum_threshold() + || delivered_support >= self.committee.validity_threshold() }) .collect(); @@ -330,6 +335,28 @@ impl UniversalCommitter { aggregator.get_stake() } + fn delivered_supporting_stake_for_sailfish( + &self, + leader_ref: &BlockReference, + support_round: RoundNumber, + ) -> Stake { + let supporting_blocks = self.dag_state.get_blocks_by_round_cached(support_round); + let mut aggregator = StakeAggregator::::new(); + for block in supporting_blocks.iter() { + if !self.dag_state.has_vertex_certificate(block.reference()) { + continue; + } + if block + .block_references() + .iter() + .any(|reference| reference == leader_ref) + { + aggregator.add(block.authority(), &self.committee); + } + } + aggregator.get_stake() + } + /// Return list of leaders for the round. 
Syncer may give those leaders some /// extra time. To preserve (theoretical) liveness, we should wait /// `Delta` time for at least the first leader. @@ -438,3 +465,101 @@ impl UniversalCommitterBuilder { } } } + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + use crate::{ + config::StorageBackend, + crypto::{SignatureBytes, TransactionsCommitment}, + dag_state::DataSource, + data::Data, + }; + use prometheus::Registry; + + fn open_test_dag_state_for(consensus: &str, authority: AuthorityIndex) -> DagState { + let committee = Committee::new_for_benchmarks(4); + let registry = Registry::new(); + let (metrics, _reporter) = + Metrics::new(&registry, Some(committee.as_ref()), Some(consensus), None); + let dir = TempDir::new().unwrap(); + let path = dir.path().to_path_buf(); + std::mem::forget(dir); + DagState::open( + authority, + path, + metrics, + committee, + "honest".to_string(), + consensus.to_string(), + &StorageBackend::Rocksdb, + false, + ) + .dag_state + } + + fn make_full_block( + authority: AuthorityIndex, + round: RoundNumber, + parents: Vec, + ) -> Data { + let empty_transactions = Vec::new(); + let merkle_root = TransactionsCommitment::new_from_transactions(&empty_transactions); + let mut block = crate::types::VerifiedBlock::new( + authority, + round, + parents, + Vec::new(), + 0, + SignatureBytes::default(), + empty_transactions, + merkle_root, + None, + None, + None, + ); + block.preserialize(); + Data::new(block) + } + + #[test] + fn sailfish_direct_commit_accepts_f_plus_1_delivered_supporters() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let committee = Committee::new_for_benchmarks(4); + let registry = Registry::new(); + let (metrics, _reporter) = + Metrics::new(&registry, Some(committee.as_ref()), Some("sailfish-pp"), None); + + let leader = make_full_block(1, 1, vec![BlockReference::new_test(1, 0)]); + let leader_ref = *leader.reference(); + let supporter_a = make_full_block(0, 2, vec![leader_ref]); + let
supporter_b = make_full_block(2, 2, vec![leader_ref]); + + dag_state.insert_general_block(leader, DataSource::BlockBundleStreaming); + dag_state.insert_general_block(supporter_a.clone(), DataSource::BlockBundleStreaming); + dag_state.insert_general_block(supporter_b.clone(), DataSource::BlockBundleStreaming); + + dag_state.mark_vertices_certified(&[ + leader_ref, + *supporter_a.reference(), + *supporter_b.reference(), + ]); + + let mut committer = + UniversalCommitterBuilder::new(committee, dag_state, metrics).build(); + + let decided = committer.try_commit(BlockReference::new_test(0, 0)); + assert!( + decided.iter().any(|status| { + matches!( + status, + LeaderStatus::Commit(block, None) + if block.authority() == 1 && block.round() == 1 + ) + }), + "expected round-1 leader to commit with f+1 certified supporters in round 2" + ); + } +} From 48c0ce9cc86c0c61de7ad24f52318237eda7ec40 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 02:22:30 +0100 Subject: [PATCH 18/21] Persist Sailfish certified refs across recovery --- crates/starfish-core/src/crypto.rs | 6 + crates/starfish-core/src/dag_state.rs | 167 ++++++++++++++----- crates/starfish-core/src/rocks_store.rs | 43 +++++ crates/starfish-core/src/store.rs | 10 ++ crates/starfish-core/src/tidehunter_store.rs | 46 +++++ 5 files changed, 228 insertions(+), 44 deletions(-) diff --git a/crates/starfish-core/src/crypto.rs b/crates/starfish-core/src/crypto.rs index 0b507bd0..74ad135a 100644 --- a/crates/starfish-core/src/crypto.rs +++ b/crates/starfish-core/src/crypto.rs @@ -252,6 +252,12 @@ impl BlockDigest { } } +impl From<[u8; BLOCK_DIGEST_SIZE]> for BlockDigest { + fn from(value: [u8; BLOCK_DIGEST_SIZE]) -> Self { + Self(value) + } +} + pub trait AsBytes { // This is pretty much same as AsRef<[u8]> // diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index bb99303a..fe7685cd 100644 --- a/crates/starfish-core/src/dag_state.rs +++ 
b/crates/starfish-core/src/dag_state.rs @@ -469,54 +469,73 @@ impl DagState { block_count, ); } else { - // Fallback: full replay from round 0 (pre-migration data or fresh start). + // Fallback: full replay from storage scan (pre-migration data or + // fresh start without persisted commits). let mut recovered_commit_leaders = AHashSet::new(); - let mut current_round = 0; - loop { - let blocks = store - .get_blocks_by_round(current_round) - .expect("Failed to read blocks from storage"); + let blocks = store + .scan_blocks_from_round(0) + .expect("Failed to scan blocks from storage"); - if blocks.is_empty() { - break; - } + for block in blocks { + let block_ref = *block.reference(); - for block in blocks { - let block_ref = *block.reference(); + // Recover shard sidecars into the shard index. + if let Some(shard) = store + .get_shard_data(&block_ref) + .expect("Failed to read shard data from storage") + { + let auth = block_ref.authority as usize; + inner.shard_index[auth] + .entry(block_ref.round) + .or_default() + .insert(block_ref.digest, shard); + } - // Recover shard sidecars into the shard index. 
- if let Some(shard) = store - .get_shard_data(&block_ref) - .expect("Failed to read shard data from storage") + inner.threshold_clock.add_block(block_ref, &committee); + builder.block(block_ref.round, block.clone()); + block_count += 1; + inner.add_block(block, 0, committee.len() as AuthorityIndex, &mut bfs_buf); + if recovered_commit_leaders.insert(block_ref) { + if let Some(commit_data) = store + .get_commit(&block_ref) + .expect("Failed to read commit data from storage") { - let auth = block_ref.authority as usize; - inner.shard_index[auth] - .entry(block_ref.round) - .or_default() - .insert(block_ref.digest, shard); - } - - inner.threshold_clock.add_block(block_ref, &committee); - builder.block(current_round, block.clone()); - block_count += 1; - inner.add_block(block, 0, committee.len() as AuthorityIndex, &mut bfs_buf); - if recovered_commit_leaders.insert(block_ref) { - if let Some(commit_data) = store - .get_commit(&block_ref) - .expect("Failed to read commit data from storage") - { - // Rebuild last_committed_rounds from committed blocks. - for r in &commit_data.sub_dag { - let auth = r.authority as usize; - inner.last_committed_rounds[auth] = - inner.last_committed_rounds[auth].max(r.round); - } - builder.commit(commit_data); + // Rebuild last_committed_rounds from committed blocks. + for r in &commit_data.sub_dag { + let auth = r.authority as usize; + inner.last_committed_rounds[auth] = + inner.last_committed_rounds[auth].max(r.round); } + builder.commit(commit_data); } } + } + } - current_round += 1; + // Recover persisted active Sailfish++ certifications. Only the active, + // ancestor-closed frontier is persisted; preliminary waiting state is + // intentionally not recovered. 
+ if consensus_protocol == ConsensusProtocol::SailfishPlusPlus { + let certified_from_round = if use_windowed { + inner.evicted_rounds.iter().copied().min().unwrap_or(0) + } else { + 0 + }; + let certified_refs = store + .scan_sailfish_certified_refs_from_round(certified_from_round) + .expect("Failed to read Sailfish certified refs from storage"); + for block_ref in certified_refs { + let auth = block_ref.authority as usize; + if block_ref.round <= inner.evicted_rounds[auth] { + continue; + } + if inner.index[auth] + .get(&block_ref.round) + .and_then(|blocks| blocks.get(&block_ref.digest)) + .is_some() + { + inner.vertex_certificates[auth].insert(block_ref); + } } } @@ -1008,12 +1027,21 @@ impl DagState { if block_refs.is_empty() { return false; } - let mut inner = self.dag_state_inner.write(); - let mut activated = Vec::new(); - for &block_ref in block_refs { - inner.note_vertex_certified(block_ref, &mut activated); + let activated = { + let mut inner = self.dag_state_inner.write(); + let mut activated = Vec::new(); + for &block_ref in block_refs { + inner.note_vertex_certified(block_ref, &mut activated); + } + activated + }; + if activated.is_empty() { + return false; } - !activated.is_empty() + self.store + .store_sailfish_certified_refs(&activated) + .expect("Failed to persist Sailfish certified refs"); + true } /// Mark a vertex as RBC-certified (SailfishPlusPlus). 
@@ -3016,6 +3044,28 @@ mod tests { open_test_dag_state_for_with_feature(consensus, authority, false) } + fn open_test_dag_state_at_path( + consensus: &str, + authority: AuthorityIndex, + path: &std::path::Path, + ) -> DagState { + let committee = Committee::new_for_benchmarks(4); + let registry = Registry::new(); + let (metrics, _reporter) = + Metrics::new(&registry, Some(committee.as_ref()), Some(consensus), None); + DagState::open( + authority, + path, + metrics, + committee, + "honest".to_string(), + consensus.to_string(), + &StorageBackend::Rocksdb, + false, + ) + .dag_state + } + fn open_test_dag_state_for_with_feature( consensus: &str, authority: AuthorityIndex, @@ -3230,6 +3280,35 @@ mod tests { assert!(dag_state.has_vertex_certificate(&child_ref)); } + #[test] + fn sailfish_active_certifications_recover_after_reopen() { + let dir = TempDir::new().unwrap(); + let path = dir.path().to_path_buf(); + + let dag_state = open_test_dag_state_at_path("sailfish-pp", 0, &path); + let parent = make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + ConsensusProtocol::SailfishPlusPlus, + ); + let parent_ref = *parent.reference(); + let child = make_full_block(2, 2, vec![parent_ref], ConsensusProtocol::SailfishPlusPlus); + let child_ref = *child.reference(); + + dag_state.insert_general_block(parent, DataSource::BlockBundleStreaming); + dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); + dag_state.mark_vertices_certified(&[parent_ref, child_ref]); + + assert!(dag_state.has_vertex_certificate(&parent_ref)); + assert!(dag_state.has_vertex_certificate(&child_ref)); + drop(dag_state); + + let reopened = open_test_dag_state_at_path("sailfish-pp", 0, &path); + assert!(reopened.has_vertex_certificate(&parent_ref)); + assert!(reopened.has_vertex_certificate(&child_ref)); + } + #[test] fn sailfish_proposal_round_waits_for_certified_previous_round() { let dag_state = open_test_dag_state_for("sailfish-pp", 0); diff --git
a/crates/starfish-core/src/rocks_store.rs b/crates/starfish-core/src/rocks_store.rs index bb8dd99c..0e249604 100644 --- a/crates/starfish-core/src/rocks_store.rs +++ b/crates/starfish-core/src/rocks_store.rs @@ -27,6 +27,7 @@ const CF_HEADERS: &str = "headers"; const CF_TX_DATA: &str = "tx_data"; const CF_SHARD_DATA: &str = "shard_data"; const CF_COMMITS: &str = "commits"; +const CF_SAILFISH_CERTIFIED: &str = "sailfish_certified"; pub struct RocksStore { db: Arc, @@ -142,6 +143,7 @@ impl RocksStore { ColumnFamilyDescriptor::new(CF_TX_DATA, Self::data_cf_options()), ColumnFamilyDescriptor::new(CF_SHARD_DATA, Self::data_cf_options()), ColumnFamilyDescriptor::new(CF_COMMITS, Self::metadata_cf_options()), + ColumnFamilyDescriptor::new(CF_SAILFISH_CERTIFIED, Self::metadata_cf_options()), ]; let db = DB::open_cf_descriptors(&opts, path, cf_descriptors).map_err(io::Error::other)?; @@ -553,4 +555,45 @@ impl Store for RocksStore { }) .collect() } + + fn store_sailfish_certified_refs(&self, refs: &[BlockReference]) -> io::Result<()> { + if refs.is_empty() { + return Ok(()); + } + let cf = self.cf(CF_SAILFISH_CERTIFIED)?; + let mut wb = rocksdb::WriteBatch::default(); + for reference in refs { + let key = serialize(reference).map_err(io::Error::other)?; + wb.put_cf(&cf, key, []); + } + self.db + .write_opt(wb, &self.write_opts) + .map_err(io::Error::other) + } + + fn scan_sailfish_certified_refs_from_round( + &self, + from_round: RoundNumber, + ) -> io::Result> { + let mut refs = Vec::new(); + let seek_key = serialize(&BlockReference { + round: from_round, + authority: 0, + digest: BlockDigest::default(), + }) + .map_err(io::Error::other)?; + + let cf = self.cf(CF_SAILFISH_CERTIFIED)?; + let mut iter = self.db.raw_iterator_cf_opt(&cf, Self::get_read_opts()); + iter.seek(&seek_key); + + while iter.valid() { + let key_bytes = iter.key().ok_or_else(|| io::Error::other("Invalid key"))?; + let reference: BlockReference = deserialize(key_bytes).map_err(io::Error::other)?; + 
refs.push(reference); + iter.next(); + } + + Ok(refs) + } } diff --git a/crates/starfish-core/src/store.rs b/crates/starfish-core/src/store.rs index 5a01f60f..b5c814c5 100644 --- a/crates/starfish-core/src/store.rs +++ b/crates/starfish-core/src/store.rs @@ -67,4 +67,14 @@ pub trait Store: Send + Sync + 'static { &self, from_round: RoundNumber, ) -> io::Result>>; + + /// Persist a batch of active Sailfish++ certified block references. + fn store_sailfish_certified_refs(&self, refs: &[BlockReference]) -> io::Result<()>; + + /// Return all persisted active Sailfish++ certified block references from + /// `from_round` onward (inclusive). + fn scan_sailfish_certified_refs_from_round( + &self, + from_round: RoundNumber, + ) -> io::Result>; } diff --git a/crates/starfish-core/src/tidehunter_store.rs b/crates/starfish-core/src/tidehunter_store.rs index 40dad5f7..6d0e6c83 100644 --- a/crates/starfish-core/src/tidehunter_store.rs +++ b/crates/starfish-core/src/tidehunter_store.rs @@ -39,6 +39,7 @@ pub struct TideHunterStore { ks_tx_data: KeySpace, ks_shard_data: KeySpace, ks_commits: KeySpace, + ks_sailfish_certified: KeySpace, } impl TideHunterStore { @@ -81,6 +82,7 @@ impl TideHunterStore { let ks_tx_data = Self::add_ks(&mut builder, "tx_data"); let ks_shard_data = Self::add_ks(&mut builder, "shard_data"); let ks_commits = Self::add_ks(&mut builder, "commits"); + let ks_sailfish_certified = Self::add_ks(&mut builder, "sailfish_certified"); let key_shape = builder.build(); let config = Arc::new(Config { @@ -102,6 +104,7 @@ impl TideHunterStore { ks_tx_data, ks_shard_data, ks_commits, + ks_sailfish_certified, }) } @@ -354,4 +357,47 @@ impl Store for TideHunterStore { let key = Self::encode_key(reference); self.point_read(self.ks_shard_data, &key) } + + fn store_sailfish_certified_refs(&self, refs: &[BlockReference]) -> io::Result<()> { + if refs.is_empty() { + return Ok(()); + } + let mut batch = self.db.write_batch(); + for reference in refs { + let key = 
Self::encode_key(reference); + batch.write(self.ks_sailfish_certified, key.to_vec(), Vec::new()); + } + batch + .commit() + .map_err(|e| io::Error::other(format!("TideHunter store certified refs: {e:?}"))) + } + + fn scan_sailfish_certified_refs_from_round( + &self, + from_round: RoundNumber, + ) -> io::Result> { + let mut refs = Vec::new(); + let lower = Self::round_lower_bound(from_round); + + let mut iter = self.db.iterator(self.ks_sailfish_certified); + iter.set_lower_bound(lower.to_vec()); + + for result in iter { + let (key_bytes, _value) = + result.map_err(|e| io::Error::other(format!("TideHunter iter: {e:?}")))?; + let key: [u8; KEY_SIZE] = key_bytes[..KEY_SIZE] + .try_into() + .map_err(|_| io::Error::other("invalid key length"))?; + + let mut digest = [0u8; 32]; + digest.copy_from_slice(&key[5..37]); + refs.push(BlockReference { + round: u32::from_be_bytes(key[0..4].try_into().expect("slice length checked")), + authority: key[4], + digest: digest.into(), + }); + } + + Ok(refs) + } } From 42a618a392327d4b282558651b63e08f5aeacb43 Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 02:33:43 +0100 Subject: [PATCH 19/21] Infer Sailfish certifications from f+1 support --- crates/starfish-core/src/dag_state.rs | 168 ++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 8 deletions(-) diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index fe7685cd..01d87408 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize}; use crate::tidehunter_store::TideHunterStore; use crate::{ bls_certificate_aggregator::CertificateEvent, - committee::{Committee, QuorumThreshold, StakeAggregator}, + committee::{Committee, QuorumThreshold, StakeAggregator, ValidityThreshold}, config::{DisseminationMode, StorageBackend}, consensus::linearizer::{CommittedSubDag, MAX_TRAVERSAL_DEPTH}, crypto::{BlsSignatureBytes, 
TransactionsCommitment}, @@ -321,6 +321,10 @@ struct DagStateInner { /// Reverse dependency index: parent -> children that are waiting for that /// parent to become active-certified. pending_vertex_certificate_children: BTreeMap>, + /// Supporters from the next round that directly reference a given block. + /// Once this reaches f+1 distinct authorities, we can infer that some + /// honest node had the referenced block in its active-certified view. + inferred_vertex_support: Vec>>, /// Sailfish++ timeout certificates, indexed by round. sailfish_timeout_certs: BTreeMap, /// Sailfish++ no-vote certificates, indexed by (round, leader). @@ -395,6 +399,7 @@ impl DagState { pending_vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), pending_vertex_certificate_counts: BTreeMap::new(), pending_vertex_certificate_children: BTreeMap::new(), + inferred_vertex_support: (0..n).map(|_| BTreeMap::new()).collect(), sailfish_timeout_certs: BTreeMap::new(), sailfish_novote_certs: BTreeMap::new(), }; @@ -452,7 +457,15 @@ impl DagState { inner.threshold_clock.add_block(block_ref, &committee); builder.block(block_ref.round, block.clone()); block_count += 1; - inner.add_block(block, 0, committee.len() as AuthorityIndex, &mut bfs_buf); + let mut activated = Vec::new(); + inner.add_block( + block, + 0, + committee.len() as AuthorityIndex, + &mut bfs_buf, + &committee, + &mut activated, + ); if let Some(commit_data) = store .get_commit(&block_ref) @@ -494,7 +507,15 @@ impl DagState { inner.threshold_clock.add_block(block_ref, &committee); builder.block(block_ref.round, block.clone()); block_count += 1; - inner.add_block(block, 0, committee.len() as AuthorityIndex, &mut bfs_buf); + let mut activated = Vec::new(); + inner.add_block( + block, + 0, + committee.len() as AuthorityIndex, + &mut bfs_buf, + &committee, + &mut activated, + ); if recovered_commit_leaders.insert(block_ref) { if let Some(commit_data) = store .get_commit(&block_ref) @@ -565,7 +586,15 @@ impl DagState { inner 
.threshold_clock .add_block(*block.reference(), &committee); - inner.add_block(block.clone(), 0, committee_len, &mut bfs_buf); + let mut activated = Vec::new(); + inner.add_block( + block.clone(), + 0, + committee_len, + &mut bfs_buf, + &committee, + &mut activated, + ); } } @@ -722,7 +751,7 @@ impl DagState { .inc_by(store_start.elapsed().as_micros() as u64); self.metrics.store_block_count.inc(); - let (highest_round, lowest_round) = { + let (highest_round, lowest_round, activated) = { let mut inner = self.dag_state_inner.write(); // Keep threshold clock mutation co-located with DAG insertion so // runtime block acceptance and recovery use the same path. @@ -730,14 +759,22 @@ impl DagState { .threshold_clock .add_block(*block.reference(), &self.committee); let mut bfs_buf = Vec::new(); + let mut activated = Vec::new(); inner.add_block( block, authority_index_start, authority_index_end, &mut bfs_buf, + &self.committee, + &mut activated, ); - (inner.highest_round, inner.global_lowest_round()) + (inner.highest_round, inner.global_lowest_round(), activated) }; + if !activated.is_empty() { + self.store + .store_sailfish_certified_refs(&activated) + .expect("Failed to store inferred Sailfish certifications"); + } self.metrics.dag_highest_round.set(highest_round as i64); self.metrics.dag_lowest_round.set(lowest_round as i64); } @@ -816,9 +853,10 @@ impl DagState { } // Phase 2: single write lock for all DAG mutations. 
- let (highest_round, lowest_round) = { + let (highest_round, lowest_round, activated) = { let mut inner = self.dag_state_inner.write(); let mut bfs_buf = Vec::new(); + let mut activated = Vec::new(); for block in blocks { inner .threshold_clock @@ -828,10 +866,17 @@ impl DagState { authority_index_start, authority_index_end, &mut bfs_buf, + &self.committee, + &mut activated, ); } - (inner.highest_round, inner.global_lowest_round()) + (inner.highest_round, inner.global_lowest_round(), activated) }; + if !activated.is_empty() { + self.store + .store_sailfish_certified_refs(&activated) + .expect("Failed to store inferred Sailfish certifications"); + } self.metrics.dag_highest_round.set(highest_round as i64); self.metrics.dag_lowest_round.set(lowest_round as i64); } @@ -2299,6 +2344,8 @@ impl DagStateInner { self.vertex_certificates[auth] = self.vertex_certificates[auth].split_off(&split_ref); self.pending_vertex_certificates[auth] = self.pending_vertex_certificates[auth].split_off(&split_ref); + self.inferred_vertex_support[auth] = + self.inferred_vertex_support[auth].split_off(&split_ref); } self.pending_vertex_certificate_counts .retain(|block_ref, _| { @@ -2375,6 +2422,7 @@ impl DagStateInner { self.pending_vertex_certificates[auth].remove(&block_ref); self.pending_vertex_certificate_counts.remove(&block_ref); + self.inferred_vertex_support[auth].remove(&block_ref); let block = self .get_storage_block(block_ref) @@ -2418,6 +2466,8 @@ impl DagStateInner { authority_index_start: AuthorityIndex, authority_index_end: AuthorityIndex, bfs_buffer: &mut Vec, + committee: &Committee, + activated: &mut Vec, ) { let reference = block.reference(); let auth = reference.authority as usize; @@ -2433,6 +2483,78 @@ impl DagStateInner { self.update_dag(*reference, block.block_references().clone(), bfs_buffer); self.update_data_availability(&block); self.update_starfish_speed_leader_hints(&block); + self.update_inferred_sailfish_support(&block, committee, activated); + } + + fn 
update_inferred_sailfish_support( + &mut self, + block: &VerifiedBlock, + committee: &Committee, + activated: &mut Vec, + ) { + if self.consensus_protocol != ConsensusProtocol::SailfishPlusPlus || block.round() <= 1 { + return; + } + + let supporter = block.authority(); + let mut infer_roots = Vec::new(); + for parent in block.block_references() { + if parent.round == 0 || parent.round + 1 != block.round() { + continue; + } + let reached = self + .inferred_vertex_support[parent.authority as usize] + .entry(*parent) + .or_default() + .add(supporter, committee); + if reached { + infer_roots.push(*parent); + } + } + + for root in infer_roots { + self.infer_vertex_certificate_closure(root, activated); + } + } + + fn infer_vertex_certificate_closure( + &mut self, + root: BlockReference, + activated: &mut Vec, + ) { + if root.round == 0 || self.vertex_certificates[root.authority as usize].contains(&root) { + return; + } + + let mut to_activate = BTreeSet::new(); + let mut stack = vec![root]; + + while let Some(block_ref) = stack.pop() { + if block_ref.round == 0 + || self.vertex_certificates[block_ref.authority as usize].contains(&block_ref) + { + continue; + } + if !to_activate.insert(block_ref) { + continue; + } + + let Some(block) = self.get_storage_block(block_ref) else { + return; + }; + + for parent in block.block_references() { + if parent.round > 0 + && !self.vertex_certificates[parent.authority as usize].contains(parent) + { + stack.push(*parent); + } + } + } + + for block_ref in to_activate { + self.activate_vertex_certificate(block_ref, activated); + } } fn update_starfish_speed_leader_hints(&mut self, block: &VerifiedBlock) { @@ -3280,6 +3402,36 @@ mod tests { assert!(dag_state.has_vertex_certificate(&child_ref)); } + #[test] + fn sailfish_f_plus_1_support_infers_certified_closure() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let ancestor = make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + 
ConsensusProtocol::SailfishPlusPlus, + ); + let ancestor_ref = *ancestor.reference(); + let target = make_full_block(1, 2, vec![ancestor_ref], ConsensusProtocol::SailfishPlusPlus); + let target_ref = *target.reference(); + let supporter_a = + make_full_block(0, 3, vec![target_ref], ConsensusProtocol::SailfishPlusPlus); + let supporter_b = + make_full_block(2, 3, vec![target_ref], ConsensusProtocol::SailfishPlusPlus); + + dag_state.insert_general_block(ancestor, DataSource::BlockBundleStreaming); + dag_state.insert_general_block(target, DataSource::BlockBundleStreaming); + assert!(!dag_state.has_vertex_certificate(&ancestor_ref)); + assert!(!dag_state.has_vertex_certificate(&target_ref)); + + dag_state.insert_general_block(supporter_a, DataSource::BlockBundleStreaming); + assert!(!dag_state.has_vertex_certificate(&target_ref)); + + dag_state.insert_general_block(supporter_b, DataSource::BlockBundleStreaming); + assert!(dag_state.has_vertex_certificate(&target_ref)); + assert!(dag_state.has_vertex_certificate(&ancestor_ref)); + } + #[test] fn sailfish_active_certifications_recover_after_reopen() { let dir = TempDir::new().unwrap(); From 1c8d095c0425b496fedb9b2ccd344d38647d2cdf Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Wed, 18 Mar 2026 03:07:17 +0100 Subject: [PATCH 20/21] Buffer Sailfish certified refs until flush --- crates/starfish-core/src/core.rs | 10 ++++ crates/starfish-core/src/dag_state.rs | 76 ++++++++++++++++++++------- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/crates/starfish-core/src/core.rs b/crates/starfish-core/src/core.rs index 1c6d6537..e82ec2c8 100644 --- a/crates/starfish-core/src/core.rs +++ b/crates/starfish-core/src/core.rs @@ -1147,6 +1147,7 @@ impl Core { }; self.last_own_block[block_id] = own_block.clone(); self.dag_state.insert_own_block(own_block.clone()); + self.flush_pending_sailfish_certified_refs(); } /// Generate an own DAC partial signature for a block we just created. 
@@ -1280,6 +1281,7 @@ impl Core { .store_commits_latency_us .inc_by(store_start.elapsed().as_micros() as u64); self.metrics.store_commits_count.inc(); + self.flush_pending_sailfish_certified_refs(); } pub fn write_commits(&mut self, _commits: &[CommitData]) {} @@ -1299,6 +1301,14 @@ impl Core { pub fn dag_state(&self) -> &DagState { &self.dag_state } + + fn flush_pending_sailfish_certified_refs(&self) { + if self.dag_state.consensus_protocol != ConsensusProtocol::SailfishPlusPlus { + return; + } + self.dag_state.flush_pending_sailfish_certified_refs(); + } + pub fn store(&self) -> Arc { self.store.clone() } diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 01d87408..30adf910 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -321,6 +321,9 @@ struct DagStateInner { /// Reverse dependency index: parent -> children that are waiting for that /// parent to become active-certified. pending_vertex_certificate_children: BTreeMap>, + /// Active Sailfish certifications waiting to be persisted on the next + /// storage flush boundary. + pending_persisted_vertex_certificates: Vec>, /// Supporters from the next round that directly reference a given block. /// Once this reaches f+1 distinct authorities, we can infer that some /// honest node had the referenced block in its active-certified view. 
@@ -399,6 +402,7 @@ impl DagState { pending_vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), pending_vertex_certificate_counts: BTreeMap::new(), pending_vertex_certificate_children: BTreeMap::new(), + pending_persisted_vertex_certificates: (0..n).map(|_| BTreeSet::new()).collect(), inferred_vertex_support: (0..n).map(|_| BTreeMap::new()).collect(), sailfish_timeout_certs: BTreeMap::new(), sailfish_novote_certs: BTreeMap::new(), @@ -751,7 +755,7 @@ impl DagState { .inc_by(store_start.elapsed().as_micros() as u64); self.metrics.store_block_count.inc(); - let (highest_round, lowest_round, activated) = { + let (highest_round, lowest_round, _activated) = { let mut inner = self.dag_state_inner.write(); // Keep threshold clock mutation co-located with DAG insertion so // runtime block acceptance and recovery use the same path. @@ -770,11 +774,6 @@ impl DagState { ); (inner.highest_round, inner.global_lowest_round(), activated) }; - if !activated.is_empty() { - self.store - .store_sailfish_certified_refs(&activated) - .expect("Failed to store inferred Sailfish certifications"); - } self.metrics.dag_highest_round.set(highest_round as i64); self.metrics.dag_lowest_round.set(lowest_round as i64); } @@ -853,7 +852,7 @@ impl DagState { } // Phase 2: single write lock for all DAG mutations. 
- let (highest_round, lowest_round, activated) = { + let (highest_round, lowest_round, _activated) = { let mut inner = self.dag_state_inner.write(); let mut bfs_buf = Vec::new(); let mut activated = Vec::new(); @@ -872,11 +871,6 @@ impl DagState { } (inner.highest_round, inner.global_lowest_round(), activated) }; - if !activated.is_empty() { - self.store - .store_sailfish_certified_refs(&activated) - .expect("Failed to store inferred Sailfish certifications"); - } self.metrics.dag_highest_round.set(highest_round as i64); self.metrics.dag_lowest_round.set(lowest_round as i64); } @@ -1080,13 +1074,7 @@ impl DagState { } activated }; - if activated.is_empty() { - return false; - } - self.store - .store_sailfish_certified_refs(&activated) - .expect("Failed to persist Sailfish certified refs"); - true + !activated.is_empty() } /// Mark a vertex as RBC-certified (SailfishPlusPlus). @@ -1094,6 +1082,30 @@ impl DagState { self.mark_vertices_certified(&[block_ref]) } + /// Drain active Sailfish certifications that still need to be persisted at + /// the next storage flush boundary. + pub fn take_pending_sailfish_certified_refs(&self) -> Vec { + let mut inner = self.dag_state_inner.write(); + let mut refs = Vec::new(); + for pending in &mut inner.pending_persisted_vertex_certificates { + refs.extend(std::mem::take(pending)); + } + refs + } + + /// Persist active Sailfish certifications accumulated since the last flush + /// boundary. + pub fn flush_pending_sailfish_certified_refs(&self) -> bool { + let refs = self.take_pending_sailfish_certified_refs(); + if refs.is_empty() { + return false; + } + self.store + .store_sailfish_certified_refs(&refs) + .expect("Failed to persist Sailfish certified refs"); + true + } + /// Check whether a vertex is active-certified and therefore usable as part /// of the certified SailfishPlusPlus DAG. 
pub fn has_vertex_certificate(&self, block_ref: &BlockReference) -> bool { @@ -2344,6 +2356,8 @@ impl DagStateInner { self.vertex_certificates[auth] = self.vertex_certificates[auth].split_off(&split_ref); self.pending_vertex_certificates[auth] = self.pending_vertex_certificates[auth].split_off(&split_ref); + self.pending_persisted_vertex_certificates[auth] = + self.pending_persisted_vertex_certificates[auth].split_off(&split_ref); self.inferred_vertex_support[auth] = self.inferred_vertex_support[auth].split_off(&split_ref); } @@ -2437,6 +2451,7 @@ impl DagStateInner { } activated.push(block_ref); + self.pending_persisted_vertex_certificates[auth].insert(block_ref); let waiting_children = self .pending_vertex_certificate_children @@ -3402,6 +3417,28 @@ mod tests { assert!(dag_state.has_vertex_certificate(&child_ref)); } + #[test] + fn sailfish_pending_certified_refs_are_buffered_until_flushed() { + let dag_state = open_test_dag_state_for("sailfish-pp", 0); + let parent = make_full_block( + 1, + 1, + vec![BlockReference::new_test(1, 0)], + ConsensusProtocol::SailfishPlusPlus, + ); + let parent_ref = *parent.reference(); + let child = make_full_block(2, 2, vec![parent_ref], ConsensusProtocol::SailfishPlusPlus); + let child_ref = *child.reference(); + + dag_state.insert_general_block(parent, DataSource::BlockBundleStreaming); + dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); + dag_state.mark_vertices_certified(&[child_ref, parent_ref]); + + let pending = dag_state.take_pending_sailfish_certified_refs(); + assert_eq!(pending, vec![parent_ref, child_ref]); + assert!(dag_state.take_pending_sailfish_certified_refs().is_empty()); + } + #[test] fn sailfish_f_plus_1_support_infers_certified_closure() { let dag_state = open_test_dag_state_for("sailfish-pp", 0); @@ -3451,6 +3488,7 @@ mod tests { dag_state.insert_general_block(parent, DataSource::BlockBundleStreaming); dag_state.insert_general_block(child, DataSource::BlockBundleStreaming); 
dag_state.mark_vertices_certified(&[parent_ref, child_ref]); + dag_state.flush_pending_sailfish_certified_refs(); assert!(dag_state.has_vertex_certificate(&parent_ref)); assert!(dag_state.has_vertex_certificate(&child_ref)); From e4c18d7437dee7299d4549292506233459cf526b Mon Sep 17 00:00:00 2001 From: Nikita Polianskii Date: Thu, 19 Mar 2026 15:08:37 +0100 Subject: [PATCH 21/21] fix(core): CI lint and test fixes for SailfishPlusPlus - Run cargo fmt to fix formatting violations - Wrap long error strings in types.rs for editorconfig compliance - Shorten test fn name exceeding 100-char line limit - Fix batch_vertex_certification_updates_quorum_view: insert actual blocks before certifying (parent-closure check requires them) - Fix sailfish_service timeout/novote tests: use new_for_benchmarks committee so public keys match Signer::new_for_test signatures - Revert dryrun.sh to main (local testing defaults) --- .../src/consensus/universal_committer.rs | 14 ++++--- crates/starfish-core/src/dag_state.rs | 38 +++++++++++++------ crates/starfish-core/src/sailfish_service.rs | 2 +- crates/starfish-core/src/types.rs | 14 ++++--- scripts/dryrun.sh | 6 +-- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/crates/starfish-core/src/consensus/universal_committer.rs b/crates/starfish-core/src/consensus/universal_committer.rs index 4c1465d7..bf4568f4 100644 --- a/crates/starfish-core/src/consensus/universal_committer.rs +++ b/crates/starfish-core/src/consensus/universal_committer.rs @@ -296,7 +296,8 @@ impl UniversalCommitter { .has_vertex_certificate(leader_block.reference()) }) .filter(|leader_block| { - let support = self.supporting_stake_for_sailfish(leader_block.reference(), support_round); + let support = + self.supporting_stake_for_sailfish(leader_block.reference(), support_round); let delivered_support = self.delivered_supporting_stake_for_sailfish( leader_block.reference(), support_round, @@ -529,8 +530,12 @@ mod tests { let dag_state = 
open_test_dag_state_for("sailfish-pp", 0); let committee = Committee::new_for_benchmarks(4); let registry = Registry::new(); - let (metrics, _reporter) = - Metrics::new(&registry, Some(committee.as_ref()), Some("sailfish-pp"), None); + let (metrics, _reporter) = Metrics::new( + &registry, + Some(committee.as_ref()), + Some("sailfish-pp"), + None, + ); let leader = make_full_block(1, 1, vec![BlockReference::new_test(1, 0)]); let leader_ref = *leader.reference(); @@ -547,8 +552,7 @@ mod tests { *supporter_b.reference(), ]); - let mut committer = - UniversalCommitterBuilder::new(committee, dag_state, metrics).build(); + let mut committer = UniversalCommitterBuilder::new(committee, dag_state, metrics).build(); let decided = committer.try_commit(BlockReference::new_test(0, 0)); assert!( diff --git a/crates/starfish-core/src/dag_state.rs b/crates/starfish-core/src/dag_state.rs index 30adf910..c5794bb6 100644 --- a/crates/starfish-core/src/dag_state.rs +++ b/crates/starfish-core/src/dag_state.rs @@ -309,8 +309,9 @@ struct DagStateInner { precomputed_round_sigs: BTreeMap, precomputed_leader_sigs: BTreeMap, /// Per-authority active RBC vertex certificates (SailfishPlusPlus). - /// A vertex is active only once its direct parents are also active-certified - /// (or genesis), making the usable certified DAG ancestor-closed. + /// A vertex is active only once its direct parents are also + /// active-certified (or genesis), making the usable certified DAG + /// ancestor-closed. vertex_certificates: Vec>, /// Preliminary SailfishPlusPlus local certifications that are waiting for /// one or more direct parents to become active-certified.
@@ -2517,8 +2518,7 @@ impl DagStateInner { if parent.round == 0 || parent.round + 1 != block.round() { continue; } - let reached = self - .inferred_vertex_support[parent.authority as usize] + let reached = self.inferred_vertex_support[parent.authority as usize] .entry(*parent) .or_default() .add(supporter, committee); @@ -3384,16 +3384,25 @@ mod tests { #[test] fn batch_vertex_certification_updates_quorum_view() { let dag_state = open_test_dag_state_for("sailfish-pp", 0); - let certified = vec![ - BlockReference::new_test(0, 4), - BlockReference::new_test(1, 4), - BlockReference::new_test(2, 4), - ]; + let blocks: Vec<_> = (0..3) + .map(|auth| { + make_full_block( + auth, + 1, + vec![BlockReference::new_test(auth, 0)], + ConsensusProtocol::SailfishPlusPlus, + ) + }) + .collect(); + let certified: Vec<_> = blocks.iter().map(|b| *b.reference()).collect(); + for block in blocks { + dag_state.insert_general_block(block, DataSource::BlockBundleStreaming); + } dag_state.mark_vertices_certified(&certified); - assert!(dag_state.certified_parent_quorum(4)); - assert!(!dag_state.certified_parent_quorum(5)); + assert!(dag_state.certified_parent_quorum(1)); + assert!(!dag_state.certified_parent_quorum(2)); } #[test] @@ -3449,7 +3458,12 @@ mod tests { ConsensusProtocol::SailfishPlusPlus, ); let ancestor_ref = *ancestor.reference(); - let target = make_full_block(1, 2, vec![ancestor_ref], ConsensusProtocol::SailfishPlusPlus); + let target = make_full_block( + 1, + 2, + vec![ancestor_ref], + ConsensusProtocol::SailfishPlusPlus, + ); let target_ref = *target.reference(); let supporter_a = make_full_block(0, 3, vec![target_ref], ConsensusProtocol::SailfishPlusPlus); diff --git a/crates/starfish-core/src/sailfish_service.rs b/crates/starfish-core/src/sailfish_service.rs index f7650195..2c3e5e87 100644 --- a/crates/starfish-core/src/sailfish_service.rs +++ b/crates/starfish-core/src/sailfish_service.rs @@ -530,7 +530,7 @@ mod tests { use crate::crypto; fn make_committee(n: usize) -> 
Arc { - Committee::new_test(vec![1; n]) + Committee::new_for_benchmarks(n) } fn test_signer(authority: AuthorityIndex) -> Signer { diff --git a/crates/starfish-core/src/types.rs b/crates/starfish-core/src/types.rs index 00418858..a0250689 100644 --- a/crates/starfish-core/src/types.rs +++ b/crates/starfish-core/src/types.rs @@ -1339,18 +1339,23 @@ impl VerifiedBlock { } else { let sf = self.header.sailfish().ok_or_else(|| { eyre::eyre!( - "SailfishPlusPlus block missing timeout cert because previous-round leader {prev_leader} is not referenced" + "SailfishPlusPlus block missing timeout cert \ + because previous-round leader \ + {prev_leader} is not referenced" ) })?; ensure!( sf.timeout_cert.is_some(), - "SailfishPlusPlus block missing timeout cert because previous-round leader {} is not referenced", + "SailfishPlusPlus block missing timeout cert \ + because previous-round leader {} is not referenced", prev_leader ); if self.authority() == committee.elect_leader(round) { ensure!( sf.no_vote_cert.is_some(), - "SailfishPlusPlus leader block missing no-vote cert because previous-round leader {} is not referenced", + "SailfishPlusPlus leader block missing \ + no-vote cert because previous-round \ + leader {} is not referenced", prev_leader ); } @@ -2185,8 +2190,7 @@ mod tests { } #[test] - fn verifies_sailfish_leader_block_with_timeout_and_no_vote_certs_when_previous_leader_is_missing() - { + fn verifies_sailfish_leader_with_timeout_no_vote_certs_when_prev_leader_missing() { let committee = Committee::new_for_benchmarks(4); let signers = Signer::new_for_test(committee.len()); let leader = committee.elect_leader(3); diff --git a/scripts/dryrun.sh b/scripts/dryrun.sh index b66cbd5e..c54e598b 100755 --- a/scripts/dryrun.sh +++ b/scripts/dryrun.sh @@ -7,8 +7,8 @@ NUM_NODES=${NUM_NODES:-10} DESIRED_TPS=${DESIRED_TPS:-1000} # Options: starfish, starfish-speed, starfish-bls, -# sailfish-pp, cordial-miners, mysticeti -CONSENSUS=${CONSENSUS:-sailfish-pp} +# cordial-miners, 
mysticeti +CONSENSUS=${CONSENSUS:-starfish-speed} NUM_BYZANTINE_NODES=${NUM_BYZANTINE_NODES:-0} # Options: timeout-leader, leader-withholding, # equivocating-chains, equivocating-two-chains, @@ -24,7 +24,7 @@ STORAGE_BACKEND=${STORAGE_BACKEND:-rocksdb} TRANSACTION_MODE=${TRANSACTION_MODE:-random} # Dissemination mode: protocol-default (default) | pull | # push-causal | push-useful -#DISSEMINATION_MODE=${DISSEMINATION_MODE:-} +DISSEMINATION_MODE=${DISSEMINATION_MODE:-push-causal} # Enable lz4 network compression. # Auto-enabled for random transaction mode. # Set COMPRESS_NETWORK=1 or =0 to override.