diff --git a/crates/hotshot/src/tasks/task_state.rs b/crates/hotshot/src/tasks/task_state.rs index 09bc3b0eb4..536e6239a6 100644 --- a/crates/hotshot/src/tasks/task_state.rs +++ b/crates/hotshot/src/tasks/task_state.rs @@ -270,6 +270,7 @@ impl, V: Versions> CreateTaskState formed_upgrade_certificate: None, upgrade_lock: handle.hotshot.upgrade_lock.clone(), epoch_height: handle.hotshot.config.epoch_height, + highest_qc: handle.hotshot.consensus.read().await.high_qc().clone(), } } } diff --git a/crates/task-impls/src/consensus/handlers.rs b/crates/task-impls/src/consensus/handlers.rs index cfc399bd56..287e14e03e 100644 --- a/crates/task-impls/src/consensus/handlers.rs +++ b/crates/task-impls/src/consensus/handlers.rs @@ -26,6 +26,7 @@ use crate::{ consensus::Versions, events::HotShotEvent, helpers::broadcast_event, vote_collection::handle_vote, }; +use vbs::version::StaticVersionType; /// Handle a `QuorumVoteRecv` event. pub(crate) async fn handle_quorum_vote_recv< @@ -112,6 +113,30 @@ pub(crate) async fn handle_timeout_vote_recv< Ok(()) } +/// Send an event to the next leader containing the highest QC we have. +/// This is a necessary part of HotStuff 2 but not of the original HotStuff. +/// +/// # Errors +/// Returns an error if we can't get the version, or if the version doesn't +/// yet support HotStuff 2 +pub async fn send_high_qc>( + new_view_number: TYPES::View, + sender: &Sender>>, + task_state: &mut ConsensusTaskState, +) -> Result<()> { + let version = task_state.upgrade_lock.version(new_view_number).await?; + ensure!( + version >= V::Epochs::VERSION, + debug!("HotStuff 2 upgrade not yet in effect") + ); + let high_qc = task_state.consensus.read().await.high_qc().clone(); + let leader = task_state + .quorum_membership + .leader(new_view_number, TYPES::Epoch::new(0))?; + broadcast_event(Arc::new(HotShotEvent::HighQcSend(high_qc, leader)), sender).await; + Ok(()) +} + /// Handle a `ViewChange` event. #[instrument(skip_all)] pub(crate) async fn handle_view_change< @@ -140,6 +165,15 @@ pub(crate) async fn handle_view_change< if *old_view_number / 100 != *new_view_number / 100 { tracing::info!("Progress: entered view {:>6}", *new_view_number); } + + // Send our high QC to the next leader immediately upon finishing a view.
+ // This is part of HotStuff 2. + let _ = send_high_qc(new_view_number, sender, task_state) + .await + .inspect_err(|e| { + tracing::debug!("High QC sending failed with error: {:?}", e); + }); + // Move this node to the next view task_state.cur_view = new_view_number; task_state diff --git a/crates/task-impls/src/events.rs b/crates/task-impls/src/events.rs index bca4b4a12b..6744208fa5 100644 --- a/crates/task-impls/src/events.rs +++ b/crates/task-impls/src/events.rs @@ -236,6 +236,12 @@ pub enum HotShotEvent { TYPES::SignatureKey, Proposal>, ), + + /// A replica sent us a high QC + HighQcRecv(QuorumCertificate, TYPES::SignatureKey), + + /// Send our high QC to the next leader; it should go to the same leader as our vote + HighQcSend(QuorumCertificate, TYPES::SignatureKey), } impl HotShotEvent { @@ -311,6 +317,9 @@ impl HotShotEvent { | HotShotEvent::VidRequestRecv(request, _) => Some(request.view), HotShotEvent::VidResponseSend(_, _, proposal) | HotShotEvent::VidResponseRecv(_, proposal) => Some(proposal.data.view_number), + HotShotEvent::HighQcRecv(qc, _) | HotShotEvent::HighQcSend(qc, _) => { + Some(qc.view_number()) + } } } } @@ -569,6 +578,12 @@ impl Display for HotShotEvent { proposal.data.view_number ) } + HotShotEvent::HighQcRecv(qc, _) => { + write!(f, "HighQcRecv(view_number={:?})", qc.view_number()) + } + HotShotEvent::HighQcSend(qc, _) => { + write!(f, "HighQcSend(view_number={:?})", qc.view_number()) + } } } } diff --git a/crates/task-impls/src/helpers.rs b/crates/task-impls/src/helpers.rs index cce95b6be3..bcabd40e9e 100644 --- a/crates/task-impls/src/helpers.rs +++ b/crates/task-impls/src/helpers.rs @@ -9,7 +9,7 @@ use std::{ sync::Arc, }; -use async_broadcast::{InactiveReceiver, Receiver, SendError, Sender}; +use async_broadcast::{Receiver, SendError, Sender}; use async_lock::RwLock; use committable::{Commitment, Committable}; use hotshot_task::dependency::{Dependency, EventDependency}; @@ -192,6 +192,89 @@ impl Default for LeafChainTraversalOutcome { } } +/// Calculate the new decided leaf chain based on the rules of HotStuff 2 +/// +/// # Panics +/// Can't actually panic +pub async fn decide_from_proposal_2( + proposal: &QuorumProposal, + consensus: OuterConsensus, + existing_upgrade_cert: Arc>>>, + public_key: &TYPES::SignatureKey, +) -> LeafChainTraversalOutcome { + let mut res = LeafChainTraversalOutcome::default(); + let consensus_reader = consensus.read().await; + let proposed_leaf = Leaf::from_quorum_proposal(proposal); + res.new_locked_view_number = Some(proposed_leaf.justify_qc().view_number()); + + // If we don't have the proposal's parent, return early + let Some(parent_info) = consensus_reader.parent_leaf_info(&proposed_leaf, public_key) else { + return res; + }; + // Get the parent's parent and check whether it is consecutive in view with the parent; if so, we can decide + // the grandparent's view. If not, we're done. + let Some(grand_parent_info) = consensus_reader.parent_leaf_info(&parent_info.leaf, public_key) + else { + return res; + }; + if grand_parent_info.leaf.view_number() + 1 != parent_info.leaf.view_number() { + return res; + } + res.new_decide_qc = Some(parent_info.leaf.justify_qc().clone()); + let decided_view_number = grand_parent_info.leaf.view_number(); + res.new_decided_view_number = Some(decided_view_number); + // We've reached a decide; now get the leaf chain all the way back to the last decided view, not including it.
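+ // For each leaf in that chain we collect the leaf itself, any upgrade certificate it carries, + // its saved payload (when we have it locally), and the commitments of its transactions.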
+ let old_anchor_view = consensus_reader.last_decided_view(); + let mut current_leaf_info = Some(grand_parent_info); + let existing_upgrade_cert_reader = existing_upgrade_cert.read().await; + let mut txns = HashSet::new(); + while current_leaf_info + .as_ref() + .is_some_and(|info| info.leaf.view_number() > old_anchor_view) + { + // unwrap is safe; we just checked that the option is `Some` + let info = &mut current_leaf_info.unwrap(); + // Check if there's a new upgrade certificate available. + if let Some(cert) = info.leaf.upgrade_certificate() { + if info.leaf.upgrade_certificate() != *existing_upgrade_cert_reader { + if cert.data.decide_by < decided_view_number { + tracing::warn!("Failed to decide an upgrade certificate in time. Ignoring."); + } else { + tracing::info!("Reached decide on upgrade certificate: {:?}", cert); + res.decided_upgrade_cert = Some(cert.clone()); + } + } + } + + res.leaf_views.push(info.clone()); + // If the block payload is available for this leaf, include it in + // the leaf chain that we send to the client. + if let Some(encoded_txns) = consensus_reader + .saved_payloads() + .get(&info.leaf.view_number()) + { + let payload = + BlockPayload::from_bytes(encoded_txns, info.leaf.block_header().metadata()); + + info.leaf.fill_block_payload_unchecked(payload); + } + + if let Some(ref payload) = info.leaf.block_payload() { + for txn in payload.transaction_commitments(info.leaf.block_header().metadata()) { + txns.insert(txn); + } + } + + current_leaf_info = consensus_reader.parent_leaf_info(&info.leaf, public_key); + } + + if !txns.is_empty() { + res.included_txns = Some(txns); + } + + res +} + /// Ascends the leaf chain by traversing through the parent commitments of the proposal. We begin /// by obtaining the parent view, and if we are in a chain (i.e. the next view from the parent is /// one view newer), then we begin attempting to form the chain.
This is a direct impl from @@ -344,7 +427,7 @@ pub async fn decide_from_proposal( pub(crate) async fn parent_leaf_and_state( next_proposal_view_number: TYPES::View, event_sender: &Sender>>, - event_receiver: &InactiveReceiver>>, + event_receiver: &Receiver>>, quorum_membership: Arc, public_key: TYPES::SignatureKey, private_key: ::PrivateKey, @@ -370,7 +453,7 @@ pub(crate) async fn parent_leaf_and_state( let _ = fetch_proposal( parent_view_number, event_sender.clone(), - event_receiver.activate_cloned(), + event_receiver.clone(), quorum_membership, consensus.clone(), public_key.clone(), diff --git a/crates/task-impls/src/network.rs b/crates/task-impls/src/network.rs index d623962424..a80c34759b 100644 --- a/crates/task-impls/src/network.rs +++ b/crates/task-impls/src/network.rs @@ -77,7 +77,7 @@ impl NetworkMessageTaskState { GeneralConsensusMessage::ProposalRequested(req, sig) => { HotShotEvent::QuorumProposalRequestRecv(req, sig) } - GeneralConsensusMessage::LeaderProposalAvailable(proposal) => { + GeneralConsensusMessage::ProposalResponse(proposal) => { HotShotEvent::QuorumProposalResponseRecv(proposal) } GeneralConsensusMessage::Vote(vote) => { @@ -114,6 +114,7 @@ impl NetworkMessageTaskState { tracing::error!("Received upgrade vote!"); HotShotEvent::UpgradeVoteRecv(message) } + GeneralConsensusMessage::HighQC(qc) => HotShotEvent::HighQcRecv(qc, sender), }, SequencingMessage::Da(da_message) => match da_message { DaConsensusMessage::DaProposal(proposal) => { @@ -428,7 +429,7 @@ impl< HotShotEvent::QuorumProposalResponseSend(sender_key, proposal) => Some(( sender_key.clone(), MessageKind::::from_consensus_message(SequencingMessage::General( - GeneralConsensusMessage::LeaderProposalAvailable(proposal), + GeneralConsensusMessage::ProposalResponse(proposal), )), TransmitType::Direct(sender_key), )), diff --git a/crates/task-impls/src/quorum_proposal/handlers.rs b/crates/task-impls/src/quorum_proposal/handlers.rs index 4aa5394dbe..cfc46f4201 100644 --- a/crates/task-impls/src/quorum_proposal/handlers.rs +++ b/crates/task-impls/src/quorum_proposal/handlers.rs @@ -7,10 +7,19 @@ //! This module holds the dependency task for the QuorumProposalTask. It is spawned whenever an event that could //! initiate a proposal occurs. -use std::{marker::PhantomData, sync::Arc}; +use std::{ + marker::PhantomData, + sync::Arc, + time::{Duration, Instant}, +}; +use crate::{ + events::HotShotEvent, + helpers::{broadcast_event, parent_leaf_and_state}, + quorum_proposal::{UpgradeLock, Versions}, +}; use anyhow::{ensure, Context, Result}; -use async_broadcast::{InactiveReceiver, Sender}; +use async_broadcast::{Receiver, Sender}; use async_lock::RwLock; use hotshot_task::dependency_task::HandleDepOutput; use hotshot_types::{ @@ -19,20 +28,16 @@ use hotshot_types::{ message::Proposal, simple_certificate::{QuorumCertificate, UpgradeCertificate}, traits::{ - block_contents::BlockHeader, node_implementation::NodeType, signature_key::SignatureKey, + block_contents::BlockHeader, + node_implementation::{ConsensusTime, NodeType}, + signature_key::SignatureKey, }, - vote::HasViewNumber, + vote::{Certificate, HasViewNumber}, }; use tracing::instrument; use utils::anytrace::*; use vbs::version::StaticVersionType; -use crate::{ - events::HotShotEvent, - helpers::{broadcast_event, parent_leaf_and_state}, - quorum_proposal::{UpgradeLock, Versions}, -}; - /// Proposal dependency types. These types represent events that precipitate a proposal. 
#[derive(PartialEq, Debug)] pub(crate) enum ProposalDependency { @@ -67,7 +72,7 @@ pub struct ProposalDependencyHandle { pub sender: Sender>>, /// The event receiver. - pub receiver: InactiveReceiver>>, + pub receiver: Receiver>>, /// Immutable instance state pub instance_state: Arc, @@ -84,6 +89,8 @@ pub struct ProposalDependencyHandle { /// Shared consensus task state pub consensus: OuterConsensus, + /// View timeout from config. + pub timeout: u64, /// The most recent upgrade certificate this node formed. /// Note: this is ONLY for certificates that have been formed internally, /// so that we can propose with them. @@ -97,9 +104,79 @@ pub struct ProposalDependencyHandle { /// The node's id pub id: u64, + + /// The time this view started + pub view_start_time: Instant, + + /// The highest QC we've seen at the start of this task + pub highest_qc: QuorumCertificate, } impl ProposalDependencyHandle { + /// Return the next valid high QC we receive from the event stream + async fn wait_for_qc_event( + &self, + rx: &mut Receiver>>, + ) -> Option> { + while let Ok(event) = rx.recv_direct().await { + if let HotShotEvent::HighQcRecv(qc, _sender) = event.as_ref() { + if qc + .is_valid_cert( + self.quorum_membership.as_ref(), + TYPES::Epoch::new(0), + &self.upgrade_lock, + ) + .await + { + return Some(qc.clone()); + } + } + } + None + } + /// Waits for the configured timeout for nodes to send HighQc messages to us. We'll + /// then propose with the highest QC from among those we receive. + async fn wait_for_highest_qc(&mut self) { + tracing::debug!("Waiting for the highest QC"); + // If we haven't upgraded to HotStuff 2 yet, don't wait; we'll propose with our existing high QC + if self + .upgrade_lock + .version(self.view_number) + .await + .is_ok_and(|version| version < V::Epochs::VERSION) + { + return; + } + let wait_duration = Duration::from_millis(self.timeout / 2); + + // TODO configure timeout + while self.view_start_time.elapsed() < wait_duration { + let Some(time_spent) = Instant::now().checked_duration_since(self.view_start_time) + else { + // Shouldn't be possible; now must be after the start + return; + }; + let Some(time_left) = wait_duration.checked_sub(time_spent) else { + // No time left + return; + }; + let Ok(maybe_qc) = tokio::time::timeout( + time_left, + self.wait_for_qc_event(&mut self.receiver.clone()), + ) + .await + else { + // we timed out; don't wait any longer + return; + }; + let Some(qc) = maybe_qc else { + continue; + }; + if qc.view_number() > self.highest_qc.view_number() { + self.highest_qc = qc; + } + } + } /// Publishes a proposal given the [`CommitmentAndMetadata`], [`VidDisperse`] /// and high qc [`hotshot_types::simple_certificate::QuorumCertificate`], /// with optional [`ViewChangeEvidence`]. @@ -170,7 +247,7 @@ impl ProposalDependencyHandle { let metadata = commitment_and_metadata.metadata.clone(); let block_header = if version >= V::Epochs::VERSION - && self.consensus.read().await.is_high_qc_forming_eqc() + && self.consensus.read().await.is_qc_forming_eqc(&parent_qc) { tracing::info!("Reached end of epoch.
Proposing the same block again to form an eQC."); let block_header = parent_leaf.block_header().clone(); @@ -261,7 +338,7 @@ impl HandleDepOutput for ProposalDependencyHandle< type Output = Vec>>>>; #[allow(clippy::no_effect_underscore_binding, clippy::too_many_lines)] - async fn handle_dep_result(self, res: Self::Output) { + async fn handle_dep_result(mut self, res: Self::Output) { let mut commit_and_metadata: Option> = None; let mut timeout_certificate = None; let mut view_sync_finalize_cert = None; @@ -304,7 +381,21 @@ impl HandleDepOutput for ProposalDependencyHandle< } } - let parent_qc = parent_qc.unwrap_or(self.consensus.read().await.high_qc().clone()); + let Ok(version) = self.upgrade_lock.version(self.view_number).await else { + tracing::error!( + "Failed to get version for view {:?}, not proposing", + self.view_number + ); + return; + }; + let parent_qc = if let Some(qc) = parent_qc { + qc + } else if version < V::Epochs::VERSION { + self.consensus.read().await.high_qc().clone() + } else { + self.wait_for_highest_qc().await; + self.highest_qc.clone() + }; if commit_and_metadata.is_none() { tracing::error!( diff --git a/crates/task-impls/src/quorum_proposal/mod.rs b/crates/task-impls/src/quorum_proposal/mod.rs index 02f6ee23e8..a6b641e602 100644 --- a/crates/task-impls/src/quorum_proposal/mod.rs +++ b/crates/task-impls/src/quorum_proposal/mod.rs @@ -4,7 +4,7 @@ // You should have received a copy of the MIT License // along with the HotShot repository. If not, see . -use std::{collections::BTreeMap, sync::Arc}; +use std::{collections::BTreeMap, sync::Arc, time::Instant}; use async_broadcast::{Receiver, Sender}; use async_lock::RwLock; @@ -19,7 +19,7 @@ use hotshot_types::{ consensus::OuterConsensus, event::Event, message::UpgradeLock, - simple_certificate::UpgradeCertificate, + simple_certificate::{QuorumCertificate, UpgradeCertificate}, traits::{ election::Membership, node_implementation::{ConsensusTime, NodeImplementation, NodeType, Versions}, @@ -91,6 +91,9 @@ pub struct QuorumProposalTaskState /// Number of blocks in an epoch, zero means there are no epochs pub epoch_height: u64, + + /// The highest QC we've seen at the start of this task + pub highest_qc: QuorumCertificate, } impl, V: Versions> @@ -312,15 +315,18 @@ impl, V: Versions> latest_proposed_view: self.latest_proposed_view, view_number, sender: event_sender, - receiver: event_receiver.deactivate(), + receiver: event_receiver, quorum_membership: Arc::clone(&self.quorum_membership), public_key: self.public_key.clone(), private_key: self.private_key.clone(), instance_state: Arc::clone(&self.instance_state), consensus: OuterConsensus::new(Arc::clone(&self.consensus.inner_consensus)), + timeout: self.timeout, formed_upgrade_certificate: self.formed_upgrade_certificate.clone(), upgrade_lock: self.upgrade_lock.clone(), id: self.id, + view_start_time: Instant::now(), + highest_qc: self.highest_qc.clone(), }, ); self.proposal_dependencies @@ -506,6 +512,20 @@ impl, V: Versions> HotShotEvent::ViewChange(view, _) | HotShotEvent::Timeout(view) => { self.cancel_tasks(*view); } + HotShotEvent::HighQcSend(qc, _sender) => { + ensure!(qc.view_number() > self.highest_qc.view_number()); + let epoch_number = self.consensus.read().await.cur_epoch(); + ensure!( + qc.is_valid_cert( + self.quorum_membership.as_ref(), + epoch_number, + &self.upgrade_lock + ) + .await, + warn!("Quorum certificate {:?} was invalid", qc.data()) + ); + self.highest_qc = qc.clone(); + } _ => {} } Ok(()) } diff --git
a/crates/task-impls/src/quorum_proposal_recv/handlers.rs b/crates/task-impls/src/quorum_proposal_recv/handlers.rs index 24ea89a3c2..d7ce8aefc0 100644 --- a/crates/task-impls/src/quorum_proposal_recv/handlers.rs +++ b/crates/task-impls/src/quorum_proposal_recv/handlers.rs @@ -40,7 +40,7 @@ use crate::{ }, quorum_proposal_recv::{UpgradeLock, Versions}, }; - +use vbs::version::StaticVersionType; /// Update states in the event that the parent state is not found for a given `proposal`. #[instrument(skip_all)] async fn validate_proposal_liveness, V: Versions>( @@ -77,6 +77,16 @@ async fn validate_proposal_liveness consensus_writer.locked_view(); + // If we are using HotStuff 2, update our locked view to the proposal's justify QC whenever its view is greater than our current locked view + if liveness_check + && validation_info + .upgrade_lock + .version(leaf.view_number()) + .await + .is_ok_and(|v| v >= V::Epochs::VERSION) + { + consensus_writer.update_locked_view(proposal.data.justify_qc.clone().view_number())?; + } drop(consensus_writer); diff --git a/crates/task-impls/src/quorum_proposal_recv/mod.rs b/crates/task-impls/src/quorum_proposal_recv/mod.rs index 8f9d9e3f05..332ecaf241 100644 --- a/crates/task-impls/src/quorum_proposal_recv/mod.rs +++ b/crates/task-impls/src/quorum_proposal_recv/mod.rs @@ -11,7 +11,7 @@ use std::{collections::BTreeMap, sync::Arc}; use self::handlers::handle_quorum_proposal_recv; use crate::{ events::{HotShotEvent, ProposalMissing}, - helpers::{broadcast_event, fetch_proposal, parent_leaf_and_state}, + helpers::{broadcast_event, fetch_proposal}, }; use async_broadcast::{broadcast, Receiver, Sender}; use async_lock::RwLock; diff --git a/crates/task-impls/src/quorum_vote/handlers.rs b/crates/task-impls/src/quorum_vote/handlers.rs index d4b9edc6c7..198d075de5 100644 --- a/crates/task-impls/src/quorum_vote/handlers.rs +++ b/crates/task-impls/src/quorum_vote/handlers.rs @@ -26,11 +26,15 @@ use hotshot_types::{ }; use tracing::instrument; use utils::anytrace::*; +use vbs::version::StaticVersionType; use super::QuorumVoteTaskState; use crate::{ events::HotShotEvent, - helpers::{broadcast_event, decide_from_proposal, fetch_proposal, LeafChainTraversalOutcome}, + helpers::{ + broadcast_event, decide_from_proposal, decide_from_proposal_2, fetch_proposal, + LeafChainTraversalOutcome, + }, quorum_vote::Versions, }; @@ -44,6 +48,11 @@ pub(crate) async fn handle_quorum_proposal_validated< proposal: &QuorumProposal, task_state: &mut QuorumVoteTaskState, ) -> Result<()> { + let version = task_state + .upgrade_lock + .version(proposal.view_number()) + .await?; + let LeafChainTraversalOutcome { new_locked_view_number, new_decided_view_number, @@ -51,13 +60,23 @@ pub(crate) async fn handle_quorum_proposal_validated< leaf_views, included_txns, decided_upgrade_cert, - } = decide_from_proposal( - proposal, - OuterConsensus::new(Arc::clone(&task_state.consensus.inner_consensus)), - Arc::clone(&task_state.upgrade_lock.decided_upgrade_certificate), - &task_state.public_key, - ) - .await; + } = if version >= V::Epochs::VERSION { + decide_from_proposal_2( + proposal, + OuterConsensus::new(Arc::clone(&task_state.consensus.inner_consensus)), + Arc::clone(&task_state.upgrade_lock.decided_upgrade_certificate), + &task_state.public_key, + ) + .await + } else { + decide_from_proposal( + proposal, + OuterConsensus::new(Arc::clone(&task_state.consensus.inner_consensus)), + Arc::clone(&task_state.upgrade_lock.decided_upgrade_certificate), + &task_state.public_key, + ) + .await + }; if let Some(cert) = decided_upgrade_cert.clone() { let mut
decided_certificate_lock = task_state diff --git a/crates/testing/tests/tests_1/test_success.rs b/crates/testing/tests/tests_1/test_success.rs index 588c718a83..e81060aedb 100644 --- a/crates/testing/tests/tests_1/test_success.rs +++ b/crates/testing/tests/tests_1/test_success.rs @@ -17,6 +17,7 @@ use hotshot_macros::cross_tests; use hotshot_testing::{ block_builder::SimpleBuilderImplementation, completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription}, + spinning_task::{ChangeNode, NodeAction, SpinningTaskDescription}, test_builder::TestDescription, view_sync_task::ViewSyncTaskDescription, }; @@ -151,3 +152,38 @@ cross_tests!( } }, ); + +// Test to make sure we can decide in just 3 views +// This test fails with the old decide rule +cross_tests!( + TestName: test_shorter_decide, + Impls: [MemoryImpl], + Types: [TestTypes], + Versions: [EpochsTestVersions], + Ignore: false, + Metadata: { + let mut metadata = TestDescription { + completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder( + TimeBasedCompletionTaskDescription { + duration: Duration::from_millis(100000), + }, + ), + ..TestDescription::default() + }; + // After the first 3 leaders, the next leader is down. This is a hack to make sure we decide in + // 3 views, or else we get a timeout + let dead_nodes = vec![ + ChangeNode { + idx: 4, + updown: NodeAction::Down, + }, + + ]; + metadata.spinning_properties = SpinningTaskDescription { + node_changes: vec![(1, dead_nodes)] + }; + metadata.overall_safety_properties.num_successful_views = 1; + metadata.overall_safety_properties.num_failed_views = 0; + metadata + }, +); diff --git a/crates/testing/tests/tests_3/test_with_failures_half_f.rs b/crates/testing/tests/tests_3/test_with_failures_half_f.rs index 797aa77cab..a8a2dbb14b 100644 --- a/crates/testing/tests/tests_3/test_with_failures_half_f.rs +++ b/crates/testing/tests/tests_3/test_with_failures_half_f.rs @@ -5,7 +5,7 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::{ - node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, + node_types::{EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, state_types::TestTypes, }; use hotshot_macros::cross_tests; @@ -52,3 +52,40 @@ cross_tests!( metadata } ); +cross_tests!( + TestName: test_with_failures_half_f_epochs, + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Types: [TestTypes], + Versions: [EpochsTestVersions], + Ignore: false, + Metadata: { + let mut metadata = TestDescription::default_more_nodes(); + metadata.num_bootstrap_nodes = 17; + // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shut down the + // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the + // following issue.
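+ // Shut down the last three nodes (half of f) once the test reaches view 5.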
+ let dead_nodes = vec![ + ChangeNode { + idx: 17, + updown: NodeAction::Down, + }, + ChangeNode { + idx: 18, + updown: NodeAction::Down, + }, + ChangeNode { + idx: 19, + updown: NodeAction::Down, + }, + ]; + + metadata.spinning_properties = SpinningTaskDescription { + node_changes: vec![(5, dead_nodes)] + }; + + metadata.overall_safety_properties.num_failed_views = 3; + // Make sure we keep committing rounds after the bad leaders, but not the full 50 because of the numerous timeouts + metadata.overall_safety_properties.num_successful_views = 22; + metadata + } +); diff --git a/crates/types/src/consensus.rs b/crates/types/src/consensus.rs index eb8fe4ade2..499430c0ca 100644 --- a/crates/types/src/consensus.rs +++ b/crates/types/src/consensus.rs @@ -23,7 +23,7 @@ pub use crate::utils::{View, ViewInner}; use crate::{ data::{Leaf, QuorumProposal, VidDisperse, VidDisperseShare}, error::HotShotError, - event::HotShotAction, + event::{HotShotAction, LeafInfo}, message::{Proposal, UpgradeLock}, simple_certificate::{DaCertificate, QuorumCertificate}, traits::{ @@ -37,7 +37,7 @@ use crate::{ epoch_from_block_number, BuilderCommitment, LeafCommitment, StateAndDelta, Terminator, }, vid::VidCommitment, - vote::HasViewNumber, + vote::{Certificate, HasViewNumber}, }; /// A type alias for `HashMap, T>` @@ -497,6 +497,36 @@ impl Consensus { Ok(()) } + /// Get the parent `LeafInfo` from a given leaf and our public key. + /// Returns None if we don't have the data in our state + pub fn parent_leaf_info( + &self, + leaf: &Leaf, + public_key: &TYPES::SignatureKey, + ) -> Option> { + let parent_view_number = leaf.justify_qc().view_number(); + let parent_leaf = self + .saved_leaves + .get(&leaf.justify_qc().data().leaf_commit)?; + let parent_state_and_delta = self.state_and_delta(parent_view_number); + let (Some(state), delta) = parent_state_and_delta else { + return None; + }; + let parent_vid = self + .vid_shares() + .get(&parent_view_number)? + .get(public_key) + .cloned() + .map(|prop| prop.data); + + Some(LeafInfo { + leaf: parent_leaf.clone(), + state, + delta, + vid_share: parent_vid, + }) + } + /// Update the current epoch. /// # Errors /// Can return an error when the new epoch_number is not higher than the existing epoch number. @@ -788,13 +818,14 @@ impl Consensus { /// # Panics /// On inconsistent stored entries pub fn collect_garbage(&mut self, old_anchor_view: TYPES::View, new_anchor_view: TYPES::View) { + let gc_view = TYPES::View::new(new_anchor_view.saturating_sub(1)); // state check let anchor_entry = self .validated_state_map .iter() .next() .expect("INCONSISTENT STATE: anchor leaf not in state map!"); - if *anchor_entry.0 != old_anchor_view { + if **anchor_entry.0 != old_anchor_view.saturating_sub(1) { tracing::error!( "Something about GC has failed. Older leaf exists than the previous anchor leaf."
); @@ -803,15 +834,15 @@ impl Consensus { self.saved_da_certs .retain(|view_number, _| *view_number >= old_anchor_view); self.validated_state_map - .range(old_anchor_view..new_anchor_view) + .range(old_anchor_view..gc_view) .filter_map(|(_view_number, view)| view.leaf_commitment()) .for_each(|leaf| { self.saved_leaves.remove(&leaf); }); - self.validated_state_map = self.validated_state_map.split_off(&new_anchor_view); - self.saved_payloads = self.saved_payloads.split_off(&new_anchor_view); - self.vid_shares = self.vid_shares.split_off(&new_anchor_view); - self.last_proposals = self.last_proposals.split_off(&new_anchor_view); + self.validated_state_map = self.validated_state_map.split_off(&gc_view); + self.saved_payloads = self.saved_payloads.split_off(&gc_view); + self.vid_shares = self.vid_shares.split_off(&gc_view); + self.last_proposals = self.last_proposals.split_off(&gc_view); } /// Gets the last decided leaf. @@ -885,10 +916,10 @@ impl Consensus { Some(()) } - /// Return true if the high QC takes part in forming an eQC, i.e. + /// Return true if the QC takes part in forming an eQC, i.e. /// it is one of the 3-chain certificates but not the eQC itself - pub fn is_high_qc_forming_eqc(&self) -> bool { - let high_qc_leaf_commit = self.high_qc().data.leaf_commit; + pub fn is_qc_forming_eqc(&self, qc: &QuorumCertificate) -> bool { + let high_qc_leaf_commit = qc.data.leaf_commit; let is_high_qc_extended = self.is_leaf_extended(high_qc_leaf_commit); if is_high_qc_extended { tracing::debug!("We have formed an eQC!"); @@ -896,6 +927,11 @@ impl Consensus { self.is_leaf_for_last_block(high_qc_leaf_commit) && !is_high_qc_extended } + /// Returns true if our high qc is forming an eQC + pub fn is_high_qc_forming_eqc(&self) -> bool { + self.is_qc_forming_eqc(self.high_qc()) + } + /// Return true if the given leaf takes part in forming an eQC, i.e. /// it is one of the 3-chain leaves but not the eQC leaf itself pub fn is_leaf_forming_eqc(&self, leaf_commit: LeafCommitment) -> bool { @@ -950,7 +986,6 @@ impl Consensus { }, ) { is_leaf_extended = false; - tracing::trace!("The chain is broken. Leaf ascension failed."); tracing::debug!("Leaf ascension failed; error={e}"); } tracing::trace!("Can the given leaf form an eQC? {}", is_leaf_extended); diff --git a/crates/types/src/message.rs b/crates/types/src/message.rs index 217b5d7578..bf31005c9a 100644 --- a/crates/types/src/message.rs +++ b/crates/types/src/message.rs @@ -29,7 +29,7 @@ use crate::{ data::{DaProposal, Leaf, QuorumProposal, UpgradeProposal, VidDisperseShare}, request_response::ProposalRequestPayload, simple_certificate::{ - DaCertificate, UpgradeCertificate, ViewSyncCommitCertificate2, + DaCertificate, QuorumCertificate, UpgradeCertificate, ViewSyncCommitCertificate2, ViewSyncFinalizeCertificate2, ViewSyncPreCommitCertificate2, }, simple_vote::{ @@ -159,14 +159,6 @@ impl ViewMessage for MessageKind { MessageKind::External(_) => TYPES::View::new(1), } } - - fn purpose(&self) -> MessagePurpose { - match &self { - MessageKind::Consensus(message) => message.purpose(), - MessageKind::Data(_) => MessagePurpose::Data, - MessageKind::External(_) => MessagePurpose::External, - } - } } #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] @@ -212,8 +204,11 @@ pub enum GeneralConsensusMessage { ::PureAssembledSignatureType, ), - /// The leader has responded with a valid proposal. - LeaderProposalAvailable(Proposal>), + /// A replica has responded with a valid proposal. 
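+ /// (Renamed from `LeaderProposalAvailable`: the responder need not be the leader.)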
+ ProposalResponse(Proposal>), + + /// Message for the next leader containing our highest QC + HighQC(QuorumCertificate), } #[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Hash, Eq)] @@ -258,7 +253,7 @@ impl SequencingMessage { p.data.view_number() } GeneralConsensusMessage::ProposalRequested(req, _) => req.view_number, - GeneralConsensusMessage::LeaderProposalAvailable(proposal) => { + GeneralConsensusMessage::ProposalResponse(proposal) => { proposal.data.view_number() } GeneralConsensusMessage::Vote(vote_message) => vote_message.view_number(), @@ -279,6 +274,7 @@ impl SequencingMessage { } GeneralConsensusMessage::UpgradeProposal(message) => message.data.view_number(), GeneralConsensusMessage::UpgradeVote(message) => message.view_number(), + GeneralConsensusMessage::HighQC(qc) => qc.view_number(), } } SequencingMessage::Da(da_message) => { @@ -295,42 +291,6 @@ impl SequencingMessage { } } } - - // TODO: Disable panic after the `ViewSync` case is implemented. - /// Get the message purpos - #[allow(clippy::panic)] - fn purpose(&self) -> MessagePurpose { - match &self { - SequencingMessage::General(general_message) => match general_message { - GeneralConsensusMessage::Proposal(_) => MessagePurpose::Proposal, - GeneralConsensusMessage::ProposalRequested(_, _) - | GeneralConsensusMessage::LeaderProposalAvailable(_) => { - MessagePurpose::LatestProposal - } - GeneralConsensusMessage::Vote(_) | GeneralConsensusMessage::TimeoutVote(_) => { - MessagePurpose::Vote - } - GeneralConsensusMessage::ViewSyncPreCommitVote(_) - | GeneralConsensusMessage::ViewSyncCommitVote(_) - | GeneralConsensusMessage::ViewSyncFinalizeVote(_) => MessagePurpose::ViewSyncVote, - - GeneralConsensusMessage::ViewSyncPreCommitCertificate(_) - | GeneralConsensusMessage::ViewSyncCommitCertificate(_) - | GeneralConsensusMessage::ViewSyncFinalizeCertificate(_) => { - MessagePurpose::ViewSyncCertificate - } - - GeneralConsensusMessage::UpgradeProposal(_) => MessagePurpose::UpgradeProposal, - GeneralConsensusMessage::UpgradeVote(_) => MessagePurpose::UpgradeVote, - }, - SequencingMessage::Da(da_message) => match da_message { - DaConsensusMessage::DaProposal(_) => MessagePurpose::Proposal, - DaConsensusMessage::DaVote(_) => MessagePurpose::Vote, - DaConsensusMessage::DaCertificate(_) => MessagePurpose::DaCertificate, - DaConsensusMessage::VidDisperseMsg(_) => MessagePurpose::VidDisperse, - }, - } - } } #[derive(Serialize, Deserialize, Derivative, Clone, Debug, PartialEq, Eq, Hash)] diff --git a/crates/types/src/traits/network.rs b/crates/types/src/traits/network.rs index 5a01560832..750ff4d4ec 100644 --- a/crates/types/src/traits/network.rs +++ b/crates/types/src/traits/network.rs @@ -30,11 +30,7 @@ use thiserror::Error; use tokio::{sync::mpsc::error::TrySendError, time::sleep}; use super::{node_implementation::NodeType, signature_key::SignatureKey}; -use crate::{ - data::ViewNumber, - message::{MessagePurpose, SequencingMessage}, - BoxSyncFuture, -}; +use crate::{data::ViewNumber, message::SequencingMessage, BoxSyncFuture}; /// Centralized server specific errors #[derive(Debug, Error, Serialize, Deserialize)] @@ -122,9 +118,6 @@ pub trait Id: Eq + PartialEq + Hash {} pub trait ViewMessage { /// get the view out of the message fn view_number(&self) -> TYPES::View; - // TODO move out of this trait. - /// get the purpose of the message - fn purpose(&self) -> MessagePurpose; } /// A request for some data that the consensus layer is asking for.