diff --git a/beacon_node/beacon_chain/src/attestation_verification.rs b/beacon_node/beacon_chain/src/attestation_verification.rs index 06fba937d84..5a730719bfb 100644 --- a/beacon_node/beacon_chain/src/attestation_verification.rs +++ b/beacon_node/beacon_chain/src/attestation_verification.rs @@ -35,7 +35,6 @@ mod batch; use crate::{ - beacon_chain::VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT, metrics, observed_aggregates::{ObserveOutcome, ObservedAttestationKey}, observed_attesters::Error as ObservedAttestersError, @@ -1174,10 +1173,7 @@ pub fn verify_attestation_signature( let signature_setup_timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_SIGNATURE_SETUP_TIMES); - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let fork = chain .spec @@ -1272,10 +1268,7 @@ pub fn verify_signed_aggregate_signatures( signed_aggregate: &SignedAggregateAndProof, indexed_attestation: &IndexedAttestation, ) -> Result { - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let aggregator_index = signed_aggregate.message().aggregator_index(); if aggregator_index >= pubkey_cache.len() as u64 { diff --git a/beacon_node/beacon_chain/src/attestation_verification/batch.rs b/beacon_node/beacon_chain/src/attestation_verification/batch.rs index 07fad1bd4a8..5f856140ba8 100644 --- a/beacon_node/beacon_chain/src/attestation_verification/batch.rs +++ b/beacon_node/beacon_chain/src/attestation_verification/batch.rs @@ -13,10 +13,7 @@ use super::{ CheckAttestationSignature, Error, IndexedAggregatedAttestation, IndexedUnaggregatedAttestation, VerifiedAggregatedAttestation, VerifiedUnaggregatedAttestation, }; -use crate::{ - beacon_chain::VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT, metrics, BeaconChain, BeaconChainError, - BeaconChainTypes, -}; +use crate::{metrics, BeaconChain, BeaconChainError, BeaconChainTypes}; use bls::verify_signature_sets; use state_processing::signature_sets::{ indexed_attestation_signature_set_from_pubkeys, signed_aggregate_selection_proof_signature_set, @@ -60,10 +57,7 @@ where let signature_setup_timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_BATCH_AGG_SIGNATURE_SETUP_TIMES); - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let mut signature_sets = Vec::with_capacity(num_indexed * 3); // Iterate, flattening to get only the `Ok` values. 
@@ -169,10 +163,7 @@ where &metrics::ATTESTATION_PROCESSING_BATCH_UNAGG_SIGNATURE_SETUP_TIMES, ); - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let mut signature_sets = Vec::with_capacity(num_partially_verified); diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index f795128b714..6d2973021a4 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -63,7 +63,6 @@ use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache}; use crate::sync_committee_verification::{ Error as SyncCommitteeError, VerifiedSyncCommitteeMessage, VerifiedSyncContribution, }; -use crate::timeout_rw_lock::TimeoutRwLock; use crate::validator_monitor::{ get_slot_delay_ms, timestamp_now, ValidatorMonitor, HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS, @@ -132,17 +131,6 @@ pub type ForkChoiceError = fork_choice::Error; /// Alias to appease clippy. type HashBlockTuple = (Hash256, RpcBlock); -/// The time-out before failure during an operation to take a read/write RwLock on the -/// attestation cache. -pub const ATTESTATION_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(1); - -/// The time-out before failure during an operation to take a read/write RwLock on the -/// validator pubkey cache. -pub const VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(1); - -/// The timeout for the eth1 finalization cache -pub const ETH1_FINALIZATION_CACHE_LOCK_TIMEOUT: Duration = Duration::from_millis(200); - // These keys are all zero because they get stored in different columns, see `DBColumn` type. pub const BEACON_CHAIN_DB_KEY: Hash256 = Hash256::zero(); pub const OP_POOL_DB_KEY: Hash256 = Hash256::zero(); @@ -465,13 +453,13 @@ pub struct BeaconChain { /// Used to track the heads of the beacon chain. pub(crate) head_tracker: Arc<HeadTracker>, /// Caches the attester shuffling for a given epoch and shuffling key root. - pub shuffling_cache: TimeoutRwLock<ShufflingCache>, + pub shuffling_cache: RwLock<ShufflingCache>, /// A cache of eth1 deposit data at epoch boundaries for deposit finalization - pub eth1_finalization_cache: TimeoutRwLock<Eth1FinalizationCache>, + pub eth1_finalization_cache: RwLock<Eth1FinalizationCache>, /// Caches the beacon block proposer shuffling for a given epoch and shuffling key root. pub beacon_proposer_cache: Arc<Mutex<BeaconProposerCache>>, /// Caches a map of `validator_index -> validator_pubkey`. - pub(crate) validator_pubkey_cache: TimeoutRwLock<ValidatorPubkeyCache<T>>, + pub(crate) validator_pubkey_cache: RwLock<ValidatorPubkeyCache<T>>, /// A cache used when producing attestations. pub(crate) attester_cache: Arc<AttesterCache>, /// A cache used when producing attestations whilst the head block is still being imported. @@ -1472,10 +1460,7 @@ impl BeaconChain { /// /// May return an error if acquiring a read-lock on the `validator_pubkey_cache` times out.
pub fn validator_index(&self, pubkey: &PublicKeyBytes) -> Result, Error> { - let pubkey_cache = self - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = self.validator_pubkey_cache.read(); Ok(pubkey_cache.get_index(pubkey)) } @@ -1488,10 +1473,7 @@ impl BeaconChain { &self, validator_pubkeys: impl Iterator, ) -> Result, Error> { - let pubkey_cache = self - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = self.validator_pubkey_cache.read(); validator_pubkeys .map(|pubkey| { @@ -1516,10 +1498,7 @@ impl BeaconChain { /// /// May return an error if acquiring a read-lock on the `validator_pubkey_cache` times out. pub fn validator_pubkey(&self, validator_index: usize) -> Result, Error> { - let pubkey_cache = self - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = self.validator_pubkey_cache.read(); Ok(pubkey_cache.get(validator_index).cloned()) } @@ -1529,10 +1508,7 @@ impl BeaconChain { &self, validator_index: usize, ) -> Result, Error> { - let pubkey_cache = self - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = self.validator_pubkey_cache.read(); Ok(pubkey_cache.get_pubkey_bytes(validator_index).copied()) } @@ -1546,10 +1522,7 @@ impl BeaconChain { &self, validator_indices: &[usize], ) -> Result, Error> { - let pubkey_cache = self - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = self.validator_pubkey_cache.read(); let mut map = HashMap::with_capacity(validator_indices.len()); for &validator_index in validator_indices { @@ -3506,11 +3479,12 @@ impl BeaconChain { // is so we don't have to think about lock ordering with respect to the fork choice lock. // There are a bunch of places where we lock both fork choice and the pubkey cache and it // would be difficult to check that they all lock fork choice first. - let mut ops = self - .validator_pubkey_cache - .try_write_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(Error::ValidatorPubkeyCacheLockTimeout)? - .import_new_pubkeys(&state)?; + let mut ops = { + let _timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_PUBKEY_CACHE_LOCK); + self.validator_pubkey_cache + .write() + .import_new_pubkeys(&state)? + }; // Apply the state to the attester cache, only if it is from the previous epoch or later. // @@ -4116,18 +4090,13 @@ impl BeaconChain { for relative_epoch in [RelativeEpoch::Current, RelativeEpoch::Next] { let shuffling_id = AttestationShufflingId::new(block_root, state, relative_epoch)?; - let shuffling_is_cached = self - .shuffling_cache - .try_read_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .ok_or(Error::AttestationCacheLockTimeout)? - .contains(&shuffling_id); + let shuffling_is_cached = self.shuffling_cache.read().contains(&shuffling_id); if !shuffling_is_cached { state.build_committee_cache(relative_epoch, &self.spec)?; let committee_cache = state.committee_cache(relative_epoch)?; self.shuffling_cache - .try_write_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .ok_or(Error::AttestationCacheLockTimeout)? 
+ .write() .insert_committee_cache(shuffling_id, committee_cache); } } @@ -4174,14 +4143,12 @@ impl BeaconChain { ) }; - if let Some(finalized_eth1_data) = self - .eth1_finalization_cache - .try_write_for(ETH1_FINALIZATION_CACHE_LOCK_TIMEOUT) - .and_then(|mut cache| { - cache.insert(checkpoint, eth1_finalization_data); - cache.finalize(&current_finalized_checkpoint) - }) - { + let finalized_eth1_data = { + let mut cache = self.eth1_finalization_cache.write(); + cache.insert(checkpoint, eth1_finalization_data); + cache.finalize(&current_finalized_checkpoint) + }; + if let Some(finalized_eth1_data) = finalized_eth1_data { if let Some(eth1_chain) = self.eth1_chain.as_ref() { let finalized_deposit_count = finalized_eth1_data.deposit_count; eth1_chain.finalize_eth1_data(finalized_eth1_data); @@ -6365,15 +6332,11 @@ impl BeaconChain { })?; // Obtain the shuffling cache, timing how long we wait. - let cache_wait_timer = - metrics::start_timer(&metrics::ATTESTATION_PROCESSING_SHUFFLING_CACHE_WAIT_TIMES); - - let mut shuffling_cache = self - .shuffling_cache - .try_write_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .ok_or(Error::AttestationCacheLockTimeout)?; - - metrics::stop_timer(cache_wait_timer); + let mut shuffling_cache = { + let _ = + metrics::start_timer(&metrics::ATTESTATION_PROCESSING_SHUFFLING_CACHE_WAIT_TIMES); + self.shuffling_cache.write() + }; if let Some(cache_item) = shuffling_cache.get(&shuffling_id) { // The shuffling cache is no longer required, drop the write-lock to allow concurrent @@ -6481,8 +6444,7 @@ impl BeaconChain { let shuffling_decision_block = shuffling_id.shuffling_decision_block; self.shuffling_cache - .try_write_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .ok_or(Error::AttestationCacheLockTimeout)? + .write() .insert_committee_cache(shuffling_id, &committee_cache); metrics::stop_timer(committee_building_timer); diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index c71a2bcab3f..832eaccc803 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -64,7 +64,7 @@ use crate::observed_block_producers::SeenBlock; use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS; use crate::validator_pubkey_cache::ValidatorPubkeyCache; use crate::{ - beacon_chain::{BeaconForkChoice, ForkChoiceError, VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT}, + beacon_chain::{BeaconForkChoice, ForkChoiceError}, metrics, BeaconChain, BeaconChainError, BeaconChainTypes, }; use derivative::Derivative; @@ -2096,10 +2096,7 @@ pub fn cheap_state_advance_to_obtain_committees<'a, E: EthSpec, Err: BlockBlobEr pub fn get_validator_pubkey_cache( chain: &BeaconChain, ) -> Result>, BeaconChainError> { - chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout) + Ok(chain.validator_pubkey_cache.read()) } /// Produces an _empty_ `BlockSignatureVerifier`.
diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 7217f2c640f..14e61e12653 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -13,7 +13,6 @@ use crate::light_client_server_cache::LightClientServerCache; use crate::migrate::{BackgroundMigrator, MigratorConfig}; use crate::persisted_beacon_chain::PersistedBeaconChain; use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache}; -use crate::timeout_rw_lock::TimeoutRwLock; use crate::validator_monitor::{ValidatorMonitor, ValidatorMonitorConfig}; use crate::validator_pubkey_cache::ValidatorPubkeyCache; use crate::ChainConfig; @@ -935,16 +934,16 @@ where fork_choice_signal_rx, event_handler: self.event_handler, head_tracker, - shuffling_cache: TimeoutRwLock::new(ShufflingCache::new( + shuffling_cache: RwLock::new(ShufflingCache::new( shuffling_cache_size, head_shuffling_ids, log.clone(), )), - eth1_finalization_cache: TimeoutRwLock::new(Eth1FinalizationCache::new(log.clone())), + eth1_finalization_cache: RwLock::new(Eth1FinalizationCache::new(log.clone())), beacon_proposer_cache, block_times_cache: <_>::default(), pre_finalization_block_cache: <_>::default(), - validator_pubkey_cache: TimeoutRwLock::new(validator_pubkey_cache), + validator_pubkey_cache: RwLock::new(validator_pubkey_cache), attester_cache: <_>::default(), early_attester_cache: <_>::default(), reqresp_pre_import_cache: <_>::default(), diff --git a/beacon_node/beacon_chain/src/canonical_head.rs b/beacon_node/beacon_chain/src/canonical_head.rs index 84e1544451c..a5d85d56032 100644 --- a/beacon_node/beacon_chain/src/canonical_head.rs +++ b/beacon_node/beacon_chain/src/canonical_head.rs @@ -31,7 +31,6 @@ //! the head block root. This is unacceptable for fast-responding functions like the networking //! stack. -use crate::beacon_chain::ATTESTATION_CACHE_LOCK_TIMEOUT; use crate::persisted_fork_choice::PersistedForkChoice; use crate::shuffling_cache::BlockShufflingIds; use crate::{ @@ -817,21 +816,10 @@ impl BeaconChain { new_snapshot.beacon_block_root, &new_snapshot.beacon_state, ) { - Ok(head_shuffling_ids) => { - self.shuffling_cache - .try_write_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .map(|mut shuffling_cache| { - shuffling_cache.update_head_shuffling_ids(head_shuffling_ids) - }) - .unwrap_or_else(|| { - error!( - self.log, - "Failed to obtain cache write lock"; - "lock" => "shuffling_cache", - "task" => "update head shuffling decision root" - ); - }); - } + Ok(head_shuffling_ids) => self + .shuffling_cache + .write() + .update_head_shuffling_ids(head_shuffling_ids), Err(e) => { error!( self.log, diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index 255b8f00497..c908efa07c3 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -28,8 +28,6 @@ pub struct ChainConfig { pub weak_subjectivity_checkpoint: Option, /// Determine whether to reconstruct historic states, usually after a checkpoint sync. pub reconstruct_historic_states: bool, - /// Whether timeouts on `TimeoutRwLock`s are enabled or not. - pub enable_lock_timeouts: bool, /// The max size of a message that can be sent over the network. pub max_network_size: usize, /// Maximum percentage of the head committee weight at which to attempt re-orging the canonical head. 
@@ -94,7 +92,6 @@ impl Default for ChainConfig { import_max_skip_slots: None, weak_subjectivity_checkpoint: None, reconstruct_historic_states: false, - enable_lock_timeouts: true, max_network_size: 10 * 1_048_576, // 10M re_org_head_threshold: Some(DEFAULT_RE_ORG_HEAD_THRESHOLD), re_org_parent_threshold: Some(DEFAULT_RE_ORG_PARENT_THRESHOLD), diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index e1d0f61c58c..5f3ccac4e4a 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -57,7 +57,6 @@ pub mod state_advance_timer; pub mod sync_committee_rewards; pub mod sync_committee_verification; pub mod test_utils; -mod timeout_rw_lock; pub mod validator_monitor; pub mod validator_pubkey_cache; @@ -98,5 +97,4 @@ pub use state_processing::per_block_processing::errors::{ ExitValidationError, ProposerSlashingValidationError, }; pub use store; -pub use timeout_rw_lock::TimeoutRwLock; pub use types; diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index ab547cb6006..4ca511370d6 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -93,6 +93,10 @@ lazy_static! { "Time spent running fork choice's `get_head` during block import", exponential_buckets(1e-3, 2.0, 8) ); + pub static ref BLOCK_PROCESSING_PUBKEY_CACHE_LOCK: Result<Histogram> = try_create_histogram( + "beacon_block_processing_pubkey_cache_lock_seconds", + "Time spent waiting or holding the pubkey cache write lock", + ); pub static ref BLOCK_SYNC_AGGREGATE_SET_BITS: Result<IntGauge> = try_create_int_gauge( "block_sync_aggregate_set_bits", "The number of true bits in the last sync aggregate in a block" diff --git a/beacon_node/beacon_chain/src/state_advance_timer.rs b/beacon_node/beacon_chain/src/state_advance_timer.rs index 1f928a16e42..1d8bfff216b 100644 --- a/beacon_node/beacon_chain/src/state_advance_timer.rs +++ b/beacon_node/beacon_chain/src/state_advance_timer.rs @@ -15,8 +15,7 @@ //! 2. There's a possibility that the head block is never built upon, causing wasted CPU cycles. use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS; use crate::{ - beacon_chain::ATTESTATION_CACHE_LOCK_TIMEOUT, chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR, - BeaconChain, BeaconChainError, BeaconChainTypes, + chain_config::FORK_CHOICE_LOOKAHEAD_FACTOR, BeaconChain, BeaconChainError, BeaconChainTypes, }; use slog::{debug, error, warn, Logger}; use slot_clock::SlotClock; @@ -417,8 +416,7 @@ fn advance_head( .map_err(BeaconChainError::from)?; beacon_chain .shuffling_cache - .try_write_for(ATTESTATION_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::AttestationCacheLockTimeout)?
+ .write() .insert_committee_cache(shuffling_id.clone(), committee_cache); debug!( diff --git a/beacon_node/beacon_chain/src/sync_committee_verification.rs b/beacon_node/beacon_chain/src/sync_committee_verification.rs index 5c6710bfd6c..e1a5de56d1c 100644 --- a/beacon_node/beacon_chain/src/sync_committee_verification.rs +++ b/beacon_node/beacon_chain/src/sync_committee_verification.rs @@ -28,8 +28,7 @@ use crate::observed_attesters::SlotSubcommitteeIndex; use crate::{ - beacon_chain::VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT, metrics, - observed_aggregates::ObserveOutcome, BeaconChain, BeaconChainError, BeaconChainTypes, + metrics, observed_aggregates::ObserveOutcome, BeaconChain, BeaconChainError, BeaconChainTypes, }; use bls::{verify_signature_sets, PublicKeyBytes}; use derivative::Derivative; @@ -619,10 +618,7 @@ pub fn verify_signed_aggregate_signatures( signed_aggregate: &SignedContributionAndProof, participant_pubkeys: &[PublicKeyBytes], ) -> Result { - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let aggregator_index = signed_aggregate.message.aggregator_index; if aggregator_index >= pubkey_cache.len() as u64 { @@ -683,10 +679,7 @@ pub fn verify_sync_committee_message( let signature_setup_timer = metrics::start_timer(&metrics::SYNC_MESSAGE_PROCESSING_SIGNATURE_SETUP_TIMES); - let pubkey_cache = chain - .validator_pubkey_cache - .try_read_for(VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT) - .ok_or(BeaconChainError::ValidatorPubkeyCacheLockTimeout)?; + let pubkey_cache = chain.validator_pubkey_cache.read(); let pubkey = pubkey_cache .get_pubkey_from_pubkey_bytes(pubkey_bytes) diff --git a/beacon_node/beacon_chain/src/timeout_rw_lock.rs b/beacon_node/beacon_chain/src/timeout_rw_lock.rs deleted file mode 100644 index b2eea762651..00000000000 --- a/beacon_node/beacon_chain/src/timeout_rw_lock.rs +++ /dev/null @@ -1,48 +0,0 @@ -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::time::Duration; - -/// A simple wrapper around `parking_lot::RwLock` that only permits read/write access with a -/// time-out (i.e., no indefinitely-blocking operations). -/// -/// Timeouts can be optionally disabled at runtime for all instances of this type by calling -/// `TimeoutRwLock::disable_timeouts()`. -pub struct TimeoutRwLock(RwLock); - -const TIMEOUT_LOCKS_ENABLED_DEFAULT: bool = true; -static TIMEOUT_LOCKS_ENABLED: AtomicBool = AtomicBool::new(TIMEOUT_LOCKS_ENABLED_DEFAULT); - -impl TimeoutRwLock<()> { - pub fn disable_timeouts() { - // Use the strongest `SeqCst` ordering for the write, as it should only happen once. - TIMEOUT_LOCKS_ENABLED.store(false, Ordering::SeqCst); - } -} - -impl TimeoutRwLock { - pub fn new(inner: T) -> Self { - Self(RwLock::new(inner)) - } - - fn timeouts_enabled() -> bool { - // Use relaxed ordering as it's OK for a few locks to run with timeouts "accidentally", - // and we want the atomic check to be as fast as possible. 
- TIMEOUT_LOCKS_ENABLED.load(Ordering::Relaxed) - } - - pub fn try_read_for(&self, timeout: Duration) -> Option> { - if Self::timeouts_enabled() { - self.0.try_read_for(timeout) - } else { - Some(self.0.read()) - } - } - - pub fn try_write_for(&self, timeout: Duration) -> Option> { - if Self::timeouts_enabled() { - self.0.try_write_for(timeout) - } else { - Some(self.0.write()) - } - } -} diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 689c3437a3d..2e1b1c093c8 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -1246,9 +1246,7 @@ pub fn cli_app() -> Command { .arg( Arg::new("disable-lock-timeouts") .long("disable-lock-timeouts") - .help("Disable the timeouts applied to some internal locks by default. This can \ - lead to less spurious failures on slow hardware but is considered \ - experimental as it may obscure performance issues.") + .help("This flag is deprecated and has no effect.") .action(ArgAction::SetTrue) .help_heading(FLAG_HEADER) .display_order(0) diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 6bde0411afe..b4fa38da7d7 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -753,7 +753,11 @@ pub fn get_config( } if cli_args.get_flag("disable-lock-timeouts") { - client_config.chain.enable_lock_timeouts = false; + warn!( + log, + "Ignoring --disable-lock-timeouts"; + "info" => "this flag is deprecated and will be removed" + ); } if cli_args.get_flag("disable-proposer-reorgs") { diff --git a/beacon_node/src/lib.rs b/beacon_node/src/lib.rs index 40b667a7447..945bd787dd4 100644 --- a/beacon_node/src/lib.rs +++ b/beacon_node/src/lib.rs @@ -5,7 +5,6 @@ pub use beacon_chain; use beacon_chain::store::LevelDB; use beacon_chain::{ builder::Witness, eth1_chain::CachingEth1Backend, slot_clock::SystemTimeSlotClock, - TimeoutRwLock, }; use clap::ArgMatches; pub use cli::cli_app; @@ -73,11 +72,6 @@ impl ProductionBeaconNode { ) } - if !client_config.chain.enable_lock_timeouts { - info!(log, "Disabling lock timeouts globally"); - TimeoutRwLock::disable_timeouts() - } - if let Err(misaligned_forks) = validator_fork_epochs(&spec) { warn!( log, diff --git a/book/src/faq.md b/book/src/faq.md index 2de7841343c..4c58c2e16d1 100644 --- a/book/src/faq.md +++ b/book/src/faq.md @@ -8,7 +8,6 @@ - [My beacon node is stuck at downloading historical block using checkpoint sync. What should I do?](#bn-download-historical) - [I proposed a block but the beacon node shows `could not publish message` with error `duplicate` as below, should I be worried?](#bn-duplicate) - [I see beacon node logs `Head is optimistic` and I am missing attestations. What should I do?](#bn-optimistic) -- [My beacon node logs `CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTimeout`, what should I do?](#bn-timeout) - [My beacon node logs `WARN BlockProcessingFailure outcome: MissingBeaconBlock`, what should I do?](#bn-missing-beacon) - [After checkpoint sync, the progress of `downloading historical blocks` is slow. Why?](#bn-download-slow) - [My beacon node logs `WARN Error processing HTTP API request`, what should I do?](#bn-http) @@ -84,7 +83,7 @@ The `WARN Execution engine called failed` log is shown when the beacon node cann `error: HttpClient(url: http://127.0.0.1:8551/, kind: timeout, detail: operation timed out), service: exec` -which says `TimedOut` at the end of the message. This means that the execution engine has not responded in time to the beacon node. 
One option is to add the flags `--execution-timeout-multiplier 3` and `--disable-lock-timeouts` to the beacon node. However, if the error persists, it is worth digging further to find out the cause. There are a few reasons why this can occur: +which says `TimedOut` at the end of the message. This means that the execution engine has not responded in time to the beacon node. One option is to add the flag `--execution-timeout-multiplier 3` to the beacon node. However, if the error persists, it is worth digging further to find out the cause. There are a few reasons why this can occur: 1. The execution engine is not synced. Check the log of the execution engine to make sure that it is synced. If it is syncing, wait until it is synced and the error will disappear. You will see the beacon node logs `INFO Execution engine online` when it is synced. 1. The computer is overloaded. Check the CPU and RAM usage to see if it has overloaded. You can use `htop` to check for CPU and RAM usage. @@ -140,17 +139,6 @@ WARN Head is optimistic execution_block_hash: 0x47e7555f1d4215d1ad409b1ac1 It means the beacon node will follow the chain, but it will not be able to attest or produce blocks. This is because the execution client is not synced, so the beacon chain cannot verify the authenticity of the chain head, hence the word `optimistic`. What you need to do is to make sure that the execution client is up and syncing. Once the execution client is synced, the error will disappear. -### My beacon node logs `CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTimeout, service: beacon`, what should I do? - -An example of the log is shown below: - -```text -CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTimeout, service: beacon -WARN BlockProcessingFailure outcome: ValidatorPubkeyCacheLockTimeout, msg: unexpected condition in processing block. -``` - -A `Timeout` error suggests that the computer may be overloaded at the moment, for example, the execution client is still syncing. You may use the flag `--disable-lock-timeouts` to silence this error, although it will not fix the underlying slowness. Nevertheless, this is a relatively harmless log, and the error should go away once the resources used are back to normal. - ### My beacon node logs `WARN BlockProcessingFailure outcome: MissingBeaconBlock`, what should I do? An example of the full log is shown below: diff --git a/book/src/help_bn.md b/book/src/help_bn.md index d46427970fd..f9180b65832 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -480,9 +480,7 @@ Flags: --disable-inbound-rate-limiter Disables the inbound rate limiter (requests received by this node). --disable-lock-timeouts - Disable the timeouts applied to some internal locks by default. This - can lead to less spurious failures on slow hardware but is considered - experimental as it may obscure performance issues. + This flag is deprecated and has no effect. --disable-log-timestamp If present, do not include timestamps in logging output. 
--disable-malloc-tuning diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index fa87fbdd816..4fdd967c65c 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -159,19 +159,11 @@ fn max_skip_slots_flag() { .with_config(|config| assert_eq!(config.chain.import_max_skip_slots, Some(10))); } -#[test] -fn enable_lock_timeouts_default() { - CommandLineTest::new() - .run_with_zero_port() - .with_config(|config| assert!(config.chain.enable_lock_timeouts)); -} - #[test] fn disable_lock_timeouts_flag() { CommandLineTest::new() .flag("disable-lock-timeouts", None) - .run_with_zero_port() - .with_config(|config| assert!(!config.chain.enable_lock_timeouts)); + .run_with_zero_port(); } #[test]