From 0cca581b40c18d1ffc897de5daa65f61df18a908 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 11:25:38 -0500 Subject: [PATCH 01/13] fix: address security and code quality issues - Panic on poisoned mutex instead of silently recovering (H3) - Bound health check timeout to 3600s max (M1) - Use hex fallback instead of empty string for bech32 failure (L1) - Floor ACK timeout at DESCRIPTOR_ACK_TIMEOUT_SECS (ACK asymmetry) - Fix clippy lint: use function reference in map - Use Option for created_at to avoid false staleness on legacy records - Update lib.rs exports after conflict resolution with main --- keep-cli/src/cli.rs | 12 ++ keep-cli/src/commands/frost_network/mod.rs | 181 ++++++++++++++++++++- keep-cli/src/commands/wallet.rs | 23 ++- keep-cli/src/main.rs | 14 +- keep-core/src/backend.rs | 5 + keep-core/src/lib.rs | 37 +++++ keep-core/src/storage.rs | 80 ++++++++- keep-core/src/wallet.rs | 33 ++++ keep-frost-net/src/descriptor_session.rs | 43 +++++ keep-frost-net/src/lib.rs | 8 +- keep-frost-net/src/node/descriptor.rs | 35 +++- keep-frost-net/src/node/mod.rs | 49 ++++++ keep-frost-net/src/protocol.rs | 17 ++ keep-mobile/src/lib.rs | 92 ++++++++++- keep-mobile/src/persistence.rs | 92 ++++++++++- keep-mobile/src/types.rs | 11 ++ 16 files changed, 705 insertions(+), 27 deletions(-) diff --git a/keep-cli/src/cli.rs b/keep-cli/src/cli.rs index 439a21e9..20b39ed1 100644 --- a/keep-cli/src/cli.rs +++ b/keep-cli/src/cli.rs @@ -203,6 +203,8 @@ pub(crate) enum WalletCommands { required = true )] recovery: Vec, + #[arg(long, help = "Session timeout in seconds (max 86400)")] + timeout: Option, }, } @@ -374,6 +376,16 @@ pub(crate) enum FrostNetworkCommands { )] count: u32, }, + HealthCheck { + #[arg(short, long)] + group: String, + #[arg(short, long)] + relay: Option, + #[arg(short, long)] + share: Option, + #[arg(long, default_value = "10", help = "Timeout in seconds")] + timeout: u64, + }, } #[derive(Subcommand)] diff --git a/keep-cli/src/commands/frost_network/mod.rs b/keep-cli/src/commands/frost_network/mod.rs index f77a7174..977f3e6b 100644 --- a/keep-cli/src/commands/frost_network/mod.rs +++ b/keep-cli/src/commands/frost_network/mod.rs @@ -70,7 +70,8 @@ pub fn cmd_frost_network_serve( .map_err(|e| KeepError::Frost(e.to_string()))?, ); - let npub = node.pubkey().to_bech32().unwrap_or_default(); + let pk = node.pubkey(); + let npub = pk.to_bech32().unwrap_or_else(|_| format!("{pk}")); out.field("Node pubkey", &npub); out.newline(); out.info("Listening for FROST messages... (Ctrl+C to stop)"); @@ -172,7 +173,7 @@ pub fn cmd_frost_network_serve( network, created_at: now, }; - let guard = keep.lock().unwrap_or_else(|e| e.into_inner()); + let guard = keep.lock().expect("keep mutex poisoned"); match guard.store_wallet_descriptor(&descriptor) { Ok(()) => { tracing::info!("wallet descriptor stored"); @@ -473,3 +474,179 @@ pub fn cmd_frost_network_sign_event( "FROST network event signing".into(), )) } + +#[tracing::instrument(skip(out), fields(path = %path.display()))] +pub fn cmd_frost_network_health_check( + out: &Output, + path: &Path, + group: &str, + relay: &str, + share_index: Option, + timeout: u64, +) -> Result<()> { + const MAX_HEALTH_CHECK_TIMEOUT_SECS: u64 = 3600; + if timeout == 0 || timeout > MAX_HEALTH_CHECK_TIMEOUT_SECS { + return Err(KeepError::InvalidInput(format!( + "timeout must be between 1 and {MAX_HEALTH_CHECK_TIMEOUT_SECS} seconds" + ))); + } + debug!(group, relay, share = ?share_index, timeout, "health check"); + + let mut keep = Keep::open(path)?; + let password = get_password("Enter password")?; + + let spinner = out.spinner("Unlocking vault..."); + keep.unlock(password.expose_secret())?; + spinner.finish(); + + let group_pubkey = keep_core::keys::npub_to_bytes(group)?; + + let share = match share_index { + Some(idx) => keep.frost_get_share_by_index(&group_pubkey, idx)?, + None => keep.frost_get_share(&group_pubkey)?, + }; + + out.newline(); + out.header("Key Health Check"); + out.field("Group", group); + out.field("Relay", relay); + out.field("Timeout", &format!("{timeout}s")); + out.newline(); + + let rt = + tokio::runtime::Runtime::new().map_err(|e| KeepError::Runtime(format!("tokio: {e}")))?; + + rt.block_on(async { + let node = std::sync::Arc::new( + keep_frost_net::KfpNode::new(share, vec![relay.to_string()]) + .await + .map_err(|e| KeepError::Frost(e.to_string()))?, + ); + + node.announce() + .await + .map_err(|e| KeepError::Frost(e.to_string()))?; + + let node_handle = tokio::spawn({ + let node = node.clone(); + async move { + if let Err(e) = node.run().await { + tracing::error!(error = %e, "FROST node error"); + } + } + }); + + let spinner = out.spinner("Discovering peers..."); + tokio::time::sleep(std::time::Duration::from_secs(3)).await; + spinner.finish(); + + let online = node.online_peers(); + out.info(&format!("{online} peer(s) discovered")); + + if online == 0 { + node_handle.abort(); + out.newline(); + out.warn("No peers discovered. Run 'keep frost network serve' on other devices first."); + return Ok::<_, KeepError>(()); + } + + let spinner = out.spinner(&format!("Pinging peers (timeout: {timeout}s)...")); + let result = node + .health_check(std::time::Duration::from_secs(timeout)) + .await + .map_err(|e| KeepError::Frost(e.to_string()))?; + spinner.finish(); + node_handle.abort(); + + out.newline(); + out.header("Results"); + + if !result.responsive.is_empty() { + let shares: Vec = result.responsive.iter().map(|s| s.to_string()).collect(); + out.field("Responsive", &shares.join(", ")); + } + if !result.unresponsive.is_empty() { + let shares: Vec = result.unresponsive.iter().map(|s| s.to_string()).collect(); + out.field("Unresponsive", &shares.join(", ")); + } + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + + for &idx in &result.responsive { + let existing = keep.get_health_status(&group_pubkey, idx)?; + let created_at = existing.and_then(|s| s.created_at).unwrap_or(now); + let status = keep_core::wallet::KeyHealthStatus { + group_pubkey, + share_index: idx, + last_check_timestamp: now, + responsive: true, + created_at: Some(created_at), + }; + keep.store_health_status(&status)?; + } + for &idx in &result.unresponsive { + let existing = keep.get_health_status(&group_pubkey, idx)?; + let created_at = existing.and_then(|s| s.created_at).unwrap_or(now); + let status = keep_core::wallet::KeyHealthStatus { + group_pubkey, + share_index: idx, + last_check_timestamp: now, + responsive: false, + created_at: Some(created_at), + }; + keep.store_health_status(&status)?; + } + + out.newline(); + out.success(&format!( + "{} responsive, {} unresponsive", + result.responsive.len(), + result.unresponsive.len() + )); + + let all_statuses = keep.list_health_statuses()?; + let group_statuses: Vec<_> = all_statuses + .iter() + .filter(|s| s.group_pubkey == group_pubkey) + .collect(); + if !group_statuses.is_empty() { + out.newline(); + out.header("Health History"); + for s in &group_statuses { + let age = now.saturating_sub(s.last_check_timestamp); + let status_str = if s.responsive { + "responsive" + } else { + "unresponsive" + }; + let staleness = if s.is_critical(now) { + " [CRITICAL]" + } else if s.is_stale(now) { + " [STALE]" + } else { + "" + }; + let age_display = if age < 60 { + format!("{age}s ago") + } else if age < 3600 { + format!("{}m ago", age / 60) + } else if age < 86400 { + format!("{}h ago", age / 3600) + } else { + format!("{}d ago", age / 86400) + }; + out.field( + &format!("Share {}", s.share_index), + &format!("{status_str} ({age_display}){staleness}"), + ); + } + } + + Ok::<_, KeepError>(()) + })?; + + Ok(()) +} diff --git a/keep-cli/src/commands/wallet.rs b/keep-cli/src/commands/wallet.rs index a529f974..6ee98b32 100644 --- a/keep-cli/src/commands/wallet.rs +++ b/keep-cli/src/commands/wallet.rs @@ -417,8 +417,18 @@ pub fn cmd_wallet_propose( relay: &str, share_index: Option, recovery: &[String], + timeout_secs: Option, ) -> Result<()> { - debug!(group, network, relay, share = ?share_index, "wallet propose"); + debug!(group, network, relay, share = ?share_index, timeout = ?timeout_secs, "wallet propose"); + + if let Some(t) = timeout_secs { + if t == 0 || t > keep_frost_net::DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS { + return Err(KeepError::InvalidInput(format!( + "timeout must be between 1 and {} seconds", + keep_frost_net::DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS + ))); + } + } if !keep_frost_net::VALID_NETWORKS.contains(&network) { return Err(KeepError::InvalidInput(format!( @@ -513,7 +523,7 @@ pub fn cmd_wallet_propose( let spinner = out.spinner("Sending descriptor proposal..."); let session_id = node - .request_descriptor(policy, network, &xpub, &fingerprint) + .request_descriptor_with_timeout(policy, network, &xpub, &fingerprint, timeout_secs) .await .map_err(|e| KeepError::Frost(e.to_string()))?; spinner.finish(); @@ -537,7 +547,8 @@ pub fn cmd_wallet_propose( let spinner = out.spinner(&format!( "Waiting for contributions (0/{remaining_contributions})..." )); - let timeout = Duration::from_secs(keep_frost_net::DESCRIPTOR_SESSION_TIMEOUT_SECS); + let effective_timeout = timeout_secs.unwrap_or(keep_frost_net::DESCRIPTOR_SESSION_TIMEOUT_SECS); + let timeout = Duration::from_secs(effective_timeout); let deadline = tokio::time::Instant::now() + timeout; let mut received = 0usize; @@ -604,8 +615,10 @@ pub fn cmd_wallet_propose( spinner.finish(); let spinner = out.spinner("Waiting for ACKs..."); - let ack_deadline = - tokio::time::Instant::now() + Duration::from_secs(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS); + let ack_timeout = timeout_secs + .map(|t| t.max(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS).min(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS * 4)) + .unwrap_or(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS); + let ack_deadline = tokio::time::Instant::now() + Duration::from_secs(ack_timeout); let mut external_descriptor = String::new(); let mut internal_descriptor = String::new(); diff --git a/keep-cli/src/main.rs b/keep-cli/src/main.rs index 2dd2f771..f8ca3e24 100644 --- a/keep-cli/src/main.rs +++ b/keep-cli/src/main.rs @@ -342,6 +342,17 @@ fn dispatch_frost_network( out, path, &group, relay, &hardware, count, ) } + FrostNetworkCommands::HealthCheck { + group, + relay, + share, + timeout, + } => { + let relay = relay.as_deref().unwrap_or(default_relay); + commands::frost_network::cmd_frost_network_health_check( + out, path, &group, relay, share, timeout, + ) + } } } @@ -447,10 +458,11 @@ fn dispatch_wallet( relay, share, recovery, + timeout, } => { let relay = relay.as_deref().unwrap_or_else(|| cfg.default_relay()); commands::wallet::cmd_wallet_propose( - out, path, &group, &network, relay, share, &recovery, + out, path, &group, &network, relay, share, &recovery, timeout, ) } } diff --git a/keep-core/src/backend.rs b/keep-core/src/backend.rs index b5cef177..ab53497f 100644 --- a/keep-core/src/backend.rs +++ b/keep-core/src/backend.rs @@ -23,6 +23,8 @@ pub const DESCRIPTORS_TABLE: &str = "wallet_descriptors"; pub const RELAY_CONFIGS_TABLE: &str = "relay_configs"; /// Table name for application configuration. pub const CONFIG_TABLE: &str = "config"; +/// Table name for key health status records. +pub const HEALTH_STATUS_TABLE: &str = "key_health_status"; /// Trait for pluggable storage backends. /// @@ -83,6 +85,8 @@ const DESCRIPTORS_TABLE_DEF: TableDefinition<&[u8], &[u8]> = const RELAY_CONFIGS_TABLE_DEF: TableDefinition<&[u8], &[u8]> = TableDefinition::new("relay_configs"); const CONFIG_TABLE_DEF: TableDefinition<&[u8], &[u8]> = TableDefinition::new("config"); +const HEALTH_STATUS_TABLE_DEF: TableDefinition<&[u8], &[u8]> = + TableDefinition::new("key_health_status"); /// Redb-based storage backend (default). pub struct RedbBackend { @@ -148,6 +152,7 @@ impl RedbBackend { DESCRIPTORS_TABLE => Ok(DESCRIPTORS_TABLE_DEF), RELAY_CONFIGS_TABLE => Ok(RELAY_CONFIGS_TABLE_DEF), CONFIG_TABLE => Ok(CONFIG_TABLE_DEF), + HEALTH_STATUS_TABLE => Ok(HEALTH_STATUS_TABLE_DEF), _ => Err(StorageError::database(format!("unknown table: {name}")).into()), } } diff --git a/keep-core/src/lib.rs b/keep-core/src/lib.rs index 1592557e..35a5a28a 100644 --- a/keep-core/src/lib.rs +++ b/keep-core/src/lib.rs @@ -665,6 +665,43 @@ impl Keep { self.storage.delete_descriptor(group_pubkey) } + /// Store a key health status record. + pub fn store_health_status(&self, status: &wallet::KeyHealthStatus) -> Result<()> { + if !self.is_unlocked() { + return Err(KeepError::Locked); + } + self.storage.store_health_status(status) + } + + /// Get a key health status record. + pub fn get_health_status( + &self, + group_pubkey: &[u8; 32], + share_index: u16, + ) -> Result> { + if !self.is_unlocked() { + return Err(KeepError::Locked); + } + self.storage.get_health_status(group_pubkey, share_index) + } + + /// List all key health status records. + pub fn list_health_statuses(&self) -> Result> { + if !self.is_unlocked() { + return Err(KeepError::Locked); + } + self.storage.list_health_statuses() + } + + /// List health statuses that are stale (not checked within the threshold). + pub fn list_stale_health_statuses(&self, now: u64) -> Result> { + if !self.is_unlocked() { + return Err(KeepError::Locked); + } + let all = self.storage.list_health_statuses()?; + Ok(all.into_iter().filter(|s| s.is_stale(now)).collect()) + } + /// Store relay configuration for a FROST share. pub fn store_relay_config(&self, config: &RelayConfig) -> Result<()> { if !self.is_unlocked() { diff --git a/keep-core/src/storage.rs b/keep-core/src/storage.rs index daafce2c..e6ab50c4 100644 --- a/keep-core/src/storage.rs +++ b/keep-core/src/storage.rs @@ -8,8 +8,8 @@ use std::path::{Path, PathBuf}; use tracing::{debug, trace}; use crate::backend::{ - RedbBackend, StorageBackend, CONFIG_TABLE, DESCRIPTORS_TABLE, KEYS_TABLE, RELAY_CONFIGS_TABLE, - SHARES_TABLE, + RedbBackend, StorageBackend, CONFIG_TABLE, DESCRIPTORS_TABLE, HEALTH_STATUS_TABLE, KEYS_TABLE, + RELAY_CONFIGS_TABLE, SHARES_TABLE, }; use crate::crypto::{self, Argon2Params, EncryptedData, SecretKey, SALT_SIZE}; use crate::error::{KeepError, Result, StorageError}; @@ -17,7 +17,7 @@ use crate::frost::StoredShare; use crate::keys::KeyRecord; use crate::rate_limit; use crate::relay::RelayConfig; -use crate::wallet::WalletDescriptor; +use crate::wallet::{KeyHealthStatus, WalletDescriptor}; use bincode::Options; @@ -245,6 +245,7 @@ impl Storage { backend.create_table(DESCRIPTORS_TABLE)?; backend.create_table(RELAY_CONFIGS_TABLE)?; backend.create_table(CONFIG_TABLE)?; + backend.create_table(HEALTH_STATUS_TABLE)?; Ok(Self { path: path.to_path_buf(), @@ -717,6 +718,79 @@ impl Storage { backend.put(CONFIG_TABLE, b"kill_switch", &encrypted_bytes)?; Ok(()) } + + /// Store a key health status record. + pub fn store_health_status(&self, status: &KeyHealthStatus) -> Result<()> { + debug!( + group = %hex::encode(status.group_pubkey), + share = status.share_index, + responsive = status.responsive, + "storing health status" + ); + let data_key = self.data_key.as_ref().ok_or(KeepError::Locked)?; + let backend = self.backend.as_ref().ok_or(KeepError::Locked)?; + + let key = health_status_key(&status.group_pubkey, status.share_index); + let serialized = serde_json::to_vec(status) + .map_err(|e| KeepError::Other(format!("json serialization failed: {e}")))?; + let encrypted = crypto::encrypt(&serialized, data_key)?; + backend.put(HEALTH_STATUS_TABLE, key.as_bytes(), &encrypted.to_bytes())?; + Ok(()) + } + + /// Get a key health status record. + pub fn get_health_status( + &self, + group_pubkey: &[u8; 32], + share_index: u16, + ) -> Result> { + let data_key = self.data_key.as_ref().ok_or(KeepError::Locked)?; + let backend = self.backend.as_ref().ok_or(KeepError::Locked)?; + + let key = health_status_key(group_pubkey, share_index); + let Some(encrypted_bytes) = backend.get(HEALTH_STATUS_TABLE, key.as_bytes())? else { + return Ok(None); + }; + + let encrypted = crypto::EncryptedData::from_bytes(&encrypted_bytes)?; + let decrypted = crypto::decrypt(&encrypted, data_key)?; + let decrypted_bytes = decrypted.as_slice()?; + let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) + .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; + Ok(Some(status)) + } + + /// List all key health status records. + pub fn list_health_statuses(&self) -> Result> { + let data_key = self.data_key.as_ref().ok_or(KeepError::Locked)?; + let backend = self.backend.as_ref().ok_or(KeepError::Locked)?; + + let entries = backend.list(HEALTH_STATUS_TABLE)?; + let mut statuses = Vec::new(); + + for (_, encrypted_bytes) in entries { + let encrypted = crypto::EncryptedData::from_bytes(&encrypted_bytes)?; + let decrypted = crypto::decrypt(&encrypted, data_key)?; + let decrypted_bytes = decrypted.as_slice()?; + let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) + .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; + statuses.push(status); + } + + Ok(statuses) + } + + /// Delete a key health status record. + pub fn delete_health_status(&self, group_pubkey: &[u8; 32], share_index: u16) -> Result<()> { + let backend = self.backend.as_ref().ok_or(KeepError::Locked)?; + let key = health_status_key(group_pubkey, share_index); + backend.delete(HEALTH_STATUS_TABLE, key.as_bytes())?; + Ok(()) + } +} + +fn health_status_key(group_pubkey: &[u8; 32], share_index: u16) -> String { + format!("{}:{}", hex::encode(group_pubkey), share_index) } pub(crate) fn share_id(group_pubkey: &[u8; 32], identifier: u16) -> [u8; 32] { diff --git a/keep-core/src/wallet.rs b/keep-core/src/wallet.rs index d19fcbc4..a0fa12b7 100644 --- a/keep-core/src/wallet.rs +++ b/keep-core/src/wallet.rs @@ -5,6 +5,39 @@ use serde::{Deserialize, Serialize}; +/// Health status of a key share from a liveness check. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct KeyHealthStatus { + /// The FROST group public key. + pub group_pubkey: [u8; 32], + /// The share index that was checked. + pub share_index: u16, + /// Unix timestamp of the last health check. + pub last_check_timestamp: u64, + /// Whether the share was responsive. + pub responsive: bool, + /// Unix timestamp when this record was first created (None for legacy records). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub created_at: Option, +} + +/// 24 hours - key not checked in this period is considered stale. +pub const KEY_HEALTH_STALE_THRESHOLD_SECS: u64 = 86400; +/// 7 days - key not checked in this period is critically stale. +pub const KEY_HEALTH_CRITICAL_THRESHOLD_SECS: u64 = 604800; + +impl KeyHealthStatus { + /// Returns true if the last check is older than the stale threshold (24h). + pub fn is_stale(&self, now: u64) -> bool { + now.saturating_sub(self.last_check_timestamp) >= KEY_HEALTH_STALE_THRESHOLD_SECS + } + + /// Returns true if the last check is older than the critical threshold (7d). + pub fn is_critical(&self, now: u64) -> bool { + now.saturating_sub(self.last_check_timestamp) >= KEY_HEALTH_CRITICAL_THRESHOLD_SECS + } +} + /// A finalized wallet descriptor associated with a FROST group. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct WalletDescriptor { diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index 477c3b5a..c61169cc 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -432,6 +432,49 @@ impl DescriptorSessionManager { .ok_or_else(|| FrostNetError::Session("Failed to retrieve created session".into())) } + #[allow(clippy::too_many_arguments)] + pub fn create_session_with_timeout( + &mut self, + session_id: [u8; 32], + group_pubkey: [u8; 32], + policy: WalletPolicy, + network: String, + expected_contributors: HashSet, + expected_acks: HashSet, + timeout: Option, + ) -> Result<&mut DescriptorSession> { + if let Some(existing) = self.sessions.get(&session_id) { + if !existing.is_expired() { + return Err(FrostNetError::Session( + "Descriptor session already active".into(), + )); + } + self.sessions.remove(&session_id); + } + + self.cleanup_expired(); + + if self.sessions.len() >= MAX_SESSIONS { + return Err(FrostNetError::Session( + "Maximum number of descriptor sessions reached".into(), + )); + } + + let effective_timeout = timeout.unwrap_or(self.default_timeout); + let session = DescriptorSession::new( + session_id, + group_pubkey, + policy, + network, + expected_contributors, + expected_acks, + effective_timeout, + ); + + self.sessions.insert(session_id, session); + Ok(self.sessions.get_mut(&session_id).unwrap()) + } + pub fn get_session(&self, session_id: &[u8; 32]) -> Option<&DescriptorSession> { self.sessions.get(session_id).filter(|s| !s.is_expired()) } diff --git a/keep-frost-net/src/lib.rs b/keep-frost-net/src/lib.rs index 1557adb9..1c315d2b 100644 --- a/keep-frost-net/src/lib.rs +++ b/keep-frost-net/src/lib.rs @@ -84,7 +84,9 @@ pub use ecdh::{ }; pub use error::{FrostNetError, Result}; pub use event::KfpEventBuilder; -pub use node::{KfpNode, KfpNodeEvent, NoOpHooks, PeerPolicy, SessionInfo, SigningHooks}; +pub use node::{ + HealthCheckResult, KfpNode, KfpNodeEvent, NoOpHooks, PeerPolicy, SessionInfo, SigningHooks, +}; pub use nonce_store::{FileNonceStore, MemoryNonceStore, NonceStore}; pub use peer::{AttestationStatus, Peer, PeerManager, PeerStatus}; pub use protocol::{ @@ -94,7 +96,9 @@ pub use protocol::{ EnclaveAttestation, ErrorPayload, KeySlot, KfpMessage, PingPayload, PolicyTier, PongPayload, RefreshCompletePayload, RefreshRequestPayload, RefreshRound1Payload, RefreshRound2Payload, SignRequestPayload, SignatureCompletePayload, SignatureSharePayload, WalletPolicy, - XpubAnnouncePayload, DEFAULT_REPLAY_WINDOW_SECS, DESCRIPTOR_ACK_TIMEOUT_SECS, + XpubAnnouncePayload, DEFAULT_REPLAY_WINDOW_SECS, DESCRIPTOR_ACK_PHASE_TIMEOUT_SECS, + DESCRIPTOR_ACK_TIMEOUT_SECS, DESCRIPTOR_CONTRIBUTION_TIMEOUT_SECS, + DESCRIPTOR_FINALIZE_TIMEOUT_SECS, DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS, DESCRIPTOR_SESSION_TIMEOUT_SECS, KFP_EVENT_KIND, KFP_VERSION, MAX_CAPABILITIES, MAX_CAPABILITY_LENGTH, MAX_COMMITMENT_SIZE, MAX_DESCRIPTOR_LENGTH, MAX_ERROR_CODE_LENGTH, MAX_ERROR_MESSAGE_LENGTH, MAX_FINGERPRINT_LENGTH, MAX_KEYS_PER_TIER, MAX_MESSAGE_SIZE, diff --git a/keep-frost-net/src/node/descriptor.rs b/keep-frost-net/src/node/descriptor.rs index 03ef4c24..bc3ebbf4 100644 --- a/keep-frost-net/src/node/descriptor.rs +++ b/keep-frost-net/src/node/descriptor.rs @@ -25,11 +25,18 @@ impl KfpNode { own_xpub: &str, own_fingerprint: &str, ) -> Result<[u8; 32]> { - if !VALID_NETWORKS.contains(&network) { - return Err(FrostNetError::Session(format!( - "Invalid network: {network}" - ))); - } + self.request_descriptor_with_timeout(policy, network, own_xpub, own_fingerprint, None) + .await + } + + pub async fn request_descriptor_with_timeout( + &self, + policy: WalletPolicy, + network: &str, + own_xpub: &str, + own_fingerprint: &str, + timeout_secs: Option, + ) -> Result<[u8; 32]> { let our_index = self.share.metadata.identifier; @@ -50,15 +57,18 @@ impl KfpNode { .collect() }; + let session_timeout = timeout_secs.map(std::time::Duration::from_secs); + { let mut sessions = self.descriptor_sessions.write(); - let session = sessions.create_session( + let session = sessions.create_session_with_timeout( session_id, self.group_pubkey, policy.clone(), network.to_string(), expected_contributors, expected_acks, + session_timeout, )?; session.set_initiator(self.keys.public_key()); @@ -74,7 +84,7 @@ impl KfpNode { } } - let payload = DescriptorProposePayload::new( + let mut payload = DescriptorProposePayload::new( session_id, self.group_pubkey, created_at, @@ -83,6 +93,9 @@ impl KfpNode { own_xpub, own_fingerprint, ); + if let Some(t) = timeout_secs { + payload = payload.with_timeout(t); + } let msg = KfpMessage::DescriptorPropose(payload); let json = msg.to_json()?; @@ -184,15 +197,21 @@ impl KfpNode { let our_index = self.share.metadata.identifier; let we_are_contributor = expected_contributors.contains(&our_index); + let propose_timeout = payload + .timeout_secs + .filter(|&t| t > 0 && t <= DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS) + .map(std::time::Duration::from_secs); + let session_created = { let mut sessions = self.descriptor_sessions.write(); - match sessions.create_session( + match sessions.create_session_with_timeout( payload.session_id, self.group_pubkey, payload.policy.clone(), payload.network.clone(), expected_contributors, HashSet::new(), + propose_timeout, ) { Ok(session) => { session.set_initiator(sender); diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index fc28b560..5de20ec6 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -109,6 +109,12 @@ impl SigningHooks for NoOpHooks { fn post_sign(&self, _session: &SessionInfo, _signature: &[u8; 64]) {} } +#[derive(Clone, Debug)] +pub struct HealthCheckResult { + pub responsive: Vec, + pub unresponsive: Vec, +} + /// Maximum age for announcement timestamps (5 minutes) pub(crate) const ANNOUNCE_MAX_AGE_SECS: u64 = 300; /// Maximum clock skew tolerance for future timestamps (30 seconds) @@ -183,6 +189,11 @@ pub enum KfpNodeEvent { share_index: u16, recovery_xpubs: Vec, }, + HealthCheckComplete { + group_pubkey: [u8; 32], + responsive: Vec, + unresponsive: Vec, + }, } impl std::fmt::Debug for KfpNodeEvent { @@ -283,6 +294,16 @@ impl std::fmt::Debug for KfpNodeEvent { .field("share_index", share_index) .field("xpub_count", &recovery_xpubs.len()) .finish(), + Self::HealthCheckComplete { + group_pubkey, + responsive, + unresponsive, + } => f + .debug_struct("HealthCheckComplete") + .field("group_pubkey", &hex::encode(group_pubkey)) + .field("responsive", responsive) + .field("unresponsive", unresponsive) + .finish(), } } } @@ -1044,6 +1065,34 @@ impl KfpNode { Ok(()) } + pub async fn health_check(&self, timeout: Duration) -> Result { + let all_peers: Vec = self + .peers + .read() + .all_peers() + .iter() + .map(|p| p.share_index) + .collect(); + + let responsive = self.ping_peers(timeout).await?; + let unresponsive: Vec = all_peers + .iter() + .filter(|idx| !responsive.contains(idx)) + .copied() + .collect(); + + let _ = self.event_tx.send(KfpNodeEvent::HealthCheckComplete { + group_pubkey: self.group_pubkey, + responsive: responsive.clone(), + unresponsive: unresponsive.clone(), + }); + + Ok(HealthCheckResult { + responsive, + unresponsive, + }) + } + pub async fn ping_peers(&self, timeout: Duration) -> Result> { let peers_snapshot: Vec<(u16, PublicKey, std::time::Instant)> = self .peers diff --git a/keep-frost-net/src/protocol.rs b/keep-frost-net/src/protocol.rs index 65ed40ec..a83bf7dc 100644 --- a/keep-frost-net/src/protocol.rs +++ b/keep-frost-net/src/protocol.rs @@ -25,6 +25,7 @@ pub const MIN_XPUB_LENGTH: usize = 111; pub const MAX_XPUB_LENGTH: usize = 256; pub const MAX_FINGERPRINT_LENGTH: usize = 8; pub const DESCRIPTOR_SESSION_TIMEOUT_SECS: u64 = 600; +pub const DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS: u64 = 86400; pub const DESCRIPTOR_ACK_TIMEOUT_SECS: u64 = 60; pub const DESCRIPTOR_CONTRIBUTION_TIMEOUT_SECS: u64 = 300; pub const DESCRIPTOR_FINALIZE_TIMEOUT_SECS: u64 = 120; @@ -272,6 +273,14 @@ impl KfpMessage { } } KfpMessage::DescriptorPropose(p) => { + if let Some(t) = p.timeout_secs { + if t == 0 { + return Err("timeout_secs must be greater than zero"); + } + if t > DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS { + return Err("timeout_secs exceeds maximum allowed value"); + } + } if !VALID_NETWORKS.contains(&p.network.as_str()) { return Err("Invalid network value"); } @@ -962,6 +971,8 @@ pub struct DescriptorProposePayload { pub policy: WalletPolicy, pub initiator_xpub: String, pub initiator_fingerprint: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub timeout_secs: Option, } impl DescriptorProposePayload { @@ -982,9 +993,15 @@ impl DescriptorProposePayload { policy, initiator_xpub: initiator_xpub.to_string(), initiator_fingerprint: initiator_fingerprint.to_string(), + timeout_secs: None, } } + pub fn with_timeout(mut self, timeout_secs: u64) -> Self { + self.timeout_secs = Some(timeout_secs); + self + } + pub fn is_within_replay_window(&self, window_secs: u64) -> bool { within_replay_window(self.created_at, window_secs) } diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index f99e408b..d5cdf9c9 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -29,8 +29,8 @@ pub use psbt::{PsbtInfo, PsbtInputSighash, PsbtOutputInfo, PsbtParser}; pub use storage::{SecureStorage, ShareInfo, ShareMetadataInfo, StoredShareInfo}; pub use types::{ AnnouncedXpubInfo, DescriptorProposal, DkgConfig, DkgStatus, FrostGenerationResult, - GeneratedShareInfo, PeerInfo, PeerStatus, RecoveryTierConfig, SignRequest, SignRequestMetadata, - ThresholdConfig, WalletDescriptorInfo, + GeneratedShareInfo, KeyHealthStatusInfo, PeerInfo, PeerStatus, RecoveryTierConfig, SignRequest, + SignRequestMetadata, ThresholdConfig, WalletDescriptorInfo, }; use keep_core::frost::{ @@ -80,6 +80,8 @@ const TRUSTED_WARDENS_KEY: &str = "__keep_trusted_wardens_v1"; const CERT_PINS_STORAGE_KEY: &str = "__keep_cert_pins_v1"; const DESCRIPTOR_INDEX_KEY: &str = "__keep_descriptor_index_v1"; const DESCRIPTOR_KEY_PREFIX: &str = "__keep_descriptor_"; +const HEALTH_STATUS_INDEX_KEY: &str = "__keep_health_index_v1"; +const HEALTH_STATUS_KEY_PREFIX: &str = "__keep_health_"; const DESCRIPTOR_SESSION_TIMEOUT: Duration = Duration::from_secs(600); #[derive(uniffi::Record)] @@ -95,6 +97,15 @@ struct StoredShareData { pubkey_package_bytes: Vec, } +#[uniffi::export(with_foreign)] +pub trait HealthCallbacks: Send + Sync { + fn on_health_check_complete( + &self, + responsive: Vec, + unresponsive: Vec, + ) -> Result<(), KeepMobileError>; +} + #[uniffi::export(with_foreign)] pub trait DescriptorCallbacks: Send + Sync { fn on_proposed(&self, session_id: String) -> Result<(), KeepMobileError>; @@ -170,6 +181,7 @@ pub struct KeepMobile { policy: Arc>, velocity: Arc>, descriptor_callbacks: Arc>>>, + health_callbacks: Arc>>>, descriptor_networks: Arc>>, pending_contributions: Arc>>, } @@ -239,6 +251,7 @@ impl KeepMobile { policy, velocity, descriptor_callbacks: Arc::new(RwLock::new(None)), + health_callbacks: Arc::new(RwLock::new(None)), descriptor_networks: Arc::new(std::sync::Mutex::new(HashMap::new())), pending_contributions: Arc::new(std::sync::Mutex::new(HashMap::new())), }) @@ -709,10 +722,77 @@ impl KeepMobile { }); } + pub fn set_health_callbacks(&self, callbacks: Arc) { + self.runtime.block_on(async { + *self.health_callbacks.write().await = Some(callbacks); + }); + } + + pub fn health_check(&self, timeout_secs: u64) -> Result, KeepMobileError> { + self.runtime.block_on(async { + let node_guard = self.node.read().await; + let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; + + let group_pubkey = hex::encode(node.group_pubkey()); + + let result = node + .health_check(Duration::from_secs(timeout_secs)) + .await + .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; + + if let Some(cb) = self.health_callbacks.read().await.as_ref() { + let _ = cb.on_health_check_complete( + result.responsive.clone(), + result.unresponsive.clone(), + ); + } + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let all_shares: Vec<(u16, bool)> = result + .responsive + .iter() + .map(|&idx| (idx, true)) + .chain(result.unresponsive.iter().map(|&idx| (idx, false))) + .collect(); + for (idx, responsive) in &all_shares { + let _ = persistence::persist_health_status( + &self.storage, + &KeyHealthStatusInfo { + group_pubkey: group_pubkey.clone(), + share_index: *idx, + last_check_timestamp: now, + responsive: *responsive, + created_at: now, + is_stale: false, + is_critical: false, + }, + ); + } + + Ok(result.responsive) + }) + } + + pub fn health_status_list(&self) -> Vec { + persistence::load_health_statuses(&self.storage) + } + pub fn wallet_descriptor_propose( &self, network: String, tiers: Vec, + ) -> Result { + self.wallet_descriptor_propose_with_timeout(network, tiers, None) + } + + pub fn wallet_descriptor_propose_with_timeout( + &self, + network: String, + tiers: Vec, + timeout_secs: Option, ) -> Result { if !keep_frost_net::VALID_NETWORKS.contains(&network.as_str()) { return Err(KeepMobileError::NetworkError { @@ -738,7 +818,13 @@ impl KeepMobile { validate_xpub_network(&xpub, &network)?; let session_id = node - .request_descriptor(policy, &network, &xpub, &fingerprint) + .request_descriptor_with_timeout( + policy, + &network, + &xpub, + &fingerprint, + timeout_secs, + ) .await .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; diff --git a/keep-mobile/src/persistence.rs b/keep-mobile/src/persistence.rs index 2f07f46b..36ec1ffb 100644 --- a/keep-mobile/src/persistence.rs +++ b/keep-mobile/src/persistence.rs @@ -9,11 +9,11 @@ use serde::{Deserialize, Serialize}; use crate::error::KeepMobileError; use crate::policy::{PolicyBundle, POLICY_PUBKEY_LEN}; use crate::storage::{SecureStorage, ShareMetadataInfo}; -use crate::types::WalletDescriptorInfo; +use crate::types::{KeyHealthStatusInfo, WalletDescriptorInfo}; use crate::velocity::VelocityTracker; use crate::{ - CERT_PINS_STORAGE_KEY, DESCRIPTOR_INDEX_KEY, DESCRIPTOR_KEY_PREFIX, POLICY_STORAGE_KEY, - TRUSTED_WARDENS_KEY, VELOCITY_STORAGE_KEY, + CERT_PINS_STORAGE_KEY, DESCRIPTOR_INDEX_KEY, DESCRIPTOR_KEY_PREFIX, HEALTH_STATUS_INDEX_KEY, + HEALTH_STATUS_KEY_PREFIX, POLICY_STORAGE_KEY, TRUSTED_WARDENS_KEY, VELOCITY_STORAGE_KEY, }; pub(crate) fn load_policy( @@ -314,3 +314,89 @@ pub(crate) fn delete_descriptor( } Ok(()) } + +#[derive(Serialize, Deserialize)] +struct StoredHealthStatus { + group_pubkey: String, + share_index: u16, + last_check_timestamp: u64, + responsive: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + created_at: Option, +} + +fn health_status_key(group_pubkey_hex: &str, share_index: u16) -> String { + format!("{HEALTH_STATUS_KEY_PREFIX}{group_pubkey_hex}_{share_index}") +} + +fn load_health_index(storage: &Arc) -> Vec { + match storage.load_share_by_key(HEALTH_STATUS_INDEX_KEY.into()) { + Ok(data) => serde_json::from_slice(&data).unwrap_or_default(), + Err(_) => Vec::new(), + } +} + +fn persist_health_index( + storage: &Arc, + index: &[String], +) -> Result<(), KeepMobileError> { + let data = serde_json::to_vec(index).map_err(|e| KeepMobileError::StorageError { + msg: format!("Failed to serialize health index: {e}"), + })?; + storage.store_share_by_key( + HEALTH_STATUS_INDEX_KEY.into(), + data, + storage_metadata("health_index"), + ) +} + +pub(crate) fn persist_health_status( + storage: &Arc, + info: &KeyHealthStatusInfo, +) -> Result<(), KeepMobileError> { + let stored = StoredHealthStatus { + group_pubkey: info.group_pubkey.clone(), + share_index: info.share_index, + last_check_timestamp: info.last_check_timestamp, + responsive: info.responsive, + created_at: Some(info.created_at), + }; + let data = serde_json::to_vec(&stored).map_err(|e| KeepMobileError::StorageError { + msg: format!("Failed to serialize health status: {e}"), + })?; + let key = health_status_key(&info.group_pubkey, info.share_index); + storage.store_share_by_key(key.clone(), data, storage_metadata("health_status"))?; + + let mut index = load_health_index(storage); + if !index.contains(&key) { + index.push(key); + persist_health_index(storage, &index)?; + } + Ok(()) +} + +pub(crate) fn load_health_statuses(storage: &Arc) -> Vec { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let index = load_health_index(storage); + let mut results = Vec::new(); + for key in index { + if let Ok(data) = storage.load_share_by_key(key) { + if let Ok(stored) = serde_json::from_slice::(&data) { + let stale_age = now.saturating_sub(stored.last_check_timestamp); + results.push(KeyHealthStatusInfo { + group_pubkey: stored.group_pubkey, + share_index: stored.share_index, + last_check_timestamp: stored.last_check_timestamp, + responsive: stored.responsive, + created_at: stored.created_at.unwrap_or(stored.last_check_timestamp), + is_stale: stale_age >= keep_core::wallet::KEY_HEALTH_STALE_THRESHOLD_SECS, + is_critical: stale_age >= keep_core::wallet::KEY_HEALTH_CRITICAL_THRESHOLD_SECS, + }); + } + } + } + results +} diff --git a/keep-mobile/src/types.rs b/keep-mobile/src/types.rs index 7e04788e..617eb39f 100644 --- a/keep-mobile/src/types.rs +++ b/keep-mobile/src/types.rs @@ -113,3 +113,14 @@ pub struct AnnouncedXpubInfo { pub fingerprint: String, pub label: Option, } + +#[derive(uniffi::Record, Clone, Debug)] +pub struct KeyHealthStatusInfo { + pub group_pubkey: String, + pub share_index: u16, + pub last_check_timestamp: u64, + pub responsive: bool, + pub created_at: u64, + pub is_stale: bool, + pub is_critical: bool, +} From dc1e08691f6fb63e5ac9bcdded2bac756f0d5e92 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 11:44:16 -0500 Subject: [PATCH 02/13] Simplify and harden health check and descriptor session code --- keep-cli/src/commands/frost_network/mod.rs | 65 +++++++++++----------- keep-cli/src/commands/wallet.rs | 2 +- keep-core/src/storage.rs | 4 +- keep-frost-net/src/descriptor_session.rs | 28 +--------- keep-mobile/src/lib.rs | 7 ++- keep-mobile/src/persistence.rs | 21 +++++++ 6 files changed, 64 insertions(+), 63 deletions(-) diff --git a/keep-cli/src/commands/frost_network/mod.rs b/keep-cli/src/commands/frost_network/mod.rs index 977f3e6b..608fd8b5 100644 --- a/keep-cli/src/commands/frost_network/mod.rs +++ b/keep-cli/src/commands/frost_network/mod.rs @@ -124,8 +124,12 @@ pub fn cmd_frost_network_serve( move || node.derive_account_xpub(&net) }) .await; - match derived { - Ok(Ok((xpub, fingerprint))) => { + let xpub_result = match derived { + Ok(inner) => inner, + Err(e) => Err(keep_frost_net::FrostNetError::Crypto(e.to_string())), + }; + match xpub_result { + Ok((xpub, fingerprint)) => { if let Err(e) = contribute_node .contribute_descriptor( session_id, @@ -138,9 +142,6 @@ pub fn cmd_frost_network_serve( tracing::error!(session, error = %e, "failed to contribute descriptor"); } } - Ok(Err(e)) => { - tracing::error!(session, error = %e, "failed to derive xpub for contribution"); - } Err(e) => { tracing::error!(session, error = %e, "failed to derive xpub for contribution"); } @@ -408,7 +409,10 @@ pub fn cmd_frost_network_sign( out.info("Starting FROST coordination node..."); out.field( "Node pubkey", - &node.pubkey().to_bech32().unwrap_or_default(), + &{ + let pk = node.pubkey(); + pk.to_bech32().unwrap_or_else(|_| format!("{pk}")) + }, ); out.newline(); @@ -575,26 +579,19 @@ pub fn cmd_frost_network_health_check( .unwrap_or_default() .as_secs(); - for &idx in &result.responsive { - let existing = keep.get_health_status(&group_pubkey, idx)?; - let created_at = existing.and_then(|s| s.created_at).unwrap_or(now); - let status = keep_core::wallet::KeyHealthStatus { - group_pubkey, - share_index: idx, - last_check_timestamp: now, - responsive: true, - created_at: Some(created_at), - }; - keep.store_health_status(&status)?; - } - for &idx in &result.unresponsive { + for (&idx, responsive) in result + .responsive + .iter() + .map(|i| (i, true)) + .chain(result.unresponsive.iter().map(|i| (i, false))) + { let existing = keep.get_health_status(&group_pubkey, idx)?; let created_at = existing.and_then(|s| s.created_at).unwrap_or(now); let status = keep_core::wallet::KeyHealthStatus { group_pubkey, share_index: idx, last_check_timestamp: now, - responsive: false, + responsive, created_at: Some(created_at), }; keep.store_health_status(&status)?; @@ -617,11 +614,7 @@ pub fn cmd_frost_network_health_check( out.header("Health History"); for s in &group_statuses { let age = now.saturating_sub(s.last_check_timestamp); - let status_str = if s.responsive { - "responsive" - } else { - "unresponsive" - }; + let status_str = if s.responsive { "responsive" } else { "unresponsive" }; let staleness = if s.is_critical(now) { " [CRITICAL]" } else if s.is_stale(now) { @@ -629,15 +622,7 @@ pub fn cmd_frost_network_health_check( } else { "" }; - let age_display = if age < 60 { - format!("{age}s ago") - } else if age < 3600 { - format!("{}m ago", age / 60) - } else if age < 86400 { - format!("{}h ago", age / 3600) - } else { - format!("{}d ago", age / 86400) - }; + let age_display = format_duration_ago(age); out.field( &format!("Share {}", s.share_index), &format!("{status_str} ({age_display}){staleness}"), @@ -650,3 +635,15 @@ pub fn cmd_frost_network_health_check( Ok(()) } + +fn format_duration_ago(secs: u64) -> String { + if secs < 60 { + format!("{secs}s ago") + } else if secs < 3600 { + format!("{}m ago", secs / 60) + } else if secs < 86400 { + format!("{}h ago", secs / 3600) + } else { + format!("{}d ago", secs / 86400) + } +} diff --git a/keep-cli/src/commands/wallet.rs b/keep-cli/src/commands/wallet.rs index 6ee98b32..94ced42c 100644 --- a/keep-cli/src/commands/wallet.rs +++ b/keep-cli/src/commands/wallet.rs @@ -616,7 +616,7 @@ pub fn cmd_wallet_propose( let spinner = out.spinner("Waiting for ACKs..."); let ack_timeout = timeout_secs - .map(|t| t.max(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS).min(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS * 4)) + .map(|t| t.clamp(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS, keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS * 4)) .unwrap_or(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS); let ack_deadline = tokio::time::Instant::now() + Duration::from_secs(ack_timeout); diff --git a/keep-core/src/storage.rs b/keep-core/src/storage.rs index e6ab50c4..6758876b 100644 --- a/keep-core/src/storage.rs +++ b/keep-core/src/storage.rs @@ -752,7 +752,7 @@ impl Storage { return Ok(None); }; - let encrypted = crypto::EncryptedData::from_bytes(&encrypted_bytes)?; + let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) @@ -769,7 +769,7 @@ impl Storage { let mut statuses = Vec::new(); for (_, encrypted_bytes) in entries { - let encrypted = crypto::EncryptedData::from_bytes(&encrypted_bytes)?; + let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index c61169cc..c2755944 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -399,37 +399,15 @@ impl DescriptorSessionManager { expected_contributors: HashSet, expected_acks: HashSet, ) -> Result<&mut DescriptorSession> { - if let Some(existing) = self.sessions.get(&session_id) { - if !existing.is_expired() { - return Err(FrostNetError::Session( - "Descriptor session already active".into(), - )); - } - self.sessions.remove(&session_id); - } - - let _ = self.cleanup_expired(); - - if self.sessions.len() >= MAX_SESSIONS { - return Err(FrostNetError::Session( - "Maximum number of descriptor sessions reached".into(), - )); - } - - let session = DescriptorSession::new( + self.create_session_with_timeout( session_id, group_pubkey, policy, network, expected_contributors, expected_acks, - self.default_timeout, - ); - - self.sessions.insert(session_id, session); - self.sessions - .get_mut(&session_id) - .ok_or_else(|| FrostNetError::Session("Failed to retrieve created session".into())) + None, + ) } #[allow(clippy::too_many_arguments)] diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index d5cdf9c9..a3dd404e 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -758,6 +758,11 @@ impl KeepMobile { .chain(result.unresponsive.iter().map(|&idx| (idx, false))) .collect(); for (idx, responsive) in &all_shares { + let existing_created_at = persistence::existing_created_at( + &self.storage, + &group_pubkey, + *idx, + ); let _ = persistence::persist_health_status( &self.storage, &KeyHealthStatusInfo { @@ -765,7 +770,7 @@ impl KeepMobile { share_index: *idx, last_check_timestamp: now, responsive: *responsive, - created_at: now, + created_at: existing_created_at.unwrap_or(now), is_stale: false, is_critical: false, }, diff --git a/keep-mobile/src/persistence.rs b/keep-mobile/src/persistence.rs index 36ec1ffb..45e2d2f7 100644 --- a/keep-mobile/src/persistence.rs +++ b/keep-mobile/src/persistence.rs @@ -350,6 +350,27 @@ fn persist_health_index( ) } +pub(crate) fn existing_created_at( + storage: &Arc, + group_pubkey_hex: &str, + share_index: u16, +) -> Option { + load_stored_health_status(storage, group_pubkey_hex, share_index) + .and_then(|s| s.created_at) +} + +fn load_stored_health_status( + storage: &Arc, + group_pubkey_hex: &str, + share_index: u16, +) -> Option { + let key = health_status_key(group_pubkey_hex, share_index); + storage + .load_share_by_key(key) + .ok() + .and_then(|data| serde_json::from_slice(&data).ok()) +} + pub(crate) fn persist_health_status( storage: &Arc, info: &KeyHealthStatusInfo, From 336b2acc73d43e6eba454884f10344f647dea5b7 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 12:01:18 -0500 Subject: [PATCH 03/13] Validate timeouts, handle errors, and fix formatting --- keep-cli/src/commands/frost_network/mod.rs | 17 +++++----- keep-core/src/storage.rs | 9 +++-- keep-frost-net/src/descriptor_session.rs | 38 ++++++++++++++++++---- keep-frost-net/src/node/descriptor.rs | 1 - keep-frost-net/src/node/mod.rs | 2 +- keep-mobile/src/error.rs | 3 ++ keep-mobile/src/lib.rs | 23 +++++++++---- keep-mobile/src/persistence.rs | 3 +- 8 files changed, 68 insertions(+), 28 deletions(-) diff --git a/keep-cli/src/commands/frost_network/mod.rs b/keep-cli/src/commands/frost_network/mod.rs index 608fd8b5..f5460532 100644 --- a/keep-cli/src/commands/frost_network/mod.rs +++ b/keep-cli/src/commands/frost_network/mod.rs @@ -407,13 +407,10 @@ pub fn cmd_frost_network_sign( .map_err(|e| KeepError::Frost(e.to_string()))?; out.info("Starting FROST coordination node..."); - out.field( - "Node pubkey", - &{ - let pk = node.pubkey(); - pk.to_bech32().unwrap_or_else(|_| format!("{pk}")) - }, - ); + out.field("Node pubkey", &{ + let pk = node.pubkey(); + pk.to_bech32().unwrap_or_else(|_| format!("{pk}")) + }); out.newline(); let node = std::sync::Arc::new(node); @@ -614,7 +611,11 @@ pub fn cmd_frost_network_health_check( out.header("Health History"); for s in &group_statuses { let age = now.saturating_sub(s.last_check_timestamp); - let status_str = if s.responsive { "responsive" } else { "unresponsive" }; + let status_str = if s.responsive { + "responsive" + } else { + "unresponsive" + }; let staleness = if s.is_critical(now) { " [CRITICAL]" } else if s.is_stale(now) { diff --git a/keep-core/src/storage.rs b/keep-core/src/storage.rs index 6758876b..9cd33509 100644 --- a/keep-core/src/storage.rs +++ b/keep-core/src/storage.rs @@ -784,8 +784,13 @@ impl Storage { pub fn delete_health_status(&self, group_pubkey: &[u8; 32], share_index: u16) -> Result<()> { let backend = self.backend.as_ref().ok_or(KeepError::Locked)?; let key = health_status_key(group_pubkey, share_index); - backend.delete(HEALTH_STATUS_TABLE, key.as_bytes())?; - Ok(()) + if backend.delete(HEALTH_STATUS_TABLE, key.as_bytes())? { + Ok(()) + } else { + Err(KeepError::KeyNotFound(format!( + "health status for share {share_index} not found" + ))) + } } } diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index c2755944..fa2566df 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -18,6 +18,26 @@ use crate::protocol::{ const MAX_SESSIONS: usize = 64; const REAP_GRACE_SECS: u64 = 60; +const MAX_SESSION_TIMEOUT: Duration = Duration::from_secs(86_400); + +fn validate_session_timeout(timeout: Duration) -> Result { + if timeout.is_zero() { + return Err(FrostNetError::Session( + "Session timeout must be greater than zero".into(), + )); + } + if timeout > MAX_SESSION_TIMEOUT { + return Err(FrostNetError::Session( + format!( + "Session timeout {}s exceeds maximum {}s", + timeout.as_secs(), + MAX_SESSION_TIMEOUT.as_secs() + ) + .into(), + )); + } + Ok(timeout) +} #[derive(Debug, Clone, PartialEq, Eq)] pub enum DescriptorSessionState { @@ -378,11 +398,12 @@ impl DescriptorSessionManager { } } - pub fn with_timeout(timeout: Duration) -> Self { - Self { + pub fn with_timeout(timeout: Duration) -> Result { + let validated = validate_session_timeout(timeout)?; + Ok(Self { sessions: HashMap::new(), - default_timeout: timeout, - } + default_timeout: validated, + }) } pub fn session_count(&self) -> usize { @@ -438,7 +459,10 @@ impl DescriptorSessionManager { )); } - let effective_timeout = timeout.unwrap_or(self.default_timeout); + let effective_timeout = match timeout { + Some(t) => validate_session_timeout(t)?, + None => self.default_timeout, + }; let session = DescriptorSession::new( session_id, group_pubkey, @@ -1038,7 +1062,7 @@ mod tests { #[test] fn test_session_manager_cleanup_expired() { - let mut manager = DescriptorSessionManager::with_timeout(Duration::from_millis(1)); + let mut manager = DescriptorSessionManager::with_timeout(Duration::from_millis(1)).unwrap(); let policy = test_policy(); manager @@ -1217,7 +1241,7 @@ mod tests { #[test] fn test_cleanup_returns_phase_reasons() { - let mut manager = DescriptorSessionManager::with_timeout(Duration::from_secs(600)); + let mut manager = DescriptorSessionManager::with_timeout(Duration::from_secs(600)).unwrap(); let policy = test_policy(); { diff --git a/keep-frost-net/src/node/descriptor.rs b/keep-frost-net/src/node/descriptor.rs index bc3ebbf4..492ca708 100644 --- a/keep-frost-net/src/node/descriptor.rs +++ b/keep-frost-net/src/node/descriptor.rs @@ -37,7 +37,6 @@ impl KfpNode { own_fingerprint: &str, timeout_secs: Option, ) -> Result<[u8; 32]> { - let our_index = self.share.metadata.identifier; self.check_proposer_authorized(our_index)?; diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index 5de20ec6..efd60d36 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -427,7 +427,7 @@ impl KfpNode { }; let descriptor_manager = match session_timeout { - Some(t) => DescriptorSessionManager::with_timeout(t), + Some(t) => DescriptorSessionManager::with_timeout(t)?, None => DescriptorSessionManager::new(), }; diff --git a/keep-mobile/src/error.rs b/keep-mobile/src/error.rs index c20cafe9..1f050055 100644 --- a/keep-mobile/src/error.rs +++ b/keep-mobile/src/error.rs @@ -74,6 +74,9 @@ pub enum KeepMobileError { #[error("Policy signature verification failed")] PolicySignatureInvalid, + #[error("Invalid input: {msg}")] + InvalidInput { msg: String }, + #[error("Certificate pin mismatch")] CertificatePinMismatch { hostname: String, diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index a3dd404e..a48826d3 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -729,6 +729,11 @@ impl KeepMobile { } pub fn health_check(&self, timeout_secs: u64) -> Result, KeepMobileError> { + if timeout_secs == 0 || timeout_secs > 300 { + return Err(KeepMobileError::InvalidInput { + msg: format!("timeout_secs must be between 1 and 300, got {timeout_secs}"), + }); + } self.runtime.block_on(async { let node_guard = self.node.read().await; let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; @@ -758,12 +763,9 @@ impl KeepMobile { .chain(result.unresponsive.iter().map(|&idx| (idx, false))) .collect(); for (idx, responsive) in &all_shares { - let existing_created_at = persistence::existing_created_at( - &self.storage, - &group_pubkey, - *idx, - ); - let _ = persistence::persist_health_status( + let existing_created_at = + persistence::existing_created_at(&self.storage, &group_pubkey, *idx); + if let Err(e) = persistence::persist_health_status( &self.storage, &KeyHealthStatusInfo { group_pubkey: group_pubkey.clone(), @@ -774,7 +776,14 @@ impl KeepMobile { is_stale: false, is_critical: false, }, - ); + ) { + tracing::warn!( + group_pubkey = %group_pubkey, + share_index = %idx, + error = %e, + "Failed to persist health status" + ); + } } Ok(result.responsive) diff --git a/keep-mobile/src/persistence.rs b/keep-mobile/src/persistence.rs index 45e2d2f7..82971f71 100644 --- a/keep-mobile/src/persistence.rs +++ b/keep-mobile/src/persistence.rs @@ -355,8 +355,7 @@ pub(crate) fn existing_created_at( group_pubkey_hex: &str, share_index: u16, ) -> Option { - load_stored_health_status(storage, group_pubkey_hex, share_index) - .and_then(|s| s.created_at) + load_stored_health_status(storage, group_pubkey_hex, share_index).and_then(|s| s.created_at) } fn load_stored_health_status( From 502cc0716356d97751f69e4fa8dbe26d38e517a0 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 12:23:21 -0500 Subject: [PATCH 04/13] Remove unnecessary .into() on format string --- keep-frost-net/src/descriptor_session.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index fa2566df..6a22c643 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -32,8 +32,7 @@ fn validate_session_timeout(timeout: Duration) -> Result { "Session timeout {}s exceeds maximum {}s", timeout.as_secs(), MAX_SESSION_TIMEOUT.as_secs() - ) - .into(), + ), )); } Ok(timeout) From 0b02149b25156ecb4df15c1f220701110fd4b47d Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 12:28:39 -0500 Subject: [PATCH 05/13] Fix rustfmt formatting in descriptor_session.rs --- keep-frost-net/src/descriptor_session.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index 6a22c643..e166ce75 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -27,13 +27,11 @@ fn validate_session_timeout(timeout: Duration) -> Result { )); } if timeout > MAX_SESSION_TIMEOUT { - return Err(FrostNetError::Session( - format!( - "Session timeout {}s exceeds maximum {}s", - timeout.as_secs(), - MAX_SESSION_TIMEOUT.as_secs() - ), - )); + return Err(FrostNetError::Session(format!( + "Session timeout {}s exceeds maximum {}s", + timeout.as_secs(), + MAX_SESSION_TIMEOUT.as_secs() + ))); } Ok(timeout) } From 4599f509da6b321c90605faa207a2cf3a686820b Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Thu, 26 Feb 2026 12:40:45 -0500 Subject: [PATCH 06/13] Validate network early, fix peer snapshot race, log health callback errors --- keep-frost-net/src/node/descriptor.rs | 6 ++++++ keep-frost-net/src/node/mod.rs | 20 ++++++++++++++------ keep-mobile/src/lib.rs | 6 ++++-- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/keep-frost-net/src/node/descriptor.rs b/keep-frost-net/src/node/descriptor.rs index 492ca708..005b32d8 100644 --- a/keep-frost-net/src/node/descriptor.rs +++ b/keep-frost-net/src/node/descriptor.rs @@ -37,6 +37,12 @@ impl KfpNode { own_fingerprint: &str, timeout_secs: Option, ) -> Result<[u8; 32]> { + if !VALID_NETWORKS.contains(&network) { + return Err(FrostNetError::Session(format!( + "Invalid network: {network}" + ))); + } + let our_index = self.share.metadata.identifier; self.check_proposer_authorized(our_index)?; diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index efd60d36..77449c79 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -1066,19 +1066,19 @@ impl KfpNode { } pub async fn health_check(&self, timeout: Duration) -> Result { - let all_peers: Vec = self + let peers_snapshot: Vec<(u16, PublicKey, std::time::Instant)> = self .peers .read() .all_peers() .iter() - .map(|p| p.share_index) + .map(|p| (p.share_index, p.pubkey, p.last_seen)) .collect(); - let responsive = self.ping_peers(timeout).await?; - let unresponsive: Vec = all_peers + let responsive = self.ping_peers_snapshot(&peers_snapshot, timeout).await?; + let unresponsive: Vec = peers_snapshot .iter() + .map(|(idx, _, _)| *idx) .filter(|idx| !responsive.contains(idx)) - .copied() .collect(); let _ = self.event_tx.send(KfpNodeEvent::HealthCheckComplete { @@ -1102,11 +1102,19 @@ impl KfpNode { .map(|p| (p.share_index, p.pubkey, p.last_seen)) .collect(); + self.ping_peers_snapshot(&peers_snapshot, timeout).await + } + + async fn ping_peers_snapshot( + &self, + peers_snapshot: &[(u16, PublicKey, std::time::Instant)], + timeout: Duration, + ) -> Result> { if peers_snapshot.is_empty() { return Ok(Vec::new()); } - for (_, pubkey, _) in &peers_snapshot { + for (_, pubkey, _) in peers_snapshot { if let Ok(event) = KfpEventBuilder::ping(&self.keys, pubkey) { let _ = self.client.send_event(&event).await; } diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index a48826d3..db5fccf7 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -746,10 +746,12 @@ impl KeepMobile { .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; if let Some(cb) = self.health_callbacks.read().await.as_ref() { - let _ = cb.on_health_check_complete( + if let Err(e) = cb.on_health_check_complete( result.responsive.clone(), result.unresponsive.clone(), - ); + ) { + tracing::warn!("Health check callback failed: {e}"); + } } let now = std::time::SystemTime::now() From 6e00f8de9267a3b81b2d981f369ccee220e840d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:18:56 -0500 Subject: [PATCH 07/13] Restrict session_id=None wallet progress to Contributed variant only --- keep-desktop/src/app.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/keep-desktop/src/app.rs b/keep-desktop/src/app.rs index c0678ef8..ab747444 100644 --- a/keep-desktop/src/app.rs +++ b/keep-desktop/src/app.rs @@ -1355,9 +1355,10 @@ impl App { self.update_wallet_setup(&sid, |setup| { setup.phase = SetupPhase::Coordinating(progress); }); - } else if let Screen::Wallet(WalletScreen { setup: Some(s), .. }) = &mut self.screen - { - s.phase = SetupPhase::Coordinating(progress); + } else if matches!(progress, DescriptorProgress::Contributed) { + if let Screen::Wallet(WalletScreen { setup: Some(s), .. }) = &mut self.screen { + s.phase = SetupPhase::Coordinating(progress); + } } Task::none() } From 947770178d14fab51b5dc9c5e5aade4db037f75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:22:13 -0500 Subject: [PATCH 08/13] Simplify: deduplicate timeout constant, remove needless refs and allocations --- keep-cli/src/commands/frost_network/mod.rs | 6 ++---- keep-cli/src/commands/wallet.rs | 5 +++-- keep-core/src/storage.rs | 12 ++++++------ keep-frost-net/src/descriptor_session.rs | 19 +++++++++---------- keep-frost-net/src/node/mod.rs | 14 ++++++++------ keep-mobile/src/lib.rs | 11 +++++------ 6 files changed, 33 insertions(+), 34 deletions(-) diff --git a/keep-cli/src/commands/frost_network/mod.rs b/keep-cli/src/commands/frost_network/mod.rs index f5460532..320cf344 100644 --- a/keep-cli/src/commands/frost_network/mod.rs +++ b/keep-cli/src/commands/frost_network/mod.rs @@ -407,10 +407,8 @@ pub fn cmd_frost_network_sign( .map_err(|e| KeepError::Frost(e.to_string()))?; out.info("Starting FROST coordination node..."); - out.field("Node pubkey", &{ - let pk = node.pubkey(); - pk.to_bech32().unwrap_or_else(|_| format!("{pk}")) - }); + let pk = node.pubkey(); + out.field("Node pubkey", &pk.to_bech32().unwrap_or_else(|_| format!("{pk}"))); out.newline(); let node = std::sync::Arc::new(node); diff --git a/keep-cli/src/commands/wallet.rs b/keep-cli/src/commands/wallet.rs index 94ced42c..7fdb4569 100644 --- a/keep-cli/src/commands/wallet.rs +++ b/keep-cli/src/commands/wallet.rs @@ -615,9 +615,10 @@ pub fn cmd_wallet_propose( spinner.finish(); let spinner = out.spinner("Waiting for ACKs..."); + let default_ack = keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS; let ack_timeout = timeout_secs - .map(|t| t.clamp(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS, keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS * 4)) - .unwrap_or(keep_frost_net::DESCRIPTOR_ACK_TIMEOUT_SECS); + .map(|t| t.clamp(default_ack, default_ack * 4)) + .unwrap_or(default_ack); let ack_deadline = tokio::time::Instant::now() + Duration::from_secs(ack_timeout); let mut external_descriptor = String::new(); diff --git a/keep-core/src/storage.rs b/keep-core/src/storage.rs index 9cd33509..f23f5900 100644 --- a/keep-core/src/storage.rs +++ b/keep-core/src/storage.rs @@ -583,7 +583,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let descriptor: WalletDescriptor = serde_json::from_slice(&decrypted_bytes) + let descriptor: WalletDescriptor = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(descriptor)) } @@ -601,7 +601,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let descriptor: WalletDescriptor = serde_json::from_slice(&decrypted_bytes) + let descriptor: WalletDescriptor = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; descriptors.push(descriptor); } @@ -651,7 +651,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let config: RelayConfig = serde_json::from_slice(&decrypted_bytes) + let config: RelayConfig = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(config)) } @@ -669,7 +669,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let config: RelayConfig = serde_json::from_slice(&decrypted_bytes) + let config: RelayConfig = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; configs.push(config); } @@ -755,7 +755,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) + let status: KeyHealthStatus = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(status)) } @@ -772,7 +772,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) + let status: KeyHealthStatus = serde_json::from_slice(decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; statuses.push(status); } diff --git a/keep-frost-net/src/descriptor_session.rs b/keep-frost-net/src/descriptor_session.rs index e166ce75..5201e626 100644 --- a/keep-frost-net/src/descriptor_session.rs +++ b/keep-frost-net/src/descriptor_session.rs @@ -12,25 +12,25 @@ use sha2::{Digest, Sha256}; use crate::error::{FrostNetError, Result}; use crate::protocol::{ KeySlot, WalletPolicy, DESCRIPTOR_ACK_PHASE_TIMEOUT_SECS, DESCRIPTOR_CONTRIBUTION_TIMEOUT_SECS, - DESCRIPTOR_FINALIZE_TIMEOUT_SECS, DESCRIPTOR_SESSION_TIMEOUT_SECS, MAX_FINGERPRINT_LENGTH, - MAX_XPUB_LENGTH, + DESCRIPTOR_FINALIZE_TIMEOUT_SECS, DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS, + DESCRIPTOR_SESSION_TIMEOUT_SECS, MAX_FINGERPRINT_LENGTH, MAX_XPUB_LENGTH, }; const MAX_SESSIONS: usize = 64; const REAP_GRACE_SECS: u64 = 60; -const MAX_SESSION_TIMEOUT: Duration = Duration::from_secs(86_400); fn validate_session_timeout(timeout: Duration) -> Result { + let max = Duration::from_secs(DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS); if timeout.is_zero() { return Err(FrostNetError::Session( "Session timeout must be greater than zero".into(), )); } - if timeout > MAX_SESSION_TIMEOUT { + if timeout > max { return Err(FrostNetError::Session(format!( "Session timeout {}s exceeds maximum {}s", timeout.as_secs(), - MAX_SESSION_TIMEOUT.as_secs() + max.as_secs() ))); } Ok(timeout) @@ -407,7 +407,6 @@ impl DescriptorSessionManager { self.sessions.len() } - #[allow(clippy::too_many_arguments)] pub fn create_session( &mut self, session_id: [u8; 32], @@ -456,10 +455,10 @@ impl DescriptorSessionManager { )); } - let effective_timeout = match timeout { - Some(t) => validate_session_timeout(t)?, - None => self.default_timeout, - }; + let effective_timeout = timeout + .map(validate_session_timeout) + .transpose()? + .unwrap_or(self.default_timeout); let session = DescriptorSession::new( session_id, group_pubkey, diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index 77449c79..4fc5cacc 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -1081,16 +1081,18 @@ impl KfpNode { .filter(|idx| !responsive.contains(idx)) .collect(); + let result = HealthCheckResult { + responsive, + unresponsive, + }; + let _ = self.event_tx.send(KfpNodeEvent::HealthCheckComplete { group_pubkey: self.group_pubkey, - responsive: responsive.clone(), - unresponsive: unresponsive.clone(), + responsive: result.responsive.clone(), + unresponsive: result.unresponsive.clone(), }); - Ok(HealthCheckResult { - responsive, - unresponsive, - }) + Ok(result) } pub async fn ping_peers(&self, timeout: Duration) -> Result> { diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index db5fccf7..410d4d4d 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -758,22 +758,21 @@ impl KeepMobile { .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs(); - let all_shares: Vec<(u16, bool)> = result + for (idx, responsive) in result .responsive .iter() .map(|&idx| (idx, true)) .chain(result.unresponsive.iter().map(|&idx| (idx, false))) - .collect(); - for (idx, responsive) in &all_shares { + { let existing_created_at = - persistence::existing_created_at(&self.storage, &group_pubkey, *idx); + persistence::existing_created_at(&self.storage, &group_pubkey, idx); if let Err(e) = persistence::persist_health_status( &self.storage, &KeyHealthStatusInfo { group_pubkey: group_pubkey.clone(), - share_index: *idx, + share_index: idx, last_check_timestamp: now, - responsive: *responsive, + responsive, created_at: existing_created_at.unwrap_or(now), is_stale: false, is_critical: false, From 94037502286beb63ce1b7523a73fc41234b4e4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:25:00 -0500 Subject: [PATCH 09/13] Restore necessary borrow references in storage deserialization --- keep-core/src/storage.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/keep-core/src/storage.rs b/keep-core/src/storage.rs index f23f5900..9cd33509 100644 --- a/keep-core/src/storage.rs +++ b/keep-core/src/storage.rs @@ -583,7 +583,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let descriptor: WalletDescriptor = serde_json::from_slice(decrypted_bytes) + let descriptor: WalletDescriptor = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(descriptor)) } @@ -601,7 +601,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let descriptor: WalletDescriptor = serde_json::from_slice(decrypted_bytes) + let descriptor: WalletDescriptor = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; descriptors.push(descriptor); } @@ -651,7 +651,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let config: RelayConfig = serde_json::from_slice(decrypted_bytes) + let config: RelayConfig = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(config)) } @@ -669,7 +669,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let config: RelayConfig = serde_json::from_slice(decrypted_bytes) + let config: RelayConfig = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; configs.push(config); } @@ -755,7 +755,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let status: KeyHealthStatus = serde_json::from_slice(decrypted_bytes) + let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; Ok(Some(status)) } @@ -772,7 +772,7 @@ impl Storage { let encrypted = EncryptedData::from_bytes(&encrypted_bytes)?; let decrypted = crypto::decrypt(&encrypted, data_key)?; let decrypted_bytes = decrypted.as_slice()?; - let status: KeyHealthStatus = serde_json::from_slice(decrypted_bytes) + let status: KeyHealthStatus = serde_json::from_slice(&decrypted_bytes) .map_err(|e| KeepError::Other(format!("json deserialization failed: {e}")))?; statuses.push(status); } From 933c3d16a47dcd8772f6861387763bbbc25af345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:30:28 -0500 Subject: [PATCH 10/13] Validate timeout early, minimize lock scopes, log ping errors --- keep-frost-net/src/node/mod.rs | 34 +++++++++++++++----- keep-mobile/src/lib.rs | 58 ++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 31 deletions(-) diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index 4fc5cacc..2b24a709 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -365,6 +365,11 @@ impl KfpNode { proxy: Option, session_timeout: Option, ) -> Result { + let descriptor_manager = match session_timeout { + Some(t) => DescriptorSessionManager::with_timeout(t)?, + None => DescriptorSessionManager::new(), + }; + for relay in &relays { let validate = if ALLOW_INTERNAL_HOSTS { validate_relay_url_allow_internal @@ -426,11 +431,6 @@ impl KfpNode { None => EcdhSessionManager::new(), }; - let descriptor_manager = match session_timeout { - Some(t) => DescriptorSessionManager::with_timeout(t)?, - None => DescriptorSessionManager::new(), - }; - let audit_hmac_key = derive_audit_hmac_key(&keys, &group_pubkey); let audit_log = Arc::new(SigningAuditLog::new(audit_hmac_key)); @@ -1066,6 +1066,7 @@ impl KfpNode { } pub async fn health_check(&self, timeout: Duration) -> Result { + let timeout = timeout.clamp(Duration::from_secs(1), Duration::from_secs(300)); let peers_snapshot: Vec<(u16, PublicKey, std::time::Instant)> = self .peers .read() @@ -1116,9 +1117,26 @@ impl KfpNode { return Ok(Vec::new()); } - for (_, pubkey, _) in peers_snapshot { - if let Ok(event) = KfpEventBuilder::ping(&self.keys, pubkey) { - let _ = self.client.send_event(&event).await; + for (share_index, pubkey, _) in peers_snapshot { + match KfpEventBuilder::ping(&self.keys, pubkey) { + Ok(event) => { + if let Err(e) = self.client.send_event(&event).await { + warn!( + peer = %pubkey, + share_index, + error = %e, + "Failed to send ping" + ); + } + } + Err(e) => { + warn!( + peer = %pubkey, + share_index, + error = %e, + "Failed to build ping event" + ); + } } } diff --git a/keep-mobile/src/lib.rs b/keep-mobile/src/lib.rs index 410d4d4d..b5ab71a0 100644 --- a/keep-mobile/src/lib.rs +++ b/keep-mobile/src/lib.rs @@ -735,15 +735,16 @@ impl KeepMobile { }); } self.runtime.block_on(async { - let node_guard = self.node.read().await; - let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; - - let group_pubkey = hex::encode(node.group_pubkey()); - - let result = node - .health_check(Duration::from_secs(timeout_secs)) - .await - .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; + let (group_pubkey, result) = { + let node_guard = self.node.read().await; + let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; + let gp = hex::encode(node.group_pubkey()); + let r = node + .health_check(Duration::from_secs(timeout_secs)) + .await + .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; + (gp, r) + }; if let Some(cb) = self.health_callbacks.read().await.as_ref() { if let Err(e) = cb.on_health_check_complete( @@ -814,26 +815,36 @@ impl KeepMobile { msg: format!("Invalid network: {network}"), }); } + if let Some(t) = timeout_secs { + if t == 0 || t > keep_frost_net::DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS { + return Err(KeepMobileError::InvalidInput { + msg: format!( + "timeout_secs must be between 1 and {}, got {t}", + keep_frost_net::DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS + ), + }); + } + } self.runtime.block_on(async { - let node_guard = self.node.read().await; - let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; + let session_id = { + let node_guard = self.node.read().await; + let node = node_guard.as_ref().ok_or(KeepMobileError::NotInitialized)?; - let share_info = self - .storage - .get_share_metadata() - .ok_or(KeepMobileError::NotInitialized)?; + let share_info = self + .storage + .get_share_metadata() + .ok_or(KeepMobileError::NotInitialized)?; - let policy = build_wallet_policy(&tiers, share_info.total_shares)?; + let policy = build_wallet_policy(&tiers, share_info.total_shares)?; - let (xpub, fingerprint) = node - .derive_account_xpub(&network) - .map_err(|e| KeepMobileError::FrostError { msg: e.to_string() })?; + let (xpub, fingerprint) = node + .derive_account_xpub(&network) + .map_err(|e| KeepMobileError::FrostError { msg: e.to_string() })?; - validate_xpub_network(&xpub, &network)?; + validate_xpub_network(&xpub, &network)?; - let session_id = node - .request_descriptor_with_timeout( + node.request_descriptor_with_timeout( policy, &network, &xpub, @@ -841,7 +852,8 @@ impl KeepMobile { timeout_secs, ) .await - .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })?; + .map_err(|e| KeepMobileError::NetworkError { msg: e.to_string() })? + }; self.descriptor_networks .lock() From e6f9cfff7ac445118e13b5cf51bae8b68eb4ba3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:31:09 -0500 Subject: [PATCH 11/13] Fix rustfmt formatting --- keep-cli/src/commands/frost_network/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/keep-cli/src/commands/frost_network/mod.rs b/keep-cli/src/commands/frost_network/mod.rs index 320cf344..d470eb7e 100644 --- a/keep-cli/src/commands/frost_network/mod.rs +++ b/keep-cli/src/commands/frost_network/mod.rs @@ -408,7 +408,10 @@ pub fn cmd_frost_network_sign( out.info("Starting FROST coordination node..."); let pk = node.pubkey(); - out.field("Node pubkey", &pk.to_bech32().unwrap_or_else(|_| format!("{pk}"))); + out.field( + "Node pubkey", + &pk.to_bech32().unwrap_or_else(|_| format!("{pk}")), + ); out.newline(); let node = std::sync::Arc::new(node); From 124a47f2af514626faa8600259f22c128140c18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:40:50 -0500 Subject: [PATCH 12/13] Return errors for invalid timeouts, clean stale health index keys --- keep-frost-net/src/node/descriptor.rs | 15 ++++++++--- keep-frost-net/src/node/mod.rs | 7 ++++- keep-mobile/src/persistence.rs | 37 ++++++++++++++++----------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/keep-frost-net/src/node/descriptor.rs b/keep-frost-net/src/node/descriptor.rs index 005b32d8..b4865a71 100644 --- a/keep-frost-net/src/node/descriptor.rs +++ b/keep-frost-net/src/node/descriptor.rs @@ -202,10 +202,17 @@ impl KfpNode { let our_index = self.share.metadata.identifier; let we_are_contributor = expected_contributors.contains(&our_index); - let propose_timeout = payload - .timeout_secs - .filter(|&t| t > 0 && t <= DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS) - .map(std::time::Duration::from_secs); + let propose_timeout = match payload.timeout_secs { + None => None, + Some(t) if t > 0 && t <= DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS => { + Some(std::time::Duration::from_secs(t)) + } + Some(t) => { + return Err(FrostNetError::Session(format!( + "Invalid proposal timeout {t}s, must be 1..={DESCRIPTOR_SESSION_MAX_TIMEOUT_SECS}" + ))); + } + }; let session_created = { let mut sessions = self.descriptor_sessions.write(); diff --git a/keep-frost-net/src/node/mod.rs b/keep-frost-net/src/node/mod.rs index 2b24a709..547b6145 100644 --- a/keep-frost-net/src/node/mod.rs +++ b/keep-frost-net/src/node/mod.rs @@ -1066,7 +1066,12 @@ impl KfpNode { } pub async fn health_check(&self, timeout: Duration) -> Result { - let timeout = timeout.clamp(Duration::from_secs(1), Duration::from_secs(300)); + if timeout < Duration::from_secs(1) || timeout > Duration::from_secs(300) { + return Err(FrostNetError::Session(format!( + "Health check timeout must be between 1s and 300s, got {}s", + timeout.as_secs() + ))); + } let peers_snapshot: Vec<(u16, PublicKey, std::time::Instant)> = self .peers .read() diff --git a/keep-mobile/src/persistence.rs b/keep-mobile/src/persistence.rs index 82971f71..b3b77e10 100644 --- a/keep-mobile/src/persistence.rs +++ b/keep-mobile/src/persistence.rs @@ -402,21 +402,28 @@ pub(crate) fn load_health_statuses(storage: &Arc) -> Vec(&data) { - let stale_age = now.saturating_sub(stored.last_check_timestamp); - results.push(KeyHealthStatusInfo { - group_pubkey: stored.group_pubkey, - share_index: stored.share_index, - last_check_timestamp: stored.last_check_timestamp, - responsive: stored.responsive, - created_at: stored.created_at.unwrap_or(stored.last_check_timestamp), - is_stale: stale_age >= keep_core::wallet::KEY_HEALTH_STALE_THRESHOLD_SECS, - is_critical: stale_age >= keep_core::wallet::KEY_HEALTH_CRITICAL_THRESHOLD_SECS, - }); - } - } + let mut live_keys = Vec::with_capacity(index.len()); + for key in &index { + let Some(data) = storage.load_share_by_key(key.clone()).ok() else { + continue; + }; + let Some(stored) = serde_json::from_slice::(&data).ok() else { + continue; + }; + live_keys.push(key.clone()); + let stale_age = now.saturating_sub(stored.last_check_timestamp); + results.push(KeyHealthStatusInfo { + group_pubkey: stored.group_pubkey, + share_index: stored.share_index, + last_check_timestamp: stored.last_check_timestamp, + responsive: stored.responsive, + created_at: stored.created_at.unwrap_or(stored.last_check_timestamp), + is_stale: stale_age >= keep_core::wallet::KEY_HEALTH_STALE_THRESHOLD_SECS, + is_critical: stale_age >= keep_core::wallet::KEY_HEALTH_CRITICAL_THRESHOLD_SECS, + }); + } + if live_keys.len() < index.len() { + let _ = persist_health_index(storage, &live_keys); } results } From 4ab1ae6a7bf63aefff84065fa1500621532d1015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20=F0=9F=90=86?= Date: Thu, 26 Feb 2026 13:46:07 -0500 Subject: [PATCH 13/13] Flag future health timestamps as stale, log session creation errors --- keep-frost-net/src/node/descriptor.rs | 4 ++-- keep-mobile/src/persistence.rs | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/keep-frost-net/src/node/descriptor.rs b/keep-frost-net/src/node/descriptor.rs index b4865a71..84d6525b 100644 --- a/keep-frost-net/src/node/descriptor.rs +++ b/keep-frost-net/src/node/descriptor.rs @@ -237,8 +237,8 @@ impl KfpNode { } true } - Err(_) => { - debug!("Descriptor session already exists, ignoring duplicate proposal"); + Err(e) => { + debug!("Descriptor session creation failed: {e}"); false } } diff --git a/keep-mobile/src/persistence.rs b/keep-mobile/src/persistence.rs index b3b77e10..6d2a1990 100644 --- a/keep-mobile/src/persistence.rs +++ b/keep-mobile/src/persistence.rs @@ -411,7 +411,9 @@ pub(crate) fn load_health_statuses(storage: &Arc) -> Vec