diff --git a/iroh-net/src/magicsock/node_map.rs b/iroh-net/src/magicsock/node_map.rs index 3550f34bfb..a91510674c 100644 --- a/iroh-net/src/magicsock/node_map.rs +++ b/iroh-net/src/magicsock/node_map.rs @@ -30,6 +30,7 @@ use crate::{ mod best_addr; mod node_state; +mod path_state; mod udp_paths; pub use node_state::{ConnectionType, ControlMsg, DirectAddrInfo, NodeInfo}; diff --git a/iroh-net/src/magicsock/node_map/node_state.rs b/iroh-net/src/magicsock/node_map/node_state.rs index 375fb706d7..c387b97647 100644 --- a/iroh-net/src/magicsock/node_map/node_state.rs +++ b/iroh-net/src/magicsock/node_map/node_state.rs @@ -1,4 +1,4 @@ -use std::collections::{btree_map::Entry, BTreeMap, BTreeSet, HashMap}; +use std::collections::{btree_map::Entry, BTreeSet, HashMap}; use std::hash::Hash; use std::net::{IpAddr, SocketAddr}; use std::time::{Duration, Instant}; @@ -19,6 +19,7 @@ use crate::util::relay_only_mode; use crate::{stun, NodeAddr, NodeId}; use super::best_addr::{self, ClearReason, Source}; +use super::path_state::{summarize_node_paths, PathState}; use super::udp_paths::{NodeUdpPaths, UdpSendAddr}; use super::IpPort; @@ -33,16 +34,12 @@ const LAST_ALIVE_PRUNE_DURATION: Duration = Duration::from_secs(120); /// How long we wait for a pong reply before assuming it's never coming. const PING_TIMEOUT_DURATION: Duration = Duration::from_secs(5); -/// The minimum time between pings to an endpoint. (Except in the case of CallMeMaybe frames -/// resetting the counter, as the first pings likely didn't through the firewall) -const DISCO_PING_INTERVAL: Duration = Duration::from_secs(5); - /// The latency at or under which we don't try to upgrade to a better path. const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5); /// How long since the last activity we try to keep an established endpoint peering alive. /// It's also the idle time at which we stop doing STUN queries to keep NAT mappings alive. -const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45); +pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45); /// How often we try to upgrade to a better patheven if we have some non-relay route that works. const UPGRADE_INTERVAL: Duration = Duration::from_secs(60); @@ -1130,267 +1127,6 @@ impl From for NodeAddr { } } -/// State about a particular path to another [`NodeState`]. -/// -/// This state is used for both the relay path and any direct UDP paths. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(super) struct PathState { - /// The node for which this path exists. - node_id: NodeId, - /// The path this applies for. - path: SendAddr, - /// The last (outgoing) ping time. - last_ping: Option, - - /// If non-zero, means that this was an endpoint that we learned about at runtime (from an - /// incoming ping). If so, we keep the time updated and use it to discard old candidates. - // NOTE: tx_id Originally added in tailscale due to . - last_got_ping: Option<(Instant, stun::TransactionId)>, - - /// If non-zero, is the time this endpoint was advertised last via a call-me-maybe disco message. - call_me_maybe_time: Option, - - /// Last [`PongReply`] received. - pub(super) recent_pong: Option, - /// When was this endpoint last used to transmit payload data (removing ping, pong, etc). - pub(super) last_payload_msg: Option, -} - -impl PathState { - fn new(node_id: NodeId, path: SendAddr) -> Self { - Self { - node_id, - path, - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - recent_pong: None, - last_payload_msg: None, - } - } - - pub(super) fn udp_addr(&self) -> Option { - match self.path { - SendAddr::Udp(addr) => Some(addr), - SendAddr::Relay(_) => None, - } - } - - pub(super) fn with_last_payload(node_id: NodeId, path: SendAddr, now: Instant) -> Self { - PathState { - node_id, - path, - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - recent_pong: None, - last_payload_msg: Some(now), - } - } - - pub(super) fn with_ping( - node_id: NodeId, - path: SendAddr, - tx_id: stun::TransactionId, - now: Instant, - ) -> Self { - let mut new = PathState::new(node_id, path); - new.handle_ping(tx_id, now); - new - } - - pub(super) fn add_pong_reply(&mut self, r: PongReply) { - if let SendAddr::Udp(ref path) = self.path { - if self.recent_pong.is_none() { - event!( - target: "events.net.holepunched", - Level::DEBUG, - node = %self.node_id.fmt_short(), - path = ?path, - direction = "outgoing", - ); - } - } - self.recent_pong = Some(r); - } - - #[cfg(test)] - pub(super) fn with_pong_reply(node_id: NodeId, r: PongReply) -> Self { - PathState { - node_id, - path: r.from.clone(), - last_ping: None, - last_got_ping: None, - call_me_maybe_time: None, - recent_pong: Some(r), - last_payload_msg: None, - } - } - - /// Check whether this path is considered active. - /// - /// Active means the path has received payload messages within the last - /// [`SESSION_ACTIVE_TIMEOUT`]. - /// - /// Note that a path might be alive but not active if it's contactable but not in - /// use. - pub(super) fn is_active(&self) -> bool { - self.last_payload_msg - .as_ref() - .map(|instant| instant.elapsed() <= SESSION_ACTIVE_TIMEOUT) - .unwrap_or(false) - } - - /// Returns the instant the last incoming ping was received. - pub(super) fn last_incoming_ping(&self) -> Option<&Instant> { - self.last_got_ping.as_ref().map(|(time, _tx_id)| time) - } - - /// Reports the last instant this path was considered alive. - /// - /// Alive means the path is considered in use by the remote endpoint. Either because we - /// received a payload message, a DISCO message (ping, pong) or it was advertised in a - /// call-me-maybe message. - /// - /// This is the most recent instant between: - /// - when last pong was received. - /// - when this path was last advertised in a received CallMeMaybe message. - /// - When the last payload transmission occurred. - /// - when the last ping from them was received. - pub(super) fn last_alive(&self) -> Option { - self.recent_pong() - .map(|pong| &pong.pong_at) - .into_iter() - .chain(self.last_payload_msg.as_ref()) - .chain(self.call_me_maybe_time.as_ref()) - .chain(self.last_incoming_ping()) - .max() - .copied() - } - - pub(super) fn last_control_msg(&self, now: Instant) -> Option<(Duration, ControlMsg)> { - // get every control message and assign it its kind - let last_pong = self - .recent_pong() - .map(|pong| (pong.pong_at, ControlMsg::Pong)); - let last_call_me_maybe = self - .call_me_maybe_time - .as_ref() - .map(|call_me| (*call_me, ControlMsg::CallMeMaybe)); - let last_ping = self - .last_incoming_ping() - .map(|ping| (*ping, ControlMsg::Ping)); - - last_pong - .into_iter() - .chain(last_call_me_maybe) - .chain(last_ping) - .max_by_key(|(instant, _kind)| *instant) - .map(|(instant, kind)| (now.duration_since(instant), kind)) - } - - /// Returns the most recent pong if available. - pub(super) fn recent_pong(&self) -> Option<&PongReply> { - self.recent_pong.as_ref() - } - - /// Returns the latency from the most recent pong, if available. - fn latency(&self) -> Option { - self.recent_pong.as_ref().map(|p| p.latency) - } - - fn needs_ping(&self, now: &Instant) -> bool { - match self.last_ping { - None => true, - Some(last_ping) => { - let elapsed = now.duration_since(last_ping); - - // TODO: remove! - // This logs "ping is too new" for each send whenever the endpoint does *not* need - // a ping. Pretty sure this is not a useful log, but maybe there was a reason? - // if !needs_ping { - // debug!("ping is too new: {}ms", elapsed.as_millis()); - // } - elapsed > DISCO_PING_INTERVAL - } - } - } - - fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole { - if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) { - PingRole::Duplicate - } else { - let prev = self.last_got_ping.replace((now, tx_id)); - let heartbeat_deadline = HEARTBEAT_INTERVAL + (HEARTBEAT_INTERVAL / 2); - match prev { - Some((prev_time, _tx)) if now.duration_since(prev_time) <= heartbeat_deadline => { - PingRole::LikelyHeartbeat - } - Some((prev_time, _tx)) => { - debug!( - elapsed = ?now.duration_since(prev_time), - "heartbeat missed, reactivating", - ); - PingRole::Activate - } - None => { - if let SendAddr::Udp(ref addr) = self.path { - event!( - target: "events.net.holepunched", - Level::DEBUG, - node = %self.node_id.fmt_short(), - path = ?addr, - direction = "incoming", - ); - } - PingRole::Activate - } - } - } - } - - fn clear(&mut self) { - self.last_ping = None; - self.last_got_ping = None; - self.call_me_maybe_time = None; - self.recent_pong = None; - } - - fn summary(&self, mut w: impl std::fmt::Write) -> std::fmt::Result { - write!(w, "{{ ")?; - if self.is_active() { - write!(w, "active ")?; - } - if let Some(ref pong) = self.recent_pong { - write!(w, "pong-received({:?} ago) ", pong.pong_at.elapsed())?; - } - if let Some(when) = self.last_incoming_ping() { - write!(w, "ping-received({:?} ago) ", when.elapsed())?; - } - if let Some(ref when) = self.last_ping { - write!(w, "ping-sent({:?} ago) ", when.elapsed())?; - } - write!(w, "}}") - } -} - -// TODO: Make an `EndpointPaths` struct and do things nicely. -fn summarize_node_paths(paths: &BTreeMap) -> String { - use std::fmt::Write; - - let mut w = String::new(); - write!(&mut w, "[").ok(); - for (i, (ipp, state)) in paths.iter().enumerate() { - if i > 0 { - write!(&mut w, ", ").ok(); - } - write!(&mut w, "{ipp}").ok(); - state.summary(&mut w).ok(); - } - write!(&mut w, "]").ok(); - w -} - /// Whether to send a call-me-maybe message after sending pings to all known paths. /// /// `IfNoRecent` will only send a call-me-maybe if no previous one was sent in the last @@ -1555,15 +1291,14 @@ pub enum ConnectionType { #[cfg(test)] mod tests { - use std::net::Ipv4Addr; + use std::{collections::BTreeMap, net::Ipv4Addr}; + + use crate::key::SecretKey; + use crate::magicsock::node_map::{NodeMap, NodeMapInner}; use best_addr::BestAddr; - use super::{ - super::{NodeMap, NodeMapInner}, - *, - }; - use crate::key::SecretKey; + use super::*; #[test] fn test_endpoint_infos() { diff --git a/iroh-net/src/magicsock/node_map/path_state.rs b/iroh-net/src/magicsock/node_map/path_state.rs new file mode 100644 index 0000000000..6121d8242d --- /dev/null +++ b/iroh-net/src/magicsock/node_map/path_state.rs @@ -0,0 +1,284 @@ +//! The state kept for each network path to a remote node. + +use std::collections::BTreeMap; +use std::net::SocketAddr; +use std::time::{Duration, Instant}; + +use iroh_base::key::NodeId; +use tracing::{debug, event, Level}; + +use crate::disco::SendAddr; +use crate::magicsock::HEARTBEAT_INTERVAL; +use crate::stun; + +use super::node_state::{PongReply, SESSION_ACTIVE_TIMEOUT}; +use super::{ControlMsg, IpPort, PingRole}; + +/// The minimum time between pings to an endpoint. +/// +/// Except in the case of CallMeMaybe frames resetting the counter, as the first pings +/// likely didn't through the firewall. +const DISCO_PING_INTERVAL: Duration = Duration::from_secs(5); + +/// State about a particular path to another [`NodeState`]. +/// +/// This state is used for both the relay path and any direct UDP paths. +/// +/// [`NodeState`]: super::node_state::NodeState +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(super) struct PathState { + /// The node for which this path exists. + node_id: NodeId, + /// The path this applies for. + path: SendAddr, + /// The last (outgoing) ping time. + pub(super) last_ping: Option, + + /// If non-zero, means that this was an endpoint that we learned about at runtime (from an + /// incoming ping). If so, we keep the time updated and use it to discard old candidates. + // NOTE: tx_id Originally added in tailscale due to . + last_got_ping: Option<(Instant, stun::TransactionId)>, + + /// If non-zero, is the time this endpoint was advertised last via a call-me-maybe disco message. + pub(super) call_me_maybe_time: Option, + + /// Last [`PongReply`] received. + pub(super) recent_pong: Option, + /// When was this endpoint last used to transmit payload data (removing ping, pong, etc). + pub(super) last_payload_msg: Option, +} + +impl PathState { + pub(super) fn new(node_id: NodeId, path: SendAddr) -> Self { + Self { + node_id, + path, + last_ping: None, + last_got_ping: None, + call_me_maybe_time: None, + recent_pong: None, + last_payload_msg: None, + } + } + + pub(super) fn udp_addr(&self) -> Option { + match self.path { + SendAddr::Udp(addr) => Some(addr), + SendAddr::Relay(_) => None, + } + } + + pub(super) fn with_last_payload(node_id: NodeId, path: SendAddr, now: Instant) -> Self { + PathState { + node_id, + path, + last_ping: None, + last_got_ping: None, + call_me_maybe_time: None, + recent_pong: None, + last_payload_msg: Some(now), + } + } + + pub(super) fn with_ping( + node_id: NodeId, + path: SendAddr, + tx_id: stun::TransactionId, + now: Instant, + ) -> Self { + let mut new = PathState::new(node_id, path); + new.handle_ping(tx_id, now); + new + } + + pub(super) fn add_pong_reply(&mut self, r: PongReply) { + if let SendAddr::Udp(ref path) = self.path { + if self.recent_pong.is_none() { + event!( + target: "events.net.holepunched", + Level::DEBUG, + node = %self.node_id.fmt_short(), + path = ?path, + direction = "outgoing", + ); + } + } + self.recent_pong = Some(r); + } + + #[cfg(test)] + pub(super) fn with_pong_reply(node_id: NodeId, r: PongReply) -> Self { + PathState { + node_id, + path: r.from.clone(), + last_ping: None, + last_got_ping: None, + call_me_maybe_time: None, + recent_pong: Some(r), + last_payload_msg: None, + } + } + + /// Check whether this path is considered active. + /// + /// Active means the path has received payload messages within the last + /// [`SESSION_ACTIVE_TIMEOUT`]. + /// + /// Note that a path might be alive but not active if it's contactable but not in + /// use. + pub(super) fn is_active(&self) -> bool { + self.last_payload_msg + .as_ref() + .map(|instant| instant.elapsed() <= SESSION_ACTIVE_TIMEOUT) + .unwrap_or(false) + } + + /// Returns the instant the last incoming ping was received. + pub(super) fn last_incoming_ping(&self) -> Option<&Instant> { + self.last_got_ping.as_ref().map(|(time, _tx_id)| time) + } + + /// Reports the last instant this path was considered alive. + /// + /// Alive means the path is considered in use by the remote endpoint. Either because we + /// received a payload message, a DISCO message (ping, pong) or it was advertised in a + /// call-me-maybe message. + /// + /// This is the most recent instant between: + /// - when last pong was received. + /// - when this path was last advertised in a received CallMeMaybe message. + /// - When the last payload transmission occurred. + /// - when the last ping from them was received. + pub(super) fn last_alive(&self) -> Option { + self.recent_pong() + .map(|pong| &pong.pong_at) + .into_iter() + .chain(self.last_payload_msg.as_ref()) + .chain(self.call_me_maybe_time.as_ref()) + .chain(self.last_incoming_ping()) + .max() + .copied() + } + + pub(super) fn last_control_msg(&self, now: Instant) -> Option<(Duration, ControlMsg)> { + // get every control message and assign it its kind + let last_pong = self + .recent_pong() + .map(|pong| (pong.pong_at, ControlMsg::Pong)); + let last_call_me_maybe = self + .call_me_maybe_time + .as_ref() + .map(|call_me| (*call_me, ControlMsg::CallMeMaybe)); + let last_ping = self + .last_incoming_ping() + .map(|ping| (*ping, ControlMsg::Ping)); + + last_pong + .into_iter() + .chain(last_call_me_maybe) + .chain(last_ping) + .max_by_key(|(instant, _kind)| *instant) + .map(|(instant, kind)| (now.duration_since(instant), kind)) + } + + /// Returns the most recent pong if available. + pub(super) fn recent_pong(&self) -> Option<&PongReply> { + self.recent_pong.as_ref() + } + + /// Returns the latency from the most recent pong, if available. + pub(super) fn latency(&self) -> Option { + self.recent_pong.as_ref().map(|p| p.latency) + } + + pub(super) fn needs_ping(&self, now: &Instant) -> bool { + match self.last_ping { + None => true, + Some(last_ping) => { + let elapsed = now.duration_since(last_ping); + + // TODO: remove! + // This logs "ping is too new" for each send whenever the endpoint does *not* need + // a ping. Pretty sure this is not a useful log, but maybe there was a reason? + // if !needs_ping { + // debug!("ping is too new: {}ms", elapsed.as_millis()); + // } + elapsed > DISCO_PING_INTERVAL + } + } + } + + pub(super) fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole { + if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) { + PingRole::Duplicate + } else { + let prev = self.last_got_ping.replace((now, tx_id)); + let heartbeat_deadline = HEARTBEAT_INTERVAL + (HEARTBEAT_INTERVAL / 2); + match prev { + Some((prev_time, _tx)) if now.duration_since(prev_time) <= heartbeat_deadline => { + PingRole::LikelyHeartbeat + } + Some((prev_time, _tx)) => { + debug!( + elapsed = ?now.duration_since(prev_time), + "heartbeat missed, reactivating", + ); + PingRole::Activate + } + None => { + if let SendAddr::Udp(ref addr) = self.path { + event!( + target: "events.net.holepunched", + Level::DEBUG, + node = %self.node_id.fmt_short(), + path = ?addr, + direction = "incoming", + ); + } + PingRole::Activate + } + } + } + } + + pub(super) fn clear(&mut self) { + self.last_ping = None; + self.last_got_ping = None; + self.call_me_maybe_time = None; + self.recent_pong = None; + } + + fn summary(&self, mut w: impl std::fmt::Write) -> std::fmt::Result { + write!(w, "{{ ")?; + if self.is_active() { + write!(w, "active ")?; + } + if let Some(ref pong) = self.recent_pong { + write!(w, "pong-received({:?} ago) ", pong.pong_at.elapsed())?; + } + if let Some(when) = self.last_incoming_ping() { + write!(w, "ping-received({:?} ago) ", when.elapsed())?; + } + if let Some(ref when) = self.last_ping { + write!(w, "ping-sent({:?} ago) ", when.elapsed())?; + } + write!(w, "}}") + } +} + +// TODO: Make an `EndpointPaths` struct and do things nicely. +pub(super) fn summarize_node_paths(paths: &BTreeMap) -> String { + use std::fmt::Write; + + let mut w = String::new(); + write!(&mut w, "[").ok(); + for (i, (ipp, state)) in paths.iter().enumerate() { + if i > 0 { + write!(&mut w, ", ").ok(); + } + write!(&mut w, "{ipp}").ok(); + state.summary(&mut w).ok(); + } + write!(&mut w, "]").ok(); + w +} diff --git a/iroh-net/src/magicsock/node_map/udp_paths.rs b/iroh-net/src/magicsock/node_map/udp_paths.rs index 1154bc19c1..345fff9daf 100644 --- a/iroh-net/src/magicsock/node_map/udp_paths.rs +++ b/iroh-net/src/magicsock/node_map/udp_paths.rs @@ -15,7 +15,8 @@ use tracing::warn; use crate::disco::SendAddr; use super::best_addr::{self, BestAddr}; -use super::node_state::{PathState, PongReply}; +use super::node_state::PongReply; +use super::path_state::PathState; use super::IpPort; /// The address on which to send datagrams over UDP.