Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(iroh-net): Move PathState to its own module #2587

Merged
merged 14 commits into from
Aug 9, 2024
1 change: 1 addition & 0 deletions iroh-net/src/magicsock/node_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use crate::{

mod best_addr;
mod node_state;
mod path_state;
mod udp_paths;

pub use node_state::{ConnectionType, ControlMsg, DirectAddrInfo, NodeInfo};
Expand Down
281 changes: 8 additions & 273 deletions iroh-net/src/magicsock/node_map/node_state.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::{btree_map::Entry, BTreeMap, BTreeSet, HashMap};
use std::collections::{btree_map::Entry, BTreeSet, HashMap};
use std::hash::Hash;
use std::net::{IpAddr, SocketAddr};
use std::time::{Duration, Instant};
Expand All @@ -19,6 +19,7 @@ use crate::util::relay_only_mode;
use crate::{stun, NodeAddr, NodeId};

use super::best_addr::{self, ClearReason, Source};
use super::path_state::{summarize_node_paths, PathState};
use super::udp_paths::{NodeUdpPaths, UdpSendAddr};
use super::IpPort;

Expand All @@ -33,16 +34,12 @@ const LAST_ALIVE_PRUNE_DURATION: Duration = Duration::from_secs(120);
/// How long we wait for a pong reply before assuming it's never coming.
const PING_TIMEOUT_DURATION: Duration = Duration::from_secs(5);

/// The minimum time between pings to an endpoint. (Except in the case of CallMeMaybe frames
/// resetting the counter, as the first pings likely didn't through the firewall)
const DISCO_PING_INTERVAL: Duration = Duration::from_secs(5);

/// The latency at or under which we don't try to upgrade to a better path.
const GOOD_ENOUGH_LATENCY: Duration = Duration::from_millis(5);

/// How long since the last activity we try to keep an established endpoint peering alive.
/// It's also the idle time at which we stop doing STUN queries to keep NAT mappings alive.
const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45);
pub(super) const SESSION_ACTIVE_TIMEOUT: Duration = Duration::from_secs(45);

/// How often we try to upgrade to a better patheven if we have some non-relay route that works.
const UPGRADE_INTERVAL: Duration = Duration::from_secs(60);
Expand Down Expand Up @@ -1130,267 +1127,6 @@ impl From<NodeInfo> for NodeAddr {
}
}

/// State about a particular path to another [`NodeState`].
///
/// This state is used for both the relay path and any direct UDP paths.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(super) struct PathState {
/// The node for which this path exists.
node_id: NodeId,
/// The path this applies for.
path: SendAddr,
/// The last (outgoing) ping time.
last_ping: Option<Instant>,

/// If non-zero, means that this was an endpoint that we learned about at runtime (from an
/// incoming ping). If so, we keep the time updated and use it to discard old candidates.
// NOTE: tx_id Originally added in tailscale due to <https://github.com/tailscale/tailscale/issues/7078>.
last_got_ping: Option<(Instant, stun::TransactionId)>,

/// If non-zero, is the time this endpoint was advertised last via a call-me-maybe disco message.
call_me_maybe_time: Option<Instant>,

/// Last [`PongReply`] received.
pub(super) recent_pong: Option<PongReply>,
/// When was this endpoint last used to transmit payload data (removing ping, pong, etc).
pub(super) last_payload_msg: Option<Instant>,
}

impl PathState {
fn new(node_id: NodeId, path: SendAddr) -> Self {
Self {
node_id,
path,
last_ping: None,
last_got_ping: None,
call_me_maybe_time: None,
recent_pong: None,
last_payload_msg: None,
}
}

pub(super) fn udp_addr(&self) -> Option<SocketAddr> {
match self.path {
SendAddr::Udp(addr) => Some(addr),
SendAddr::Relay(_) => None,
}
}

pub(super) fn with_last_payload(node_id: NodeId, path: SendAddr, now: Instant) -> Self {
PathState {
node_id,
path,
last_ping: None,
last_got_ping: None,
call_me_maybe_time: None,
recent_pong: None,
last_payload_msg: Some(now),
}
}

pub(super) fn with_ping(
node_id: NodeId,
path: SendAddr,
tx_id: stun::TransactionId,
now: Instant,
) -> Self {
let mut new = PathState::new(node_id, path);
new.handle_ping(tx_id, now);
new
}

pub(super) fn add_pong_reply(&mut self, r: PongReply) {
if let SendAddr::Udp(ref path) = self.path {
if self.recent_pong.is_none() {
event!(
target: "events.net.holepunched",
Level::DEBUG,
node = %self.node_id.fmt_short(),
path = ?path,
direction = "outgoing",
);
}
}
self.recent_pong = Some(r);
}

#[cfg(test)]
pub(super) fn with_pong_reply(node_id: NodeId, r: PongReply) -> Self {
PathState {
node_id,
path: r.from.clone(),
last_ping: None,
last_got_ping: None,
call_me_maybe_time: None,
recent_pong: Some(r),
last_payload_msg: None,
}
}

/// Check whether this path is considered active.
///
/// Active means the path has received payload messages within the last
/// [`SESSION_ACTIVE_TIMEOUT`].
///
/// Note that a path might be alive but not active if it's contactable but not in
/// use.
pub(super) fn is_active(&self) -> bool {
self.last_payload_msg
.as_ref()
.map(|instant| instant.elapsed() <= SESSION_ACTIVE_TIMEOUT)
.unwrap_or(false)
}

/// Returns the instant the last incoming ping was received.
pub(super) fn last_incoming_ping(&self) -> Option<&Instant> {
self.last_got_ping.as_ref().map(|(time, _tx_id)| time)
}

/// Reports the last instant this path was considered alive.
///
/// Alive means the path is considered in use by the remote endpoint. Either because we
/// received a payload message, a DISCO message (ping, pong) or it was advertised in a
/// call-me-maybe message.
///
/// This is the most recent instant between:
/// - when last pong was received.
/// - when this path was last advertised in a received CallMeMaybe message.
/// - When the last payload transmission occurred.
/// - when the last ping from them was received.
pub(super) fn last_alive(&self) -> Option<Instant> {
self.recent_pong()
.map(|pong| &pong.pong_at)
.into_iter()
.chain(self.last_payload_msg.as_ref())
.chain(self.call_me_maybe_time.as_ref())
.chain(self.last_incoming_ping())
.max()
.copied()
}

pub(super) fn last_control_msg(&self, now: Instant) -> Option<(Duration, ControlMsg)> {
// get every control message and assign it its kind
let last_pong = self
.recent_pong()
.map(|pong| (pong.pong_at, ControlMsg::Pong));
let last_call_me_maybe = self
.call_me_maybe_time
.as_ref()
.map(|call_me| (*call_me, ControlMsg::CallMeMaybe));
let last_ping = self
.last_incoming_ping()
.map(|ping| (*ping, ControlMsg::Ping));

last_pong
.into_iter()
.chain(last_call_me_maybe)
.chain(last_ping)
.max_by_key(|(instant, _kind)| *instant)
.map(|(instant, kind)| (now.duration_since(instant), kind))
}

/// Returns the most recent pong if available.
pub(super) fn recent_pong(&self) -> Option<&PongReply> {
self.recent_pong.as_ref()
}

/// Returns the latency from the most recent pong, if available.
fn latency(&self) -> Option<Duration> {
self.recent_pong.as_ref().map(|p| p.latency)
}

fn needs_ping(&self, now: &Instant) -> bool {
match self.last_ping {
None => true,
Some(last_ping) => {
let elapsed = now.duration_since(last_ping);

// TODO: remove!
// This logs "ping is too new" for each send whenever the endpoint does *not* need
// a ping. Pretty sure this is not a useful log, but maybe there was a reason?
// if !needs_ping {
// debug!("ping is too new: {}ms", elapsed.as_millis());
// }
elapsed > DISCO_PING_INTERVAL
}
}
}

fn handle_ping(&mut self, tx_id: stun::TransactionId, now: Instant) -> PingRole {
if Some(&tx_id) == self.last_got_ping.as_ref().map(|(_t, tx_id)| tx_id) {
PingRole::Duplicate
} else {
let prev = self.last_got_ping.replace((now, tx_id));
let heartbeat_deadline = HEARTBEAT_INTERVAL + (HEARTBEAT_INTERVAL / 2);
match prev {
Some((prev_time, _tx)) if now.duration_since(prev_time) <= heartbeat_deadline => {
PingRole::LikelyHeartbeat
}
Some((prev_time, _tx)) => {
debug!(
elapsed = ?now.duration_since(prev_time),
"heartbeat missed, reactivating",
);
PingRole::Activate
}
None => {
if let SendAddr::Udp(ref addr) = self.path {
event!(
target: "events.net.holepunched",
Level::DEBUG,
node = %self.node_id.fmt_short(),
path = ?addr,
direction = "incoming",
);
}
PingRole::Activate
}
}
}
}

fn clear(&mut self) {
self.last_ping = None;
self.last_got_ping = None;
self.call_me_maybe_time = None;
self.recent_pong = None;
}

fn summary(&self, mut w: impl std::fmt::Write) -> std::fmt::Result {
write!(w, "{{ ")?;
if self.is_active() {
write!(w, "active ")?;
}
if let Some(ref pong) = self.recent_pong {
write!(w, "pong-received({:?} ago) ", pong.pong_at.elapsed())?;
}
if let Some(when) = self.last_incoming_ping() {
write!(w, "ping-received({:?} ago) ", when.elapsed())?;
}
if let Some(ref when) = self.last_ping {
write!(w, "ping-sent({:?} ago) ", when.elapsed())?;
}
write!(w, "}}")
}
}

// TODO: Make an `EndpointPaths` struct and do things nicely.
fn summarize_node_paths(paths: &BTreeMap<IpPort, PathState>) -> String {
use std::fmt::Write;

let mut w = String::new();
write!(&mut w, "[").ok();
for (i, (ipp, state)) in paths.iter().enumerate() {
if i > 0 {
write!(&mut w, ", ").ok();
}
write!(&mut w, "{ipp}").ok();
state.summary(&mut w).ok();
}
write!(&mut w, "]").ok();
w
}

/// Whether to send a call-me-maybe message after sending pings to all known paths.
///
/// `IfNoRecent` will only send a call-me-maybe if no previous one was sent in the last
Expand Down Expand Up @@ -1555,15 +1291,14 @@ pub enum ConnectionType {

#[cfg(test)]
mod tests {
use std::net::Ipv4Addr;
use std::{collections::BTreeMap, net::Ipv4Addr};

use crate::key::SecretKey;
use crate::magicsock::node_map::{NodeMap, NodeMapInner};

use best_addr::BestAddr;

use super::{
super::{NodeMap, NodeMapInner},
*,
};
use crate::key::SecretKey;
use super::*;

#[test]
fn test_endpoint_infos() {
Expand Down
Loading
Loading